From a432ee5a4a9304bb390186b5878a1ee5f924d4d6 Mon Sep 17 00:00:00 2001 From: borja Date: Fri, 14 Nov 2025 11:02:24 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20reducir=20logs=20a=20cambios=20de=20est?= =?UTF-8?q?ado=20y=20exponer=20m=C3=A9tricas=20de=20Evolution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: aider (openrouter/openai/gpt-5) --- src/services/maintenance.ts | 53 ++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/src/services/maintenance.ts b/src/services/maintenance.ts index 81bda69..50e5a5c 100644 --- a/src/services/maintenance.ts +++ b/src/services/maintenance.ts @@ -1,11 +1,14 @@ import type { Database } from 'bun:sqlite'; import { getDb } from '../db/locator'; import { toIsoSqlUTC } from '../utils/datetime'; +import { Metrics } from './metrics'; export class MaintenanceService { private static _timer: any = null; private static _healthCheckTimer: any = null; private static _lastRestartAttempt: number = 0; + private static _lastEvolutionState: string | null = null; + private static _lastStateChangeTs: number = 0; private static get retentionDays(): number { const v = Number(process.env.GROUP_MEMBERS_INACTIVE_RETENTION_DAYS); @@ -18,7 +21,7 @@ export class MaintenanceService { url: process.env.EVOLUTION_API_URL, instance: process.env.EVOLUTION_API_INSTANCE, apiKey: process.env.EVOLUTION_API_KEY, - intervalMs: Number(process.env.HEALTH_CHECK_INTERVAL_MS || '120000'), // 2 min por defecto + intervalMs: Number(process.env.HEALTH_CHECK_INTERVAL_MS || '60000'), // 1 min por defecto restartCooldownMs: Number(process.env.HEALTH_CHECK_RESTART_COOLDOWN_MS || '900000'), // 15 min por defecto }; } @@ -142,27 +145,63 @@ export class MaintenanceService { const restartUrl = `${url}/instance/restart/${instance}`; const headers: HeadersInit = { apikey: String(apiKey || '') }; + const recordState = (newState: string) => { + const prev = this._lastEvolutionState; + const nowSec = Math.floor(Date.now() / 1000); + + // Actualizar métricas de estado (1 para el actual; 0 para el anterior si cambió) + try { + Metrics.set('evolution_instance_state', 1, { instance: String(instance || ''), state: newState }); + if (prev && prev !== newState) { + Metrics.set('evolution_instance_state', 0, { instance: String(instance || ''), state: prev }); + } + } catch {} + + // Logging solo en primer muestreo o cuando cambie + if (!prev) { + console.log(`[HealthCheck] Estado inicial de la instancia '${instance}': ${newState}`); + this._lastStateChangeTs = nowSec; + try { Metrics.set('evolution_instance_last_state_change_ts', nowSec, { instance: String(instance || '') }); } catch {} + } else if (prev !== newState) { + console.log(`[HealthCheck] Cambio de estado en instancia '${instance}': ${prev} → ${newState}`); + this._lastStateChangeTs = nowSec; + try { + Metrics.set('evolution_instance_last_state_change_ts', nowSec, { instance: String(instance || '') }); + Metrics.inc('evolution_instance_state_changes_total', 1, { instance: String(instance || '') }); + } catch {} + } + + this._lastEvolutionState = newState; + }; + try { const response = await fetch(stateUrl, { method: 'GET', headers }); if (!response.ok) { console.error(`[HealthCheck] Error al consultar estado de Evolution API: ${response.status} ${response.statusText}`); + try { Metrics.inc('evolution_health_check_errors_total', 1, { instance: String(instance || '') }); } catch {} + // Registrar estado como 'unreachable' (sin intentar reinicio aquí) + recordState('unreachable'); return; } const data = await response.json(); - const currentState = data?.instance?.state; + const currentState = String(data?.instance?.state ?? 'unknown'); - console.log(`[HealthCheck] Estado de la instancia '${instance}': ${currentState}`); + // Registrar estado y métricas (sin spam de logs si no cambia) + recordState(currentState); + // Intentar reinicio si no está 'open' y ha pasado el cooldown if (currentState !== 'open') { const now = Date.now(); if (now - this._lastRestartAttempt > restartCooldownMs) { - console.warn(`[HealthCheck] La instancia no está 'open'. Estado actual: ${currentState}. Intentando reiniciar...`); + console.warn(`[HealthCheck] La instancia no está 'open' (estado: ${currentState}). Intentando reiniciar...`); try { + try { Metrics.inc('evolution_instance_restart_attempts_total', 1, { instance: String(instance || '') }); } catch {} const restartResponse = await fetch(restartUrl, { method: 'PUT', headers }); if (restartResponse.ok) { - console.log(`[HealthCheck] Petición de reinicio para '${instance}' enviada exitosamente.`); + console.log(`[HealthCheck] Petición de reinicio enviada exitosamente para '${instance}'.`); + try { Metrics.inc('evolution_instance_restart_success_total', 1, { instance: String(instance || '') }); } catch {} this._lastRestartAttempt = now; } else { console.error(`[HealthCheck] Fallo al reiniciar la instancia. Status: ${restartResponse.status} ${restartResponse.statusText}`); @@ -171,11 +210,13 @@ export class MaintenanceService { console.error('[HealthCheck] Error de red al intentar reiniciar la instancia:', restartError); } } else { - console.log(`[HealthCheck] La instancia no está 'open', pero esperando cooldown de ${Math.round(restartCooldownMs / 60000)} minutos para no sobrecargar la API.`); + // Reducir ruido: no loguear en cada intervalo si seguimos en el mismo estado } } } catch (error) { console.error('[HealthCheck] Error de red o inesperado al verificar el estado de la Evolution API:', error); + try { Metrics.inc('evolution_health_check_errors_total', 1, { instance: String(instance || '') }); } catch {} + recordState('unreachable'); } } }