From de9bfba9aad3de18902c0b0fa76f496ba05cded9 Mon Sep 17 00:00:00 2001 From: borja Date: Fri, 14 Nov 2025 11:27:20 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20a=C3=B1adir=20tests=20de=20health-check?= =?UTF-8?q?=20y=20documentar=20m=C3=A9tricas/ENV?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: aider (openrouter/openai/gpt-5) --- .env.example | 6 + README.md | 9 +- tests/unit/services/maintenance.test.ts | 160 +++++++++++++++++++++++- 3 files changed, 173 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 7adfb76..8b5c38b 100644 --- a/.env.example +++ b/.env.example @@ -88,6 +88,12 @@ ONBOARDING_FALLBACK_MIN_DIGITS=8 # A2: longitud mínima para conservar núme # METRICS_ENABLED=true # METRICS_FORMAT=prom # prom|json +# Health check de Evolution API (opcional) +# Intervalo en milisegundos para consultar estado (por defecto 60000 = 60s). +# HEALTH_CHECK_INTERVAL_MS=60000 +# Cooldown en milisegundos entre intentos de reinicio (por defecto 900000 = 15 min). +# HEALTH_CHECK_RESTART_COOLDOWN_MS=900000 + # Migrador (opcional) # MIGRATIONS_LOG_LEVEL="silent" # Silencia logs del migrador (en test ya se silencian automáticamente) diff --git a/README.md b/README.md index 01b6a62..daacb86 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ Variables clave: - TZ (por defecto Europe/Madrid). - REMINDERS_GRACE_MINUTES (ventana de gracia tras la hora; por defecto 60). - ALLOWED_GROUPS (semilla inicial), NOTIFY_ADMINS_ON_DISCOVERY. +- HEALTH_CHECK_INTERVAL_MS (ms, por defecto 60000) y HEALTH_CHECK_RESTART_COOLDOWN_MS (ms, por defecto 900000). - METRICS_ENABLED, PORT. - WEB_BASE_URL (host público de la web para generar enlaces absolutos; usado por /t web). - Rate limit: RATE_LIMIT_PER_MIN, RATE_LIMIT_BURST. @@ -93,7 +94,13 @@ Consulta: ## Operación y mantenimiento -- /metrics expone contadores y gauges; puede deshabilitarse por configuración. 
+- /metrics expone contadores y gauges; puede deshabilitarse por configuración. Principales series:
+  - evolution_instance_state{instance, state} (gauge): 1 para el estado actual de Evolution (open/connecting/closed/unreachable…), 0 al estado anterior en cada transición.
+  - evolution_instance_last_state_change_ts{instance} (gauge): timestamp epoch (s) del último cambio de estado.
+  - evolution_instance_state_changes_total{instance} (counter): número de transiciones de estado observadas.
+  - evolution_instance_restart_attempts_total{instance} (counter): intentos de reinicio cuando el estado no es 'open'.
+  - evolution_instance_restart_success_total{instance} (counter): reinicios exitosos.
+  - evolution_health_check_errors_total{instance} (counter): errores HTTP/red al consultar estado.
 - Schedulers configurables; se evitan en entornos de test.
 - Migraciones up-only al arranque; logging de eventos de migración.
 - Copias de seguridad: respaldar el directorio data/ y planificar retención.
diff --git a/tests/unit/services/maintenance.test.ts b/tests/unit/services/maintenance.test.ts
index 3791dfa..2863921 100644
--- a/tests/unit/services/maintenance.test.ts
+++ b/tests/unit/services/maintenance.test.ts
@@ -1,8 +1,9 @@
-import { beforeEach, describe, expect, it } from 'bun:test';
+import { beforeEach, describe, expect, it, afterEach } from 'bun:test';
 import Database from 'bun:sqlite';
 import { initializeDatabase } from '../../../src/db';
 import { MaintenanceService } from '../../../src/services/maintenance';
 import { toIsoSqlUTC } from '../../../src/utils/datetime';
+import { Metrics } from '../../../src/services/metrics';
 
 function makeMem(): any {
   const db = new Database(':memory:');
@@ -90,3 +91,160 @@ describe('MaintenanceService', () => {
     expect(merged).toBe(0);
   });
 });
+
+/**
+ * Evolution health-check cases: fetch is replaced by a stub, one pass is run via
+ * MaintenanceService.performEvolutionHealthCheck(), and the assertions read the
+ * JSON produced by Metrics.render('json').
+ */
+describe('MaintenanceService - Evolution health check', () => {
+  // Environment variables saved before each case and restored after it.
+  const ENV_KEYS = [
+    'METRICS_ENABLED',
+    'EVOLUTION_API_URL',
+    'EVOLUTION_API_INSTANCE',
+    'EVOLUTION_API_KEY',
+    'HEALTH_CHECK_RESTART_COOLDOWN_MS',
+    'HEALTH_CHECK_INTERVAL_MS',
+  ];
+  // Snapshot of those variables taken in beforeEach.
+  const savedEnv: Record<string, string | undefined> = {};
+
+  // The real fetch, captured in beforeEach and put back in afterEach.
+  let originalFetch: typeof globalThis.fetch;
+
+  // Answers served by the stubbed Evolution API. beforeEach resets it to a healthy
+  // instance (state 'open', both routes answering 200); each case adjusts what it needs.
+  let evo: { state: string; stateStatus: number; restartStatus: number };
+
+  /**
+   * Replaces globalThis.fetch with a stub driven by `evo`:
+   * - URLs containing /instance/connectionState/ answer { instance: { state } } with 200,
+   *   or the body 'err' with evo.stateStatus when that status is not 200;
+   * - URLs containing /instance/restart/ answer an empty body with evo.restartStatus;
+   * - any other URL answers 404.
+   */
+  function installFetchStub(): void {
+    globalThis.fetch = (async (url: any) => {
+      const u = String(url);
+      if (u.includes('/instance/connectionState/')) {
+        if (evo.stateStatus !== 200) return new Response('err', { status: evo.stateStatus });
+        return new Response(JSON.stringify({ instance: { state: evo.state } }), { status: 200 });
+      }
+      if (u.includes('/instance/restart/')) {
+        return new Response('', { status: evo.restartStatus });
+      }
+      return new Response('', { status: 404 });
+    }) as any;
+  }
+
+  /** Runs a single health-check pass. */
+  const runHealthCheck = (): Promise<unknown> =>
+    (MaintenanceService as any).performEvolutionHealthCheck();
+
+  /**
+   * Parses the JSON metrics dump and returns its labeled gauges (lg) and labeled
+   * counters (lc), each defaulting to an empty object when missing.
+   */
+  function readMetrics(): { lg: any; lc: any } {
+    const stats = JSON.parse(Metrics.render('json'));
+    return { lg: stats.labeledGauges || {}, lc: stats.labeledCounters || {} };
+  }
+
+  beforeEach(() => {
+    // Save the environment, then set the minimum that enables metrics and the health check.
+    for (const k of ENV_KEYS) savedEnv[k] = process.env[k];
+    process.env.METRICS_ENABLED = 'true';
+    process.env.EVOLUTION_API_URL = 'http://evo';
+    process.env.EVOLUTION_API_INSTANCE = 'inst';
+    process.env.EVOLUTION_API_KEY = 'key';
+    process.env.HEALTH_CHECK_RESTART_COOLDOWN_MS = '0'; // make restart attempts easy in tests
+
+    // Reset metrics and the service's internal state.
+    Metrics.reset();
+    (MaintenanceService as any)._lastEvolutionState = null;
+    (MaintenanceService as any)._lastStateChangeTs = 0;
+    (MaintenanceService as any)._lastRestartAttempt = 0;
+
+    // Keep the real fetch, then install the stub with its defaults.
+    originalFetch = globalThis.fetch;
+    evo = { state: 'open', stateStatus: 200, restartStatus: 200 };
+    installFetchStub();
+  });
+
+  afterEach(() => {
+    // Restore the environment.
+    for (const k of ENV_KEYS) {
+      if (savedEnv[k] == null) delete (process.env as any)[k];
+      else process.env[k] = savedEnv[k];
+    }
+    // Restore fetch.
+    globalThis.fetch = originalFetch;
+    // Reset metrics after every case.
+    Metrics.reset();
+  });
+
+  it('registra y actualiza métricas en transición de estado (open → closed)', async () => {
+    // First sample: open.
+    await runHealthCheck();
+    // Second sample: closed (must trigger a transition).
+    evo.state = 'closed';
+    await runHealthCheck();
+
+    const { lg, lc } = readMetrics();
+    // The new state reads 1, the previous one 0, and one transition is counted.
+    expect(lg.evolution_instance_state['instance="inst",state="closed"']).toBe(1);
+    expect(lg.evolution_instance_state['instance="inst",state="open"']).toBe(0);
+    expect(typeof lg.evolution_instance_last_state_change_ts['instance="inst"']).toBe('number');
+    expect(lc.evolution_instance_state_changes_total['instance="inst"']).toBe(1);
+  });
+
+  it('no incrementa cambios si el estado se repite (open → open)', async () => {
+    // Two samples in a row, both reporting the default 'open' state.
+    await runHealthCheck();
+    await runHealthCheck();
+
+    const { lg, lc } = readMetrics();
+    expect(lg.evolution_instance_state['instance="inst",state="open"']).toBe(1);
+    // The counter may be missing altogether; that also means zero transitions.
+    const changes: Record<string, unknown> = lc.evolution_instance_state_changes_total || {};
+    const total = Object.values(changes).reduce((sum: number, v) => sum + Number(v || 0), 0);
+    expect(total).toBe(0);
+  });
+
+  it('registra error y marca estado "unreachable" ante HTTP no OK', async () => {
+    // connectionState answers HTTP 500 with the body 'err'.
+    evo.stateStatus = 500;
+    evo.restartStatus = 404; // the restart route is not served in this case
+    await runHealthCheck();
+
+    const { lg, lc } = readMetrics();
+    expect(lg.evolution_instance_state['instance="inst",state="unreachable"']).toBe(1);
+    expect(lc.evolution_health_check_errors_total['instance="inst"']).toBe(1);
+  });
+
+  it('incrementa attempts y success al reiniciar cuando el estado no es open', async () => {
+    // The instance reports 'closed' and the restart route answers the default 200.
+    evo.state = 'closed';
+    await runHealthCheck();
+
+    const { lc } = readMetrics();
+    expect(lc.evolution_instance_restart_attempts_total['instance="inst"']).toBe(1);
+    expect(lc.evolution_instance_restart_success_total['instance="inst"']).toBe(1);
+  });
+
+  it('no incrementa success si el reinicio falla', async () => {
+    // The instance reports 'closed' and the restart route answers 500.
+    evo.state = 'closed';
+    evo.restartStatus = 500;
+    await runHealthCheck();
+
+    const { lc } = readMetrics();
+    expect(lc.evolution_instance_restart_attempts_total['instance="inst"']).toBe(1);
+    // Success must be either absent or unset for this instance.
+    expect(
+      !lc.evolution_instance_restart_success_total ||
+        lc.evolution_instance_restart_success_total['instance="inst"'] == null,
+    ).toBe(true);
+  });
+});