feat: añadir tests de health-check y documentar métricas/ENV

Co-authored-by: aider (openrouter/openai/gpt-5) <aider@aider.chat>
main
borja 1 month ago
parent a432ee5a4a
commit de9bfba9aa

@ -88,6 +88,12 @@ ONBOARDING_FALLBACK_MIN_DIGITS=8 # A2: longitud mínima para conservar núme
# METRICS_ENABLED=true # METRICS_ENABLED=true
# METRICS_FORMAT=prom # prom|json # METRICS_FORMAT=prom # prom|json
# Health check de Evolution API (opcional)
# Intervalo en milisegundos para consultar estado (por defecto 60000 = 60s).
# HEALTH_CHECK_INTERVAL_MS=60000
# Cooldown en milisegundos entre intentos de reinicio (por defecto 900000 = 15 min).
# HEALTH_CHECK_RESTART_COOLDOWN_MS=900000
# Migrador (opcional) # Migrador (opcional)
# MIGRATIONS_LOG_LEVEL="silent" # Silencia logs del migrador (en test ya se silencian automáticamente) # MIGRATIONS_LOG_LEVEL="silent" # Silencia logs del migrador (en test ya se silencian automáticamente)

@ -80,6 +80,7 @@ Variables clave:
- TZ (por defecto Europe/Madrid). - TZ (por defecto Europe/Madrid).
- REMINDERS_GRACE_MINUTES (ventana de gracia tras la hora; por defecto 60). - REMINDERS_GRACE_MINUTES (ventana de gracia tras la hora; por defecto 60).
- ALLOWED_GROUPS (semilla inicial), NOTIFY_ADMINS_ON_DISCOVERY. - ALLOWED_GROUPS (semilla inicial), NOTIFY_ADMINS_ON_DISCOVERY.
- HEALTH_CHECK_INTERVAL_MS (ms, por defecto 60000) y HEALTH_CHECK_RESTART_COOLDOWN_MS (ms, por defecto 900000).
- METRICS_ENABLED, PORT. - METRICS_ENABLED, PORT.
- WEB_BASE_URL (host público de la web para generar enlaces absolutos; usado por /t web). - WEB_BASE_URL (host público de la web para generar enlaces absolutos; usado por /t web).
- Rate limit: RATE_LIMIT_PER_MIN, RATE_LIMIT_BURST. - Rate limit: RATE_LIMIT_PER_MIN, RATE_LIMIT_BURST.
@ -93,7 +94,13 @@ Consulta:
## Operación y mantenimiento ## Operación y mantenimiento
- /metrics expone contadores y gauges; puede deshabilitarse por configuración. - /metrics expone contadores y gauges; puede deshabilitarse por configuración. Principales series:
- evolution_instance_state{instance, state} (gauge): vale 1 para el estado actual de Evolution (open/connecting/closed/unreachable…) y pasa a 0 para el estado anterior en cada transición.
- evolution_instance_last_state_change_ts{instance} (gauge): timestamp epoch (s) del último cambio de estado.
- evolution_instance_state_changes_total{instance} (counter): número de transiciones de estado observadas.
- evolution_instance_restart_attempts_total{instance} (counter): intentos de reinicio cuando el estado no es 'open'.
- evolution_instance_restart_success_total{instance} (counter): reinicios exitosos.
- evolution_health_check_errors_total{instance} (counter): errores HTTP o de red al consultar el estado.
- Schedulers configurables; se evitan en entornos de test. - Schedulers configurables; se evitan en entornos de test.
- Migraciones up-only al arranque; logging de eventos de migración. - Migraciones up-only al arranque; logging de eventos de migración.
- Copias de seguridad: respaldar el directorio data/ y planificar retención. - Copias de seguridad: respaldar el directorio data/ y planificar retención.

@ -1,8 +1,9 @@
import { beforeEach, describe, expect, it } from 'bun:test'; import { beforeEach, describe, expect, it, afterEach } from 'bun:test';
import Database from 'bun:sqlite'; import Database from 'bun:sqlite';
import { initializeDatabase } from '../../../src/db'; import { initializeDatabase } from '../../../src/db';
import { MaintenanceService } from '../../../src/services/maintenance'; import { MaintenanceService } from '../../../src/services/maintenance';
import { toIsoSqlUTC } from '../../../src/utils/datetime'; import { toIsoSqlUTC } from '../../../src/utils/datetime';
import { Metrics } from '../../../src/services/metrics';
function makeMem(): any { function makeMem(): any {
const db = new Database(':memory:'); const db = new Database(':memory:');
@ -90,3 +91,160 @@ describe('MaintenanceService', () => {
expect(merged).toBe(0); expect(merged).toBe(0);
}); });
}); });
/**
 * Health-check test suite for MaintenanceService.
 *
 * Each case replaces `globalThis.fetch` with a stub that answers the two
 * Evolution API endpoints the tests care about (`/instance/connectionState/`
 * and `/instance/restart/`), runs `performEvolutionHealthCheck()` one or more
 * times and then inspects the JSON rendering of the metrics registry.
 */
describe('MaintenanceService - Evolution health check', () => {
  const ENV_KEYS = [
    'METRICS_ENABLED',
    'EVOLUTION_API_URL',
    'EVOLUTION_API_INSTANCE',
    'EVOLUTION_API_KEY',
    'HEALTH_CHECK_RESTART_COOLDOWN_MS',
    'HEALTH_CHECK_INTERVAL_MS',
  ];
  const savedEnv: Record<string, string | undefined> = {};
  let originalFetch: typeof globalThis.fetch;

  /**
   * Installs a `fetch` stub emulating the Evolution API and returns its
   * mutable settings, so a test can change the reported state between samples.
   *
   * @param initial.state         Connection state reported while `stateStatus` is 200 (default 'open').
   * @param initial.stateStatus   HTTP status of the connectionState endpoint (default 200);
   *                              any other value yields an 'err' body with that status.
   * @param initial.restartStatus HTTP status of the restart endpoint (default 200).
   * @returns The live settings object read by the stub on every request.
   */
  function installEvolutionMock(
    initial: { state?: string; stateStatus?: number; restartStatus?: number } = {},
  ) {
    const api = {
      state: initial.state ?? 'open',
      stateStatus: initial.stateStatus ?? 200,
      restartStatus: initial.restartStatus ?? 200,
    };
    const stub = async (url: unknown): Promise<Response> => {
      const target = String(url);
      if (target.includes('/instance/connectionState/')) {
        return api.stateStatus === 200
          ? new Response(JSON.stringify({ instance: { state: api.state } }), { status: 200 })
          : new Response('err', { status: api.stateStatus });
      }
      if (target.includes('/instance/restart/')) {
        return new Response('', { status: api.restartStatus });
      }
      // Any other endpoint is unknown to the stub.
      return new Response('', { status: 404 });
    };
    // The cast keeps the assignment valid even if the runtime's fetch type
    // declares members beyond the call signature.
    globalThis.fetch = stub as typeof globalThis.fetch;
    return api;
  }

  /** Runs one health-check sample; the method is reached through `any` because the test calls it directly on the class. */
  async function runHealthCheck(): Promise<void> {
    await (MaintenanceService as any).performEvolutionHealthCheck();
  }

  /** Parses the JSON metrics dump and returns its labeled gauges and counters (empty objects when absent). */
  function readMetrics(): { gauges: any; counters: any } {
    const stats = JSON.parse(Metrics.render('json'));
    return {
      gauges: stats.labeledGauges || {},
      counters: stats.labeledCounters || {},
    };
  }

  beforeEach(() => {
    // Save the current values, then set the minimal environment that enables
    // metrics and the health check.
    for (const key of ENV_KEYS) savedEnv[key] = process.env[key];
    process.env.METRICS_ENABLED = 'true';
    process.env.EVOLUTION_API_URL = 'http://evo';
    process.env.EVOLUTION_API_INSTANCE = 'inst';
    process.env.EVOLUTION_API_KEY = 'key';
    process.env.HEALTH_CHECK_RESTART_COOLDOWN_MS = '0'; // no cooldown, so restart attempts are easy to trigger in tests

    // Reset metrics and the service's internal health-check state.
    Metrics.reset();
    (MaintenanceService as any)._lastEvolutionState = null;
    (MaintenanceService as any)._lastStateChangeTs = 0;
    (MaintenanceService as any)._lastRestartAttempt = 0;

    // Remember the real fetch so afterEach can put it back.
    originalFetch = globalThis.fetch;
  });

  afterEach(() => {
    // Restore the environment exactly as it was (unset keys are removed again).
    for (const key of ENV_KEYS) {
      const previous = savedEnv[key];
      if (previous === undefined) delete process.env[key];
      else process.env[key] = previous;
    }
    // Restore fetch and clear metrics after every case.
    globalThis.fetch = originalFetch;
    Metrics.reset();
  });

  it('registra y actualiza métricas en transición de estado (open → closed)', async () => {
    const api = installEvolutionMock({ state: 'open' });

    // First sample: open.
    await runHealthCheck();
    // Second sample: closed (must trigger a transition).
    api.state = 'closed';
    await runHealthCheck();

    const { gauges, counters } = readMetrics();
    expect(gauges.evolution_instance_state['instance="inst",state="closed"']).toBe(1);
    expect(gauges.evolution_instance_state['instance="inst",state="open"']).toBe(0);
    expect(typeof gauges.evolution_instance_last_state_change_ts['instance="inst"']).toBe('number');
    expect(counters.evolution_instance_state_changes_total['instance="inst"']).toBe(1);
  });

  it('no incrementa cambios si el estado se repite (open → open)', async () => {
    installEvolutionMock({ state: 'open' });

    await runHealthCheck();
    await runHealthCheck();

    const { gauges, counters } = readMetrics();
    expect(gauges.evolution_instance_state['instance="inst",state="open"']).toBe(1);
    // The counter may be absent altogether; in either case its total must be zero.
    const totalChanges = Object.values(counters.evolution_instance_state_changes_total ?? {})
      .reduce<number>((sum, value) => sum + Number(value || 0), 0);
    expect(totalChanges).toBe(0);
  });

  it('registra error y marca estado "unreachable" ante HTTP no OK', async () => {
    // The state endpoint fails with 500; every other endpoint (restart included) answers 404.
    installEvolutionMock({ stateStatus: 500, restartStatus: 404 });

    await runHealthCheck();

    const { gauges, counters } = readMetrics();
    expect(gauges.evolution_instance_state['instance="inst",state="unreachable"']).toBe(1);
    expect(counters.evolution_health_check_errors_total['instance="inst"']).toBe(1);
  });

  it('incrementa attempts y success al reiniciar cuando el estado no es open', async () => {
    installEvolutionMock({ state: 'closed', restartStatus: 200 });

    await runHealthCheck();

    const { counters } = readMetrics();
    expect(counters.evolution_instance_restart_attempts_total['instance="inst"']).toBe(1);
    expect(counters.evolution_instance_restart_success_total['instance="inst"']).toBe(1);
  });

  it('no incrementa success si el reinicio falla', async () => {
    installEvolutionMock({ state: 'closed', restartStatus: 500 });

    await runHealthCheck();

    const { counters } = readMetrics();
    expect(counters.evolution_instance_restart_attempts_total['instance="inst"']).toBe(1);
    // Either the success counter does not exist or it has no entry for this instance.
    expect(counters.evolution_instance_restart_success_total?.['instance="inst"'] ?? null).toBeNull();
  });
});

Loading…
Cancel
Save