mailwolt/app/Jobs/RunHealthChecks.php

371 lines
13 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
namespace App\Jobs;
use App\Models\Setting as SettingsModel;
use App\Support\CacheVer;
use App\Support\WoltGuard\Probes;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Queue\Queueable;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process;
use Symfony\Component\Finder\Finder;
class RunHealthChecks implements ShouldQueue
{
// Queueable supplies the queue plumbing; Probes is the WoltGuard probe trait.
// NOTE(review): check() and probeSystemd() are called below but not defined in this
// class — presumably they come from Probes; confirm.
use Queueable, Probes;
// Job timeout as read by the queue worker.
public int $timeout = 10;
// Number of attempts: a failing run is not retried.
public int $tries = 1;
/**
 * Probes every card from config('woltguard.cards') and publishes the result.
 *
 * A card counts as healthy as soon as one of its sources passes check().
 * The rows are written to the versioned cache key (TTL argument 300) and
 * persisted through the settings model under 'woltguard.services'.
 */
public function handle(): void
{
    $cards = config('woltguard.cards', []);
    $svcRows = [];

    foreach ($cards as $key => $card) {
        $ok = false;

        // A card without a "sources" entry is reported as down instead of raising a warning.
        foreach ((array) ($card['sources'] ?? []) as $src) {
            // The job runs with a single try, so one throwing probe must not
            // abort the whole run: failures are logged and treated as "not ok".
            if ($this->safe(fn () => $this->check($src), false)) {
                $ok = true;
                break;
            }
        }

        // Only the card key is stored here; labels are needed in the UI.
        $svcRows[] = ['name' => $key, 'ok' => $ok];
    }

    $payload = ['ts' => time(), 'rows' => $svcRows];

    Cache::put(CacheVer::k('health:services'), $payload, 300);
    Log::info('WG: writing services', ['count' => count($svcRows)]);
    SettingsModel::set('woltguard.services', $payload);

    // NOTE(review): this forgets the plain key, not the CacheVer::k() key written above —
    // presumably a cleanup of an older unversioned entry; confirm.
    Cache::forget('health:services');
}
/**
 * Runs a probe callable and shields the caller from its failures.
 *
 * @param callable $fn       Probe to execute.
 * @param mixed    $fallback Value handed back when the probe throws.
 * @return mixed Result of $fn, or $fallback after the failure has been logged.
 */
protected function safe(callable $fn, $fallback = null)
{
    try {
        $result = $fn();
    } catch (\Throwable $failure) {
        Log::warning('Health probe failed', ['err' => $failure->getMessage()]);

        return $fallback;
    }

    return $result;
}
/**
 * Reports whether a systemd unit is active.
 *
 * @param string $name Unit name handed to `systemctl is-active`.
 * @return array{name: string, ok: bool, raw: string} 'raw' is the state text printed by
 *         systemctl, or 'unknown' when nothing was printed at all.
 */
protected function service(string $name): array
{
    // The exit code is deliberately ignored; only the printed state text is evaluated.
    // The unit name is escaped so it can never be interpreted by the shell.
    $r = Process::run('systemctl is-active ' . escapeshellarg($name));

    // Trim first, then fall back: a whitespace-only stdout must not hide stderr.
    $raw = trim($r->output());
    if ($raw === '') {
        $raw = trim($r->errorOutput());
    }

    return [
        'name' => $name,
        'ok'   => $raw === 'active',
        'raw'  => $raw !== '' ? $raw : 'unknown',
    ];
}
/**
 * Tries to open a TCP connection with a 0.4 s connect timeout.
 *
 * @param string $host Target host.
 * @param int    $port Target port.
 * @return array{name: string, ok: bool, raw: string} 'raw' is 'open' on success, otherwise
 *         the error text from fsockopen() or 'closed' when that text is empty.
 */
protected function tcp(string $host, int $port): array
{
    $socket = @fsockopen($host, $port, $errno, $errstr, 0.4);
    $ok = $socket !== false;

    if ($ok) {
        // Only reachability matters; the connection itself is not used.
        fclose($socket);
        $raw = 'open';
    } else {
        $raw = $errstr ?: 'closed';
    }

    return ['name' => "$host:$port", 'ok' => $ok, 'raw' => $raw];
}
/**
 * Database liveness probe.
 *
 * There is no failure row: a broken connection surfaces as the exception thrown by the query.
 *
 * @return array{name: string, ok: bool, raw: string}
 */
protected function db(): array
{
    // Trivial round trip; the result itself is irrelevant.
    DB::select('select 1');

    return [
        'name' => 'db',
        'ok'   => true,
        'raw'  => 'ok',
    ];
}
/**
 * Combined state of the queue worker and scheduler units.
 *
 * @return array{name: string, ok: bool, raw: string} 'ok' only when both units probe as up;
 *         'raw' spells out each unit as active/down.
 */
protected function queueWorkers(): array
{
    $queueUp = $this->probeSystemd('mailwolt-queue.service');
    $schedUp = $this->probeSystemd('mailwolt-schedule.service');

    $word = static fn ($up): string => $up ? 'active' : 'down';

    return [
        'name' => 'queue',
        'ok'   => $queueUp && $schedUp,
        'raw'  => sprintf('queue:%s sched:%s', $word($queueUp), $word($schedUp)),
    ];
}
/**
 * Usage of the filesystem mounted at '/'.
 *
 * @return array{percent: int|null, free_gb: int|null} Both values are null when the total
 *         size cannot be determined.
 */
protected function diskUsage(): array
{
    $totalBytes = @disk_total_space('/') ?: 0;
    $freeBytes  = @disk_free_space('/') ?: 0;

    if ($totalBytes <= 0) {
        return ['percent' => null, 'free_gb' => null];
    }

    // Clamp so a free value above the total can never yield a negative usage.
    $usedBytes = max(0, $totalBytes - $freeBytes);

    $percent = (int) round($usedBytes / $totalBytes * 100);
    $freeGb  = (int) round($freeBytes / 1024 / 1024 / 1024);

    return ['percent' => $percent, 'free_gb' => $freeGb];
}
/**
 * Counts migration files under database/migrations that are not recorded as run.
 *
 * Does not throw for a missing migrations directory (returns 0) or a missing
 * migrations table (every file counts as pending).
 *
 * @return int Number of pending migration files.
 */
protected function pendingMigrationsCount(): int
{
    $dir = database_path('migrations');

    // Finder::in() throws on a missing directory; without files nothing can be pending.
    if (!is_dir($dir)) {
        return 0;
    }

    // Migration names are the file names without the .php extension.
    $files = collect(iterator_to_array(
        Finder::create()->files()->in($dir)->name('*.php')
    ))->map(fn ($f) => pathinfo($f->getFilename(), PATHINFO_FILENAME));

    $repository = app('migration.repository');

    // Before the first `migrate` there is no migrations table and getRan() would fail;
    // in that state nothing has run yet.
    $ran = $repository->repositoryExists() ? $repository->getRan() : [];

    return $files->diff(collect($ran))->count();
}
/**
 * Placeholder for the certificate expiry overview.
 *
 * @param int $days Look-ahead window; not evaluated by the current stub.
 * @return array{count: int, nearest_days: int|null} Currently always zero / null.
 */
protected function certificatesDue(int $days): array
{
    // TODO: hook real cert source
    $due = [
        'count'        => 0,
        'nearest_days' => null,
    ];

    return $due;
}
/**
 * Placeholder mail queue counters.
 *
 * @return array{outgoing: int, incoming: int, today_ok: int, today_err: int} Hard-coded
 *         sample numbers until real counters are wired in.
 */
protected function queueMetrics(): array
{
    // TODO: replace with real counters
    $sample = [
        'outgoing'  => 19,
        'incoming'  => 5,
        'today_ok'  => 834,
        'today_err' => 12,
    ];

    return $sample;
}
/**
 * Collects the most recent alert events from the postfix and rspamd journals
 * (last 15 minutes, at most 200 lines per unit) plus a queue backlog signal.
 *
 * @return array<int, array{level: string, text: string, at: string}> At most five events,
 *         newest first.
 */
protected function recentAlerts(): array
{
    // Per unit: regex => [level, text]. A line matching several patterns
    // produces one event per pattern, in the order listed here.
    $rules = [
        'postfix' => [
            '/NOQUEUE: reject/i'         => ['warning', 'Postfix reject detected'],
            '/timeout|lost connection/i' => ['warning', 'Postfix connection issue'],
        ],
        'rspamd' => [
            '/greylist|ratelimit/i' => ['info', 'Rspamd rate/greylist notice'],
            '/critical|error/i'     => ['error', 'Rspamd error'],
        ],
    ];

    $events = [];

    foreach ($rules as $unit => $patterns) {
        // Unit names come from the fixed table above, so the command needs no escaping.
        $r = Process::run(
            sprintf('journalctl -u %s --since "15 min ago" -o short-iso -n 200', $unit)
        );

        foreach (explode("\n", trim($r->output())) as $line) {
            if ($line === '') {
                continue;
            }

            foreach ($patterns as $regex => [$level, $text]) {
                if (preg_match($regex, $line)) {
                    $events[] = [
                        'level' => $level,
                        'text'  => $text,
                        'at'    => $this->extractIsoTime($line),
                    ];
                }
            }
        }
    }

    // Queue backlog signal (optional), read from the cached queue metrics.
    $q = Cache::get('metrics:queues', []);
    if (($q['outgoing'] ?? 0) > 500) {
        $events[] = [
            'level' => 'error',
            'text'  => 'Queue backlog high (outgoing>500)',
            'at'    => now()->toIso8601String(),
        ];
    }

    // Sort newest first by the actual instant. Comparing the ISO strings as text would
    // misorder entries whose UTC offsets differ (journal time vs. application time).
    $instant = static fn (array $event): int => (int) strtotime($event['at'] ?? '');
    usort($events, static fn (array $a, array $b): int => $instant($b) <=> $instant($a));

    // Limit the result to five entries.
    return array_slice($events, 0, 5);
}
/**
 * Helper: pulls the leading timestamp out of a journalctl line.
 *
 * With `-o short-iso` a line starts like "2025-10-04T18:33:21+0200 ...".
 *
 * @param string $line One journal line.
 * @return string ISO-8601 timestamp from the line, or the current time when the line has
 *         no leading timestamp or it cannot be parsed.
 */
protected function extractIsoTime(string $line): string
{
    $hasStamp = preg_match('/^\s*([0-9T:\-+]+)\s/', $line, $match);

    if ($hasStamp) {
        try {
            return \Carbon\Carbon::parse($match[1])->toIso8601String();
        } catch (\Throwable $ignored) {
            // Unparsable prefix: fall through to the current time.
        }
    }

    return now()->toIso8601String();
}
/**
 * Snapshot of load averages, RAM, CPU usage and uptime.
 *
 * Note: the CPU sample blocks for roughly 200 ms.
 *
 * @return array{
 *     cpu_load_1: float|null, cpu_load_5: float|null, cpu_load_15: float|null,
 *     cores: int|null, cpu_percent: int|null,
 *     ram: array{total_gb: float|null, used_gb: float|null, free_gb: float|null, percent: int|null},
 *     uptime_seconds: int|null, uptime_human: string|null
 * }
 */
protected function systemLoad(): array
{
    // Load (1/5/15). sys_getloadavg() may return false; casting that to an array
    // would yield [false] and leak `false` into cpu_load_1, so check explicitly.
    $load = function_exists('sys_getloadavg') ? sys_getloadavg() : false;
    if (!is_array($load)) {
        $load = [null, null, null];
    }

    // RAM from /proc/meminfo
    $mem = ['total_gb' => null, 'used_gb' => null, 'free_gb' => null, 'percent' => null];
    if (is_readable('/proc/meminfo')) {
        $info = [];
        // file() returns false on a read error; treat that as "no lines".
        foreach (file('/proc/meminfo') ?: [] as $line) {
            if (preg_match('/^(\w+):\s+(\d+)/', $line, $m)) {
                $info[$m[1]] = (int) $m[2]; // kB
            }
        }

        if (!empty($info['MemTotal']) && isset($info['MemAvailable'])) {
            $total = $info['MemTotal'] * 1024;
            $avail = $info['MemAvailable'] * 1024;
            $used  = max(0, $total - $avail);
            $mem = [
                'total_gb' => round($total / 1024 / 1024 / 1024, 1),
                'used_gb'  => round($used / 1024 / 1024 / 1024, 1),
                'free_gb'  => round($avail / 1024 / 1024 / 1024, 1),
                'percent'  => $total ? (int) round($used / $total * 100) : null,
            ];
        }
    }

    // Core count (for load estimation and display)
    $cores = $this->cpuCores();

    // CPU percent (quick 200 ms probe via /proc/stat); null when not readable
    $cpuPercent = $this->cpuPercentSample(200);

    // Uptime: ['seconds' => int|null, 'human' => string|null]
    $uptime = $this->uptimeInfo();

    return [
        'cpu_load_1'  => $load[0] ?? null,
        'cpu_load_5'  => $load[1] ?? null,
        'cpu_load_15' => $load[2] ?? null,
        // Helps the Livewire class estimate usage when cpu_percent is null
        'cores' => $cores,
        // Directly usable value; shown in preference
        'cpu_percent' => $cpuPercent,
        // RAM block
        'ram' => $mem,
        // Uptime in two forms
        'uptime_seconds' => $uptime['seconds'],
        'uptime_human'   => $uptime['human'],
    ];
}
/**
 * Determines the number of CPU cores.
 *
 * Tries `nproc` first and falls back to counting "processor" entries in /proc/cpuinfo.
 *
 * @return int|null Core count, or null when neither source yields a positive number.
 */
protected function cpuCores(): ?int
{
    // 1) nproc. On PHP 8 a function listed in disable_functions is undefined; calling it
    //    throws an Error that @ cannot suppress, so check availability first.
    if (function_exists('shell_exec')) {
        $n = trim((string) @shell_exec('nproc 2>/dev/null'));
        if (ctype_digit($n) && (int) $n > 0) {
            return (int) $n;
        }
    }

    // 2) /proc/cpuinfo
    if (is_readable('/proc/cpuinfo')) {
        $cnt = preg_match_all(
            '/^processor\s*:\s*\d+/mi',
            (string) file_get_contents('/proc/cpuinfo')
        );
        if ($cnt > 0) {
            return $cnt;
        }
    }

    return null;
}
/**
 * CPU utilisation in percent from two /proc/stat readings taken $ms apart.
 *
 * @param int $ms Measurement window in milliseconds (values below 1 are raised to 1).
 * @return int|null Usage clamped to 0..100, or null when a reading is unavailable or the
 *         counters did not advance.
 */
protected function cpuPercentSample(int $ms = 200): ?int
{
    $before = $this->readProcStatTotals();
    if (!$before) {
        return null;
    }

    usleep(max(1, $ms) * 1000);

    $after = $this->readProcStatTotals();
    if (!$after) {
        return null;
    }

    $totalDelta = $after['total'] - $before['total'];
    if ($totalDelta <= 0) {
        return null;
    }

    // Busy share = 1 - idle share of the elapsed counter ticks.
    $idleDelta = $after['idle'] - $before['idle'];
    $usage = 100 * (1 - ($idleDelta / $totalDelta));

    return (int) round(max(0, min(100, $usage)));
}
/**
 * Reads the aggregate "cpu" line of /proc/stat.
 *
 * Field order after the label: user nice system idle iowait irq softirq steal guest guest_nice.
 *
 * @return array{idle: float, total: float}|null 'idle' is idle + iowait; 'total' is the sum
 *         of user..steal. Null when the file is unreadable or the first line is not the
 *         aggregate cpu line.
 */
protected function readProcStatTotals(): ?array
{
    if (!is_readable('/proc/stat')) {
        return null;
    }

    // Only the first line is needed, so do not load the whole file.
    $handle = fopen('/proc/stat', 'r');
    if ($handle === false) {
        return null;
    }
    $line = fgets($handle);
    fclose($handle);

    if ($line === false || !str_starts_with($line, 'cpu ')) {
        return null;
    }

    $parts = preg_split('/\s+/', trim($line));

    // Take user..steal only: the kernel already accounts guest and guest_nice inside
    // user and nice, so summing them as well would count guest time twice.
    $vals = array_map('floatval', array_slice($parts, 1, 8));

    $idle  = ($vals[3] ?? 0) + ($vals[4] ?? 0);
    $total = array_sum($vals);

    return ['idle' => $idle, 'total' => $total];
}
/**
 * Uptime from /proc/uptime, as rounded seconds and as a short human-readable text.
 *
 * @return array{seconds: int|null, human: string|null} Both null when the file is not
 *         readable or its first field is not numeric.
 */
protected function uptimeInfo(): array
{
    $seconds = null;

    if (is_readable('/proc/uptime')) {
        // The first space-separated field is the uptime in (fractional) seconds.
        $fields = explode(' ', trim(file_get_contents('/proc/uptime')));
        $first = trim($fields[0] ?? '');

        if (is_numeric($first)) {
            $seconds = (int) round((float) $first);
        }
    }

    $human = $seconds === null ? null : $this->fmtSecondsHuman($seconds);

    return ['seconds' => $seconds, 'human' => $human];
}
/**
 * Formats a duration using its two most significant units.
 *
 * @param int $s Duration in seconds.
 * @return string "Xd Yh" from one day up, "Xh Ym" from one hour up, otherwise "Xm".
 */
protected function fmtSecondsHuman(int $s): string
{
    $days    = intdiv($s, 86400);
    $rest    = $s % 86400;
    $hours   = intdiv($rest, 3600);
    $minutes = intdiv($rest % 3600, 60);

    return match (true) {
        $days > 0  => sprintf('%dd %dh', $days, $hours),
        $hours > 0 => sprintf('%dh %dm', $hours, $minutes),
        default    => sprintf('%dm', $minutes),
    };
}
// protected function systemLoad(): array
// {
// // 1, 5, 15 Minuten Load averages
// $load = function_exists('sys_getloadavg') ? sys_getloadavg() : [null,null,null];
//
// // RAM aus /proc/meminfo (Linux)
// $mem = ['total'=>null,'free'=>null,'used'=>null,'percent'=>null];
// if (is_readable('/proc/meminfo')) {
// $info = [];
// foreach (file('/proc/meminfo') as $line) {
// if (preg_match('/^(\w+):\s+(\d+)/', $line, $m)) {
// $info[$m[1]] = (int)$m[2]; // kB
// }
// }
// if (!empty($info['MemTotal']) && !empty($info['MemAvailable'])) {
// $total = $info['MemTotal'] * 1024;
// $avail = $info['MemAvailable'] * 1024;
// $used = $total - $avail;
// $mem = [
// 'total_gb' => round($total/1024/1024/1024,1),
// 'used_gb' => round($used/1024/1024/1024,1),
// 'free_gb' => round($avail/1024/1024/1024,1),
// 'percent' => $total ? round($used/$total*100) : null,
// ];
// }
// }
//
// return [
// 'cpu_load_1' => $load[0],
// 'cpu_load_5' => $load[1],
// 'cpu_load_15' => $load[2],
// 'ram' => $mem,
// ];
// }
}