<?php
|
||
|
||
namespace App\Jobs;
|
||
|
||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||
use Illuminate\Foundation\Queue\Queueable;
|
||
use Illuminate\Support\Facades\Artisan;
|
||
use Illuminate\Support\Facades\Cache;
|
||
use Illuminate\Support\Facades\DB;
|
||
use Illuminate\Support\Facades\Log;
|
||
use Illuminate\Support\Facades\Process;
|
||
use Symfony\Component\Finder\Finder;
|
||
|
||
class RunHealthChecks implements ShouldQueue
|
||
{
|
||
use Queueable;
|
||
|
||
public int $timeout = 10; // safety
|
||
public int $tries = 1;
|
||
|
||
/**
 * Runs every health probe and publishes the results to the cache.
 *
 * Cache entries written:
 *  - `health:services` (300 s): list of probe results, each shaped
 *    ['name' => string, 'ok' => bool, 'raw' => string].
 *  - `health:meta` (300 s): app version, pending migrations, certificate,
 *    disk and system information plus an `updated_at` timestamp.
 *  - `metrics:queues` (120 s): hard-coded placeholder numbers for now.
 *  - `events:recent` (300 s): the result of recentAlerts().
 *
 * Every probe goes through safe(), so one failing probe cannot keep the
 * others from being published. The outer catch only logs, so the job
 * itself never fails.
 */
public function handle(): void
{
    try {
        // A failed probe must still produce a complete service entry;
        // consumers read 'ok' and 'raw' on every element.
        $down = static fn (string $name): array => [
            'name' => $name,
            'ok'   => false,
            'raw'  => 'probe failed',
        ];

        $services = [
            $this->safe(fn () => $this->service('postfix'), $down('postfix')),
            $this->safe(fn () => $this->service('dovecot'), $down('dovecot')),
            $this->safe(fn () => $this->service('rspamd'), $down('rspamd')),
            $this->safe(fn () => $this->tcp('127.0.0.1', 6379), $down('redis')),
            $this->safe(fn () => $this->db(), $down('db')),
            // $this->safe(fn () => $this->queueWorkers(), $down('queue')),
            $this->safe(fn () => $this->tcp('127.0.0.1', 8080), $down('reverb')),
        ];

        // Same keys systemLoad() returns, with every value unknown.
        $systemUnknown = [
            'cpu_load_1'     => null,
            'cpu_load_5'     => null,
            'cpu_load_15'    => null,
            'cores'          => null,
            'cpu_percent'    => null,
            'ram'            => ['total_gb' => null, 'used_gb' => null, 'free_gb' => null, 'percent' => null],
            'uptime_seconds' => null,
            'uptime_human'   => null,
        ];

        $meta = [
            'app_version'  => config('app.version', app()->version()),
            'pending_migs' => $this->safe(fn () => $this->pendingMigrationsCount(), 0),
            'cert_soon'    => $this->safe(fn () => $this->certificatesDue(30), ['count' => 0, 'nearest_days' => null]),
            'disk'         => $this->safe(fn () => $this->diskUsage(), ['percent' => null, 'free_gb' => null]),
            // Guarded like every other probe so a failure here cannot abort the whole run.
            'system'       => $this->safe(fn () => $this->systemLoad(), $systemUnknown),
            'updated_at'   => now()->toIso8601String(),
        ];

        Cache::put('health:services', array_values($services), 300);
        Cache::put('health:meta', $meta, 300);

        // Placeholder numbers until real queue counters are wired in.
        Cache::put('metrics:queues', [
            'outgoing'  => 19,
            'incoming'  => 5,
            'today_ok'  => 834,
            'today_err' => 12,
            'trend'     => [
                'outgoing' => [2, 1, 0, 4, 3, 5, 4, 0, 0, 0], // last 10 time windows
                'incoming' => [1, 0, 0, 1, 0, 2, 1, 0, 0, 0],
                'ok'       => [50, 62, 71, 88, 92, 110, 96, 120, 130, 115],
                'err'      => [1, 0, 0, 2, 1, 0, 1, 3, 2, 2],
            ],
        ], 120);

        // Must run after 'metrics:queues' is written: recentAlerts() reads that key.
        Cache::put('events:recent', $this->safe(fn () => $this->recentAlerts(), []), 300);
    } catch (\Throwable $e) {
        // Last-resort catch: never allow the job to fail hard
        Log::error('RunHealthChecks fatal', ['ex' => $e]);
    }
}
|
||
|
||
/**
 * Runs a probe and shields the caller from any failure.
 *
 * On failure a warning is logged with the probe name (taken from the
 * fallback's 'name' key when the fallback is an array that has one), the
 * exception class and its message, so the log entry can be traced back to
 * a specific probe.
 *
 * @param callable $fn       Probe to execute; its return value is passed through unchanged.
 * @param mixed    $fallback Value returned when the probe throws.
 *
 * @return mixed The probe result, or $fallback if the probe threw.
 */
protected function safe(callable $fn, $fallback = null)
{
    try {
        return $fn();
    } catch (\Throwable $e) {
        Log::warning('Health probe failed', [
            'probe' => is_array($fallback) ? ($fallback['name'] ?? null) : null,
            'type'  => get_class($e),
            'err'   => $e->getMessage(),
        ]);

        return $fallback;
    }
}
|
||
|
||
/**
 * Asks systemd whether a unit is active.
 *
 * @param string $name systemd unit name, e.g. "postfix".
 *
 * @return array{name: string, ok: bool, raw: string} 'ok' is true only when
 *         systemctl printed exactly "active"; 'raw' is the trimmed output
 *         (stdout, else stderr) or "unknown" when nothing was printed.
 */
protected function service(string $name): array
{
    // The result is read from the printed state, so this works even if exit code != 0.
    // The unit name is escaped because it ends up on a shell command line.
    $result = Process::run('systemctl is-active ' . escapeshellarg($name));

    // Trim before falling back so a whitespace-only stdout does not hide stderr.
    $raw = trim($result->output());
    if ($raw === '') {
        $raw = trim($result->errorOutput());
    }

    return [
        'name' => $name,
        'ok'   => $raw === 'active',
        'raw'  => $raw !== '' ? $raw : 'unknown',
    ];
}
|
||
|
||
/**
 * Checks whether a TCP port accepts connections.
 *
 * @param string $host Host name or IP address to connect to.
 * @param int    $port TCP port.
 *
 * @return array{name: string, ok: bool, raw: string} 'name' is "host:port";
 *         'raw' is "open" on success, otherwise the socket error text or
 *         "closed" when no error text is available.
 */
protected function tcp(string $host, int $port): array
{
    $errno = 0;
    $errstr = '';

    // Warnings are suppressed on purpose: a refused connection is an expected outcome here.
    $socket = @fsockopen($host, $port, $errno, $errstr, 0.4);
    $ok = $socket !== false;

    if ($ok) {
        // Release the connection right away; only reachability matters.
        fclose($socket);
    }

    return [
        'name' => "$host:$port",
        'ok'   => $ok,
        'raw'  => $ok ? 'open' : ($errstr ?: 'closed'),
    ];
}
|
||
|
||
/**
 * Verifies the default database connection with a trivial query.
 *
 * @return array{name: string, ok: bool, raw: string} Always the "ok" result;
 *         failure is signalled by the exception thrown from the query.
 */
protected function db(): array
{
    // No result is needed: an unusable connection throws right here.
    DB::select('select 1');

    $status = ['name' => 'db'];
    $status['ok'] = true;
    $status['raw'] = 'ok';

    return $status;
}
|
||
|
||
/**
 * Reports whether the "supervisor" systemd unit is active.
 *
 * Delegates to service() so the systemctl handling lives in one place, and
 * relabels the result as "queue".
 *
 * @return array{name: string, ok: bool, raw: string}
 */
protected function queueWorkers(): array
{
    $supervisor = $this->service('supervisor');

    return [
        'name' => 'queue',
        'ok'   => $supervisor['ok'],
        'raw'  => $supervisor['raw'],
    ];
}
|
||
|
||
/**
 * Measures how full a filesystem is.
 *
 * @param string $path Any path on the filesystem to inspect; defaults to the root filesystem.
 *
 * @return array{percent: int|null, free_gb: int|null} Used space in percent
 *         and free space in whole GiB. Both are null when either size
 *         cannot be determined.
 */
protected function diskUsage(string $path = '/'): array
{
    $unknown = ['percent' => null, 'free_gb' => null];

    $total = @disk_total_space($path);
    $free = @disk_free_space($path);

    // A failed free-space lookup must not be reported as "0 bytes free / 100 % used".
    if ($total === false || $free === false || $total <= 0) {
        return $unknown;
    }

    $used = max(0, $total - $free);

    return [
        'percent' => (int) round($used / $total * 100),
        'free_gb' => (int) round($free / 1024 / 1024 / 1024),
    ];
}
|
||
|
||
/**
 * Counts migration files in database/migrations that have not been run.
 *
 * Only top-level files named like `*_*.php` are considered, so helper
 * files and nested directories are not reported as pending. If the
 * migrations table does not exist yet, every migration file counts as
 * pending.
 *
 * @return int Number of pending migrations.
 */
protected function pendingMigrationsCount(): int
{
    $finder = Finder::create()
        ->files()
        ->in(database_path('migrations'))
        ->depth('== 0')
        ->name('*_*.php');

    // Migration names are the file names without the ".php" extension.
    $files = collect(iterator_to_array($finder, false))
        ->map(fn ($file) => pathinfo($file->getFilename(), PATHINFO_FILENAME));

    $repository = app('migration.repository');

    // Without a migrations table nothing has run yet; asking for the ran list would throw.
    if (! $repository->repositoryExists()) {
        return $files->count();
    }

    return $files->diff(collect($repository->getRan()))->count();
}
|
||
|
||
/**
 * Placeholder for the certificate expiry check.
 *
 * @param int $days Look-ahead window in days; not used yet.
 *
 * @return array{count: int, nearest_days: int|null} Currently always zero certificates due.
 */
protected function certificatesDue(int $days): array
{
    // TODO: hook real cert source
    $none = [
        'count'        => 0,
        'nearest_days' => null,
    ];

    return $none;
}
|
||
|
||
/**
 * Placeholder for the mail queue counters.
 *
 * @return array{outgoing: int, incoming: int, today_ok: int, today_err: int} Fixed sample values.
 */
protected function queueMetrics(): array
{
    // TODO: replace with real counters
    $sample = [];
    $sample['outgoing'] = 19;
    $sample['incoming'] = 5;
    $sample['today_ok'] = 834;
    $sample['today_err'] = 12;

    return $sample;
}
|
||
|
||
/**
 * Collects the most recent alert events for the dashboard.
 *
 * Sources: the last 15 minutes of the postfix and rspamd journals (matched
 * against a few example patterns) and the cached queue metrics (backlog
 * signal when `outgoing` exceeds 500).
 *
 * @return array<int, array{level: string, text: string, at: string}> At most
 *         five events, newest first.
 */
protected function recentAlerts(): array
{
    $events = array_merge(
        $this->journalEvents('postfix', [
            '/NOQUEUE: reject/i'         => ['warning', 'Postfix reject detected'],
            '/timeout|lost connection/i' => ['warning', 'Postfix connection issue'],
        ]),
        $this->journalEvents('rspamd', [
            '/greylist|ratelimit/i' => ['info', 'Rspamd rate/greylist notice'],
            '/critical|error/i'     => ['error', 'Rspamd error'],
        ]),
    );

    // Queue backlog signal (optional)
    $queues = Cache::get('metrics:queues', []);
    if (($queues['outgoing'] ?? 0) > 500) {
        $events[] = [
            'level' => 'error',
            'text'  => 'Queue backlog high (outgoing>500)',
            'at'    => now()->toIso8601String(),
        ];
    }

    // Compare real instants, not ISO strings: journal timestamps and now()
    // may carry different UTC offsets, which breaks lexical ordering.
    $instant = static fn (array $event): int => (int) strtotime($event['at'] ?? '');
    usort($events, static fn (array $a, array $b): int => $instant($b) <=> $instant($a));

    // Keep only the five newest entries.
    return array_slice($events, 0, 5);
}

/**
 * Scans the last 15 minutes of one unit's journal for known patterns.
 *
 * @param string                                   $unit  systemd unit whose journal is read.
 * @param array<string, array{0: string, 1: string}> $rules Map of regex => [level, text]; every
 *                                                          matching rule adds one event per line.
 *
 * @return array<int, array{level: string, text: string, at: string}> Events in journal order.
 */
protected function journalEvents(string $unit, array $rules): array
{
    $result = Process::run(sprintf(
        'journalctl -u %s --since "15 min ago" -o short-iso -n 200',
        escapeshellarg($unit)
    ));

    $events = [];
    foreach (explode("\n", trim($result->output())) as $line) {
        if ($line === '') {
            continue;
        }

        foreach ($rules as $pattern => [$level, $text]) {
            if (preg_match($pattern, $line)) {
                $events[] = [
                    'level' => $level,
                    'text'  => $text,
                    'at'    => $this->extractIsoTime($line),
                ];
            }
        }
    }

    return $events;
}
|
||
|
||
/**
 * Extracts the leading timestamp of a `journalctl -o short-iso` line.
 *
 * Such lines start with e.g. "2025-10-04T18:33:21+0200 ...". Only a token
 * with that full date-time shape is accepted, so marker lines such as
 * "-- No entries --" or a bare number are never handed to the date parser.
 *
 * @param string $line One journal line.
 *
 * @return string ISO 8601 timestamp of the line, or the current time when
 *                the line has no parsable timestamp.
 */
protected function extractIsoTime(string $line): string
{
    $pattern = '/^\s*(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?)\s/';

    if (preg_match($pattern, $line, $m) === 1) {
        try {
            return \Carbon\Carbon::parse($m[1])->toIso8601String();
        } catch (\Throwable $e) {
            // Unparsable despite the shape check: fall through to "now" below.
        }
    }

    return now()->toIso8601String();
}
|
||
|
||
/**
 * Gathers CPU, memory and uptime figures for the host.
 *
 * @return array{
 *     cpu_load_1: float|null, cpu_load_5: float|null, cpu_load_15: float|null,
 *     cores: int|null, cpu_percent: int|null,
 *     ram: array{total_gb: float|null, used_gb: float|null, free_gb: float|null, percent: int|null},
 *     uptime_seconds: int|null, uptime_human: string|null
 * } Any value that cannot be determined is null.
 */
protected function systemLoad(): array
{
    // Load averages (1/5/15 min). sys_getloadavg() returns false on failure;
    // casting that to an array would yield [false] and leak `false` into the result.
    $load = function_exists('sys_getloadavg') ? sys_getloadavg() : false;
    if (! is_array($load)) {
        $load = [null, null, null];
    }

    $mem = $this->memoryUsage();

    // Core count (for load estimation and display)
    $cores = $this->cpuCores();

    // CPU percent (quick 200 ms sample via /proc/stat); may be null when unreadable
    $cpuPercent = $this->cpuPercentSample(200);

    // ['seconds' => int|null, 'human' => string|null]
    $uptime = $this->uptimeInfo();

    return [
        'cpu_load_1'     => $load[0] ?? null,
        'cpu_load_5'     => $load[1] ?? null,
        'cpu_load_15'    => $load[2] ?? null,

        // Lets the consumer estimate utilisation when cpu_percent is null
        'cores'          => $cores,

        // Directly usable value
        'cpu_percent'    => $cpuPercent,

        'ram'            => $mem,

        // Uptime in two forms
        'uptime_seconds' => $uptime['seconds'],
        'uptime_human'   => $uptime['human'],
    ];
}

/**
 * Reads RAM usage from /proc/meminfo.
 *
 * "Used" is MemTotal minus MemAvailable. Sizes are in GiB rounded to one
 * decimal place.
 *
 * @return array{total_gb: float|null, used_gb: float|null, free_gb: float|null, percent: int|null}
 *         All values are null when the file or the required fields are unavailable.
 */
protected function memoryUsage(): array
{
    $unknown = ['total_gb' => null, 'used_gb' => null, 'free_gb' => null, 'percent' => null];

    if (! is_readable('/proc/meminfo')) {
        return $unknown;
    }

    $info = [];
    foreach (@file('/proc/meminfo') ?: [] as $line) {
        if (preg_match('/^(\w+):\s+(\d+)/', $line, $m)) {
            $info[$m[1]] = (int) $m[2]; // kB
        }
    }

    if (empty($info['MemTotal']) || ! isset($info['MemAvailable'])) {
        return $unknown;
    }

    $total = $info['MemTotal'] * 1024;
    $avail = $info['MemAvailable'] * 1024;
    $used = max(0, $total - $avail);

    return [
        'total_gb' => round($total / 1024 / 1024 / 1024, 1),
        'used_gb'  => round($used / 1024 / 1024 / 1024, 1),
        'free_gb'  => round($avail / 1024 / 1024 / 1024, 1),
        'percent'  => (int) round($used / $total * 100),
    ];
}
|
||
|
||
/**
 * Determines the number of CPU cores.
 *
 * Tries `nproc` first and falls back to counting "processor" entries in
 * /proc/cpuinfo.
 *
 * @return int|null Core count, or null when neither source yields a positive number.
 */
protected function cpuCores(): ?int
{
    // 1) nproc. When shell_exec is disabled, calling it throws an Error that
    //    "@" cannot suppress, so check availability first.
    if (function_exists('shell_exec')) {
        $n = trim((string) @shell_exec('nproc 2>/dev/null'));
        if (ctype_digit($n) && (int) $n > 0) {
            return (int) $n;
        }
    }

    // 2) /proc/cpuinfo
    if (is_readable('/proc/cpuinfo')) {
        $cpuinfo = @file_get_contents('/proc/cpuinfo');
        $cnt = $cpuinfo === false
            ? 0
            : (int) preg_match_all('/^processor\s*:\s*\d+/mi', $cpuinfo);

        if ($cnt > 0) {
            return $cnt;
        }
    }

    return null;
}
|
||
|
||
/**
 * CPU utilisation in percent, from two /proc/stat readings taken $ms apart.
 *
 * @param int $ms Sampling interval in milliseconds (at least 1 ms is used).
 *
 * @return int|null Busy share clamped to 0..100, or null when a reading is
 *                  unavailable or the counters did not advance.
 */
protected function cpuPercentSample(int $ms = 200): ?int
{
    $before = $this->readProcStatTotals();
    if (empty($before)) {
        return null;
    }

    $pauseMicros = max(1, $ms) * 1000;
    usleep($pauseMicros);

    $after = $this->readProcStatTotals();
    if (empty($after)) {
        return null;
    }

    $elapsed = $after['total'] - $before['total'];
    if ($elapsed <= 0) {
        return null;
    }

    $idleShare = ($after['idle'] - $before['idle']) / $elapsed;
    $busyPercent = 100 * (1 - $idleShare);

    // Clamp first, then round to a whole percent.
    $clamped = max(0, min(100, $busyPercent));

    return (int) round($clamped);
}
|
||
|
||
/**
 * Reads the aggregate CPU counters from the first line of /proc/stat.
 *
 * Field order: user nice system idle iowait irq softirq steal guest guest_nice.
 * "idle" is idle + iowait. "total" is the sum of the first eight fields;
 * guest and guest_nice are left out because the kernel already accounts
 * them inside user and nice, so adding them would count that time twice.
 *
 * @param string $path Stat file to read; defaults to /proc/stat.
 *
 * @return array{idle: int|float, total: int|float}|null Null when the file is
 *         unreadable or does not start with the aggregate "cpu " line.
 */
protected function readProcStatTotals(string $path = '/proc/stat'): ?array
{
    if (! is_readable($path)) {
        return null;
    }

    // Only the first line is needed, so avoid loading the whole file.
    $handle = @fopen($path, 'r');
    if ($handle === false) {
        return null;
    }
    $line = fgets($handle);
    fclose($handle);

    if ($line === false || ! str_starts_with($line, 'cpu ')) {
        return null;
    }

    $fields = preg_split('/\s+/', trim($line));

    // Drop the "cpu" label and keep user..steal (8 fields).
    $vals = array_map('floatval', array_slice($fields, 1, 8));

    $idle = ($vals[3] ?? 0) + ($vals[4] ?? 0);
    $total = array_sum($vals);

    return ['idle' => $idle, 'total' => $total];
}
|
||
|
||
/**
 * System uptime from /proc/uptime, as seconds and as a short readable string.
 *
 * @return array{seconds: int|null, human: string|null} Both null when the
 *         file is unreadable or its first field is not numeric.
 */
protected function uptimeInfo(): array
{
    $seconds = null;

    if (is_readable('/proc/uptime')) {
        $contents = trim(file_get_contents('/proc/uptime'));
        $tokens = explode(' ', $contents);

        // The first field is the uptime in (fractional) seconds.
        $candidate = trim($tokens[0] ?? '');
        if (is_numeric($candidate)) {
            $seconds = (int) round((float) $candidate);
        }
    }

    $human = $seconds === null ? null : $this->fmtSecondsHuman($seconds);

    return [
        'seconds' => $seconds,
        'human'   => $human,
    ];
}
|
||
|
||
/**
 * Formats a duration using its two most significant units.
 *
 * @param int $s Duration in seconds.
 *
 * @return string "Xd Yh" when at least a day, "Xh Ym" when at least an hour, otherwise "Xm".
 */
protected function fmtSecondsHuman(int $s): string
{
    $days = intdiv($s, 86400);
    $hours = intdiv($s % 86400, 3600);
    $minutes = intdiv($s % 3600, 60);

    return match (true) {
        $days > 0  => "{$days}d {$hours}h",
        $hours > 0 => "{$hours}h {$minutes}m",
        default    => "{$minutes}m",
    };
}
|
||
|
||
// protected function systemLoad(): array
|
||
// {
|
||
// // 1, 5, 15 Minuten Load averages
|
||
// $load = function_exists('sys_getloadavg') ? sys_getloadavg() : [null,null,null];
|
||
//
|
||
// // RAM aus /proc/meminfo (Linux)
|
||
// $mem = ['total'=>null,'free'=>null,'used'=>null,'percent'=>null];
|
||
// if (is_readable('/proc/meminfo')) {
|
||
// $info = [];
|
||
// foreach (file('/proc/meminfo') as $line) {
|
||
// if (preg_match('/^(\w+):\s+(\d+)/', $line, $m)) {
|
||
// $info[$m[1]] = (int)$m[2]; // kB
|
||
// }
|
||
// }
|
||
// if (!empty($info['MemTotal']) && !empty($info['MemAvailable'])) {
|
||
// $total = $info['MemTotal'] * 1024;
|
||
// $avail = $info['MemAvailable'] * 1024;
|
||
// $used = $total - $avail;
|
||
// $mem = [
|
||
// 'total_gb' => round($total/1024/1024/1024,1),
|
||
// 'used_gb' => round($used/1024/1024/1024,1),
|
||
// 'free_gb' => round($avail/1024/1024/1024,1),
|
||
// 'percent' => $total ? round($used/$total*100) : null,
|
||
// ];
|
||
// }
|
||
// }
|
||
//
|
||
// return [
|
||
// 'cpu_load_1' => $load[0],
|
||
// 'cpu_load_5' => $load[1],
|
||
// 'cpu_load_15' => $load[2],
|
||
// 'ram' => $mem,
|
||
// ];
|
||
// }
|
||
}
|