dims (512). * ============================================================================ */

// This script is CLI-only: refuse to run under any other SAPI.
if (PHP_SAPI !== 'cli') {
    fwrite(STDERR, "Solo CLI\n");
    exit(1);
}

// Allow a bootstrap to pre-define the paths; otherwise derive them from this file's location.
if (!defined('BASE_PATH')) {
    define('BASE_PATH', dirname(__DIR__));
}
if (!defined('APP_PATH')) {
    define('APP_PATH', BASE_PATH . '/application');
}

require_once APP_PATH . '/config/env.php';
require_once APP_PATH . '/config/config.php';
require_once APP_PATH . '/config/database.php';
require_once APP_PATH . '/services/EmbedService.php';
require_once APP_PATH . '/services/VectorService.php';

// CLI options:
//   --only=<group>  restrict ingestion to one top-level key of the JSON file
//   --dry-run       build the records and log a sample, but do not embed or upsert
//   --no-purge      skip deleting the previously ingested points before upserting
$opts = getopt('', ['only::', 'dry-run', 'no-purge']);
$only = $opts['only'] ?? null;
$dryRun = isset($opts['dry-run']);
$noPurge = isset($opts['no-purge']);

const ACN_JSON = 'docs/nis2/allegati_acn/acn_requirements.json';
const ACN_SOURCE = 'ACN, Misure di sicurezza di base NIS - Allegati 1 e 2 alla Determinazione n. 164179/2025';
const ACN_ENTITY_TYPE = 'requisito_acn';
// NOTE(review): this namespace is not a well-formed UUID (it contains the non-hex
// characters 'n' and 's' and has 33 digits). Do NOT "correct" it: every point id is
// derived from it, so changing it would change the id of every point this script writes.
const ACN_NAMESPACE_UUID = '7f3b6d2a-9c41-5e88-b0a4-2acn164179bas';

/**
 * Prints a log line to stdout, prefixed with the current timestamp.
 *
 * @param string $m Message to print.
 */
function logln(string $m): void
{
    echo '[' . date('Y-m-d H:i:s') . "] $m\n";
}

/**
 * Deterministic name-based UUID (version 5, RFC 4122 §4.3), used as a stable point id
 * so that re-running the ingest upserts the same ids.
 *
 * The namespace is converted to bytes two hex digits at a time; a trailing unpaired
 * digit is dropped. Non-hex characters inside a pair are removed before conversion.
 * hexdec() already ignored such characters, so the resulting bytes (and therefore the
 * generated ids) are unchanged, but the deprecation notice raised on PHP >= 7.4 for
 * invalid hexdec() input is avoided.
 *
 * @param string $namespaceUuid Namespace in textual form; dashes and braces are ignored.
 * @param string $name          Name to hash inside the namespace.
 * @return string UUID in 8-4-4-4-12 lowercase hexadecimal form.
 */
function uuidv5(string $namespaceUuid, string $name): string
{
    $nhex = str_replace(['-', '{', '}'], '', $namespaceUuid);
    $nbin = '';
    for ($i = 0; $i + 1 < strlen($nhex); $i += 2) {
        $pair = (string)preg_replace('/[^0-9a-f]/i', '', substr($nhex, $i, 2));
        $nbin .= chr($pair === '' ? 0 : (int)hexdec($pair));
    }

    $hash = sha1($nbin . $name);

    return sprintf(
        '%08s-%04s-%04x-%04x-%12s',
        substr($hash, 0, 8),
        substr($hash, 8, 4),
        // Force the version nibble to 5.
        (hexdec(substr($hash, 12, 4)) & 0x0fff) | 0x5000,
        // Force the two most significant bits to 10 (RFC 4122 variant).
        (hexdec(substr($hash, 16, 4)) & 0x3fff) | 0x8000,
        substr($hash, 20, 12)
    );
}

/**
 * Maps an entity name to the human-readable label of the annex it belongs to.
 *
 * @param string $entity Entity name; matched case-insensitively on its prefix.
 * @return string Label for "import..." or "essenz..." entities, otherwise a generic label.
 */
function allegatoLabel(string $entity): string
{
    $e = strtolower(trim($entity));
    if (strpos($e, 'import') === 0) {
        return 'Allegato 1 (soggetti importanti)';
    }
    if (strpos($e, 'essenz') === 0) {
        return 'Allegato 2 (soggetti essenziali)';
    }
    return 'Allegato ACN';
}

/**
 * Normalises an entity name to a canonical lowercase value.
 *
 * @param string $entity Raw entity name.
 * @return string 'importante', 'essenziale', the trimmed lowercase input, or
 *                'sconosciuta' when the input is empty.
 */
function canonEntity(string $entity): string
{
    $e = strtolower(trim($entity));
    if (strpos($e, 'import') === 0) {
        return 'importante';
    }
    if (strpos($e, 'essenz') === 0) {
        return 'essenziale';
    }
    return $e !== '' ? $e : 'sconosciuta';
}

// --- Load and flatten the requirements file --------------------------------------------

$abs = BASE_PATH . '/' . ACN_JSON;
if (!is_file($abs)) {
    fwrite(STDERR, "ERRORE: file non trovato: $abs\n");
    exit(1);
}

$json = json_decode((string)file_get_contents($abs), true);
if (!is_array($json)) {
    fwrite(STDERR, "ERRORE: JSON non valido\n");
    exit(1);
}

$records = [];
$seenCodes = [];
$duplicateCodes = [];
foreach ($json as $groupKey => $items) {
    if (!is_array($items)) {
        continue;
    }
    // PHP turns numeric JSON keys into integers; cast so the strict comparison with the
    // string option value can still match.
    if ($only && (string)$groupKey !== $only) {
        continue;
    }

    foreach ($items as $r) {
        if (!is_array($r)) {
            continue;
        }
        $requirement = trim((string)($r['requirement'] ?? ''));
        if ($requirement === '') {
            continue;
        }

        // The group key is the fallback entity when the record does not carry one.
        $entity = canonEntity((string)($r['entity'] ?? $groupKey));
        $subcategory = trim((string)($r['subcategory'] ?? '')) ?: 'N/D';
        $function = trim((string)($r['function'] ?? '')) ?: 'N/D';
        $subcatText = trim((string)($r['subcategory_text'] ?? ''));
        $reqIndex = isset($r['req_index']) ? (int)$r['req_index'] : 1;
        $code = sprintf('%s/%s/%d', $entity, $subcategory, $reqIndex);

        // The point id is derived from $code, so two records with the same code end up
        // on the same point. Remember them so the collision can be reported.
        if (isset($seenCodes[$code])) {
            $duplicateCodes[$code] = true;
        }
        $seenCodes[$code] = true;

        $records[] = compact('entity', 'function', 'subcategory', 'subcatText', 'reqIndex', 'requirement', 'code');
    }
}

logln('=== Ingest requisiti ACN granulari (scope SYSTEM, entity_type=' . ACN_ENTITY_TYPE . ') ===');
logln('Requisiti da indicizzare: ' . count($records) . ($only ? " (solo: {$only})" : ''));
if (!empty($duplicateCodes)) {
    logln(
        ' (warning) codici duplicati (stesso point id, l\'ultimo upsert sovrascrive i precedenti): '
        . implode(', ', array_slice(array_keys($duplicateCodes), 0, 50))
        . (count($duplicateCodes) > 50 ? ' ...' : '')
    );
}
if ($dryRun) {
    logln('MODALITA DRY-RUN: nessun embed/upsert.');
}
if (empty($records)) {
    logln('Nessun requisito. Esco.');
    exit(0);
}

// --- Initialise services and purge previous points --------------------------------------

$embed = null;
$vector = null;
if (!$dryRun) {
    try {
        $embed = new EmbedService();
        $vector = new VectorService();
        $vector->ensureCollection($embed->dims);
    } catch (Throwable $e) {
        fwrite(STDERR, 'ERRORE init Voyage/Qdrant: ' . $e->getMessage() . "\n");
        exit(1);
    }

    if (!$noPurge) {
        // Best effort: a failed purge is only logged and the ingest continues.
        try {
            $vector->deleteByFilter(['must' => [
                ['key' => 'scope', 'match' => ['value' => 'SYSTEM']],
                ['key' => 'entity_type', 'match' => ['value' => ACN_ENTITY_TYPE]],
                ['key' => 'source', 'match' => ['value' => ACN_SOURCE]],
            ]]);
            logln('Purge chunk pre-esistenti: OK');
        } catch (Throwable $e) {
            logln(' (warning) purge fallita: ' . $e->getMessage());
        }
    }
}

// --- Embed and upsert in batches ---------------------------------------------------------

$ok = 0;
$fail = 0;
$failed = [];
$batch = [];
$BATCH_SIZE = 32;

// Sends the pending batch and empties it. If upsertBatch() throws, the batch is left
// untouched so the caller can account for the lost points.
$flush = function () use (&$batch, $vector) {
    if (empty($batch)) {
        return;
    }
    $vector->upsertBatch($batch);
    $batch = [];
};

foreach ($records as $n => $rec) {
    $human = ($n + 1) . '/' . count($records);
    $allegato = allegatoLabel($rec['entity']);

    // Text that gets embedded; array_filter() drops the optional description line when absent.
    $chunkText = implode("\n", array_filter([
        'FONTE NORMATIVA: ' . ACN_SOURCE,
        'RIFERIMENTO: ' . $allegato . ' - Soggetti ' . $rec['entity'],
        'FUNZIONE (Framework Nazionale): ' . $rec['function'],
        'SOTTOCATEGORIA: ' . $rec['subcategory'],
        $rec['subcatText'] !== '' ? 'DESCRIZIONE SOTTOCATEGORIA: ' . $rec['subcatText'] : null,
        'REQUISITO #' . $rec['reqIndex'] . ': ' . $rec['requirement'],
    ]));

    if ($dryRun) {
        // Only the first three records are echoed as a sample.
        if ($n < 3) {
            logln(" [{$human}] {$rec['code']}");
        }
        $ok++;
        continue;
    }

    // Up to five attempts, sleeping 1, 2, 3, 4 seconds between them.
    $vec = null;
    $lastErr = '';
    for ($try = 1; $try <= 5; $try++) {
        try {
            $vec = $embed->embed($chunkText);
            break;
        } catch (Throwable $e) {
            $lastErr = $e->getMessage();
            if ($try < 5) {
                sleep($try);
            }
        }
    }
    if (!is_array($vec)) {
        $fail++;
        $failed[] = $rec['code'];
        logln(" ERRORE [{$human}] embed {$rec['code']}: " . substr($lastErr, 0, 100) . ' -> SKIP');
        continue;
    }

    $pointId = uuidv5(ACN_NAMESPACE_UUID, ACN_SOURCE . '|' . $rec['code']);
    $batch[] = [
        'id' => $pointId,
        'vector' => $vec,
        'payload' => [
            'doc_uuid' => $pointId,
            'title' => $rec['subcategory'] . ' #' . $rec['reqIndex'] . ' (' . $rec['entity'] . ')',
            'chunk' => $chunkText,
            'entity_type' => ACN_ENTITY_TYPE,
            'source' => ACN_SOURCE,
            'lang' => 'it',
            'scope' => 'SYSTEM',
            'consulting_firm_id' => null,
            'organization_id' => null,
            'shared_with_orgs' => [],
            'code' => $rec['code'],
            'subcategory' => $rec['subcategory'],
            'subcategory_text' => $rec['subcatText'],
            'function' => $rec['function'],
            'req_index' => $rec['reqIndex'],
            'entity' => $rec['entity'],
            'allegato' => $allegato,
            'requirement_text' => $rec['requirement'],
        ],
    ];
    // Counted optimistically; rolled back below if the batch upsert fails.
    $ok++;

    if (count($batch) >= $BATCH_SIZE) {
        try {
            $flush();
            logln(" upsert batch (ok cumulato={$ok})");
        } catch (Throwable $e) {
            $n0 = count($batch);
            $fail += $n0;
            $ok -= $n0;
            foreach ($batch as $b) {
                $failed[] = $b['payload']['code'] ?? '(?)';
            }
            $batch = [];
            logln(' ERRORE upsert batch (' . $n0 . ' persi): ' . substr($e->getMessage(), 0, 120));
        }
    }
}

// Send whatever is left in the last, partially filled batch.
if (!$dryRun) {
    try {
        $flush();
    } catch (Throwable $e) {
        $n0 = count($batch);
        $fail += $n0;
        $ok -= $n0;
        foreach ($batch as $b) {
            $failed[] = $b['payload']['code'] ?? '(?)';
        }
        logln(' ERRORE upsert finale (' . $n0 . ' persi): ' . substr($e->getMessage(), 0, 120));
    }
}

// --- Summary -----------------------------------------------------------------------------

logln('=== Completato ===');
logln("Indicizzati OK : {$ok}");
logln("Falliti : {$fail}");
if (!empty($failed)) {
    logln('Codici falliti : ' . implode(', ', array_slice($failed, 0, 50)) . (count($failed) > 50 ? ' ...' : ''));
}

// Exit status 2 signals that at least one record could not be indexed.
exit($fail > 0 ? 2 : 0);