Il FAB ARIA (common.js) chiamava POST /api/ai/ask ma il controller non esisteva (assistente AI rotto). Creato AiController::ask -> AIService::askWithRag con RAG su KB + grounding fonti certe. Verificato in produzione: rag_used=True, cita Ambiti NIS2 / Determina ACN. Fix DNS Qdrant: nei worker php-fpm (musl) getenv e gethostbyname NON funzionano per hostname Docker single-label; funziona solo un IP letterale. VectorService fallback -> 172.21.0.3 (fpm-safe); QDRANT_URL compose resta hostname per CLI. Vedi nota drift in VectorService. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
182 lines
6.9 KiB
PHP
182 lines
6.9 KiB
PHP
<?php
|
|
/**
 * NIS2 Agile - VectorService
 *
 * Minimal Qdrant HTTP client plus the multi-level visibility filter
 * (migrations 012-014). Uses a three-level model (SYSTEM/FIRM/ORG),
 * consistent with TRPG and SustainAI.
 */
class VectorService
{
    /**
     * Last-resort Qdrant endpoint: the literal IP of the nis2-qdrant container.
     *
     * DRIFT NOTE (from the original author): if the nis2-qdrant container is
     * recreated and its IP changes, this value must be updated. The definitive
     * fix is to assign a static ipv4_address to qdrant in docker-compose.yml
     * (requires recreating the network) and align this constant with it.
     */
    private const FALLBACK_URL = 'http://172.21.0.3:6333';

    /** Base URL of the Qdrant HTTP API, without a trailing slash. */
    private string $qdrantUrl;

    /** Name of the Qdrant collection every request is addressed to. */
    private string $collection;

    /**
     * @param string $collection Qdrant collection name used by all operations.
     */
    public function __construct(string $collection = 'nis2_kb')
    {
        $this->qdrantUrl = self::resolveBaseUrl();
        $this->collection = $collection;
    }

    /**
     * Creates the collection (cosine distance) when a GET on it does not
     * answer HTTP 200.
     *
     * Never throws: a failed creation is written to the error log, and a later
     * upsertBatch() will surface the problem as an exception.
     *
     * @param int $dims Vector size used when the collection has to be created.
     */
    public function ensureCollection(int $dims = 1024): void
    {
        $path = "/collections/{$this->collection}";

        if ($this->request('GET', $path)['status'] === 200) {
            return;
        }

        $created = $this->request('PUT', $path, [
            'vectors' => ['size' => $dims, 'distance' => 'Cosine'],
        ]);
        if ($created['status'] !== 200) {
            $this->logFailure('create collection', $created);
        }
    }

    /**
     * Upserts a batch of points and waits for the write to complete
     * (`wait=true`). An empty batch is a no-op.
     *
     * @param array $points Point objects, sent to Qdrant as the `points` field.
     *
     * @throws RuntimeException When Qdrant does not answer HTTP 200.
     */
    public function upsertBatch(array $points): void
    {
        if (empty($points)) {
            return;
        }

        $resp = $this->request('PUT', "/collections/{$this->collection}/points?wait=true", [
            'points' => $points,
        ]);
        if ($resp['status'] !== 200) {
            throw new RuntimeException('Qdrant upsert failed (HTTP ' . $resp['status'] . '): ' . json_encode($resp['body']));
        }
    }

    /**
     * Requests deletion of every point matching the filter.
     *
     * Never throws: a non-200 answer is written to the error log.
     *
     * @param array $filter Qdrant filter, sent as the `filter` field.
     */
    public function deleteByFilter(array $filter): void
    {
        $resp = $this->request('POST', "/collections/{$this->collection}/points/delete", [
            'filter' => $filter,
        ]);
        if ($resp['status'] !== 200) {
            $this->logFailure('delete by filter', $resp);
        }
    }

    /**
     * Sends a payload update for every point matching the filter.
     *
     * Never throws: a non-200 answer is written to the error log.
     *
     * @param array $payload Payload fields, sent as the `payload` field.
     * @param array $filter  Qdrant filter, sent as the `filter` field.
     */
    public function setPayloadByFilter(array $payload, array $filter): void
    {
        $resp = $this->request('POST', "/collections/{$this->collection}/points/payload", [
            'payload' => $payload,
            'filter' => $filter,
        ]);
        if ($resp['status'] !== 200) {
            $this->logFailure('set payload by filter', $resp);
        }
    }

    /**
     * Runs a vector search and returns the `result` entry of the response.
     *
     * Best-effort: any non-200 answer (including an unreachable Qdrant) is
     * logged and yields an empty list instead of an exception.
     *
     * @param array $vector   Query embedding.
     * @param array $filter   Optional Qdrant filter; omitted from the request when empty.
     * @param int   $limit    Maximum number of hits requested.
     * @param float $minScore Sent as `score_threshold`.
     *
     * @return array Hits as returned by Qdrant (payload included), or [] on failure.
     */
    public function search(array $vector, array $filter = [], int $limit = 8, float $minScore = 0.28): array
    {
        $body = [
            'vector' => $vector,
            'limit' => $limit,
            'with_payload' => true,
            'score_threshold' => $minScore,
        ];
        if (!empty($filter)) {
            $body['filter'] = $filter;
        }

        $resp = $this->request('POST', "/collections/{$this->collection}/points/search", $body);
        if ($resp['status'] !== 200) {
            $this->logFailure('search', $resp);
            return [];
        }

        // Guard the declared return type against an unexpected response shape.
        $result = $resp['body']['result'] ?? [];
        return is_array($result) ? $result : [];
    }

    /**
     * Three-level (SYSTEM/FIRM/ORG) filter derived from the user.
     * Matches ONLY the chunks visible to that user.
     *
     * @param array $userContext ['user_id'=>int, 'organization_id'=>int|null, 'consulting_firm_id'=>int|null]
     *
     * @return array Qdrant filter of the form ['should' => [...]].
     */
    public static function buildAuthzFilter(array $userContext): array
    {
        // isset() is already false for null, so no separate null check is needed.
        $firmId = isset($userContext['consulting_firm_id']) ? (int) $userContext['consulting_firm_id'] : null;
        $orgId = isset($userContext['organization_id']) ? (int) $userContext['organization_id'] : null;

        // L0 SYSTEM: vendor knowledge (always visible).
        $should = [
            ['key' => 'scope', 'match' => ['value' => 'SYSTEM']],
        ];

        // L1 FIRM: knowledge base of the user's own firm (visible to all its collaborators).
        if ($firmId !== null) {
            $should[] = [
                'must' => [
                    ['key' => 'scope', 'match' => ['value' => 'FIRM']],
                    ['key' => 'consulting_firm_id', 'match' => ['value' => $firmId]],
                ],
            ];
        }

        if ($orgId !== null) {
            // L1 FIRM explicitly shared with the current organization.
            $should[] = [
                'must' => [
                    ['key' => 'scope', 'match' => ['value' => 'FIRM']],
                    ['key' => 'shared_with_orgs', 'match' => ['any' => [$orgId]]],
                ],
            ];
            // L2 ORG: chunks of the current organization.
            $should[] = [
                'must' => [
                    ['key' => 'scope', 'match' => ['value' => 'ORG']],
                    ['key' => 'organization_id', 'match' => ['value' => $orgId]],
                ],
            ];
        }

        return ['should' => $should];
    }

    /**
     * Determines the Qdrant base URL.
     *
     * Background (original author's note, verified 2026-05-29): inside PHP-FPM
     * workers on Alpine/musl, during an HTTP request, neither
     * getenv('QDRANT_URL') (returns false) nor DNS resolution of the hostname
     * works (gethostbyname and curl fail for single-label Docker names); only
     * a literal IP works. In CLI both getenv and gethostbyname work. Strategy:
     *  - CLI: use QDRANT_URL (hostname) and resolve it to an IP below;
     *  - FPM: no env value is found, so FALLBACK_URL (a literal IP) is used.
     *
     * The hostname is rewritten to its IP only for plain `http://` URLs. For
     * `https://` the hostname is kept, because replacing it with an IP would
     * break TLS certificate and SNI hostname verification.
     *
     * @return string Base URL without a trailing slash.
     */
    private static function resolveBaseUrl(): string
    {
        $url = getenv('QDRANT_URL')
            ?: ($_SERVER['QDRANT_URL'] ?? null)
            ?: ($_ENV['QDRANT_URL'] ?? null)
            ?: self::FALLBACK_URL;
        $url = rtrim((string) $url, '/');

        if (!preg_match('#^(https?://)([^/:]+)(:\d+)?(.*)$#', $url, $m)) {
            return $url;
        }
        // The final group always participates, so all four captures are present.
        [, $scheme, $host, $port, $rest] = $m;

        if ($scheme !== 'http://' || filter_var($host, FILTER_VALIDATE_IP)) {
            return $url;
        }

        // gethostbyname() returns its argument unchanged when resolution fails.
        $ip = gethostbyname($host);
        if ($ip && $ip !== $host && filter_var($ip, FILTER_VALIDATE_IP)) {
            return $scheme . $ip . $port . $rest;
        }

        return $url;
    }

    /**
     * Writes a failed Qdrant call to the PHP error log.
     *
     * @param string $operation Short label of the operation that failed.
     * @param array  $resp      Response as returned by request().
     */
    private function logFailure(string $operation, array $resp): void
    {
        error_log(sprintf(
            'VectorService: Qdrant %s failed on collection "%s" (HTTP %d): %s',
            $operation,
            $this->collection,
            $resp['status'],
            (string) json_encode($resp['body'])
        ));
    }

    /**
     * Performs one JSON request against Qdrant.
     *
     * Never throws. When the request cannot be prepared or sent, `body` is
     * ['error' => <reason>] so that callers can report what went wrong;
     * `status` is 0 if the request was never sent, otherwise whatever curl
     * reports as the HTTP code.
     *
     * @param string     $method HTTP method.
     * @param string     $path   Path appended to the base URL (leading slash).
     * @param array|null $body   Request body, JSON-encoded when not null.
     *
     * @return array{status:int, body:array}
     */
    private function request(string $method, string $path, ?array $body = null): array
    {
        $ch = curl_init($this->qdrantUrl . $path);
        if ($ch === false) {
            return ['status' => 0, 'body' => ['error' => 'curl_init failed']];
        }

        $opts = [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
            CURLOPT_CONNECTTIMEOUT => 3,
            CURLOPT_TIMEOUT => 30,
        ];
        if ($body !== null) {
            $encoded = json_encode($body);
            if ($encoded === false) {
                // Do not send a request whose body could not be serialised.
                curl_close($ch);
                return ['status' => 0, 'body' => ['error' => 'JSON encode failed: ' . json_last_error_msg()]];
            }
            $opts[CURLOPT_POSTFIELDS] = $encoded;
        }
        curl_setopt_array($ch, $opts);

        $raw = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // Must be read before the handle is closed.
        $transportError = $raw === false ? curl_error($ch) : '';
        curl_close($ch);

        if ($raw === false) {
            return ['status' => $status, 'body' => ['error' => 'curl: ' . $transportError]];
        }

        // Empty or non-JSON bodies, and scalar JSON values, all normalise to [].
        $decoded = $raw !== '' ? json_decode($raw, true) : null;

        return [
            'status' => $status,
            'body' => is_array($decoded) ? $decoded : [],
        ];
    }
}
|