nis2-agile/application/controllers/KnowledgeBaseController.php
DevEnv nis2-agile a7a21faa82 [FEAT] Knowledge Base RAG multi-livello (SYSTEM/FIRM/ORG) + Qdrant + Voyage
- KnowledgeBaseController: ingest, list, firmOrgs, search, delete
- VectorService (Qdrant + buildAuthzFilter), EmbedService (Voyage), RagService (pipeline)
- AIService::askWithRag con fallback graceful
- docker-compose: servizio qdrant + env Voyage (chiave da .env/vault, no hardcoded)
- SQL 012 consulting_firms, 013 firm_assignments + kb_uploaded_documents
- public/kb.html + kb.js (upload, lista, search preview)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 15:44:13 +02:00

330 lines
13 KiB
PHP

<?php
/**
* NIS2 Agile - Knowledge Base Controller
*
* Migrations 012-014: multi-level knowledge base (SYSTEM/FIRM/ORG) backed by Qdrant + Voyage.
*
* Endpoints:
* POST /api/knowledgebase/ingest - Upload a new document (plain text)
* GET /api/knowledgebase/list - List the uploaded documents visible to the caller
* GET /api/knowledgebase/firm-orgs - List the organizations of the user's firm (for the multi-select UI)
* POST /api/knowledgebase/search - Semantic search (debug/preview)
* DELETE /api/knowledgebase/document/{id} - Delete a document and its Qdrant chunks
*/
require_once __DIR__ . '/BaseController.php';
require_once APP_PATH . '/services/VectorService.php';
require_once APP_PATH . '/services/EmbedService.php';
require_once APP_PATH . '/services/RagService.php';
class KnowledgeBaseController extends BaseController
{
/**
 * GET /api/knowledgebase/firm-orgs
 *
 * Responds with the active organizations linked to the authenticated user's
 * consulting firm, ordered by name. The result feeds the multi-select of the
 * upload form (scope=FIRM). A user without a firm receives an empty list.
 */
public function firmOrgs(): void
{
    $this->requireAuth();

    $organizations = [];
    $consultingFirmId = $this->currentUser['consulting_firm_id'] ?? null;

    // Only query the database when the user actually belongs to a firm.
    if ($consultingFirmId) {
        $organizations = Database::fetchAll(
            'SELECT id, name, vat_number, sector
FROM organizations
WHERE consulting_firm_id = ? AND is_active = 1
ORDER BY name',
            [(int)$consultingFirmId]
        );
    }

    $this->jsonSuccess(['organizations' => $organizations]);
}
/**
 * POST /api/knowledgebase/ingest
 *
 * Validates the request, splits the text into overlapping chunks, embeds each
 * chunk, upserts the resulting points into the vector store and finally writes
 * a tracking row to kb_uploaded_documents.
 *
 * Body JSON:
 *   { title, text, entity_type?, source?, scope?, shared_with_orgs?, organization_id? }
 *
 * Authorization rules enforced here:
 *   - the caller's role must be super_admin, org_admin, compliance_manager or consultant;
 *   - scope=SYSTEM requires super_admin;
 *   - scope=FIRM requires a consulting_firm_id on the user, and every id in
 *     shared_with_orgs must belong to that firm;
 *   - scope=ORG requires organization_id and, unless the caller is super_admin,
 *     an org_admin/compliance_manager membership row in user_organizations.
 *
 * NOTE: jsonError() lives in BaseController, outside this file. Every call is
 * followed by an explicit return so that a failed check can never fall through,
 * even if that helper does not terminate the request.
 */
public function ingest(): void
{
    $this->requireAuth();

    $userId = (int)$this->currentUser['id'];
    $userRole = $this->currentUser['role'] ?? '';
    $userFirmId = $this->currentUser['consulting_firm_id'] ?? null;

    // Only these roles may upload; employee/auditor are read-only.
    $allowedUploadRoles = ['super_admin', 'org_admin', 'compliance_manager', 'consultant'];
    if (!in_array($userRole, $allowedUploadRoles, true)) {
        $this->jsonError('Ruolo non autorizzato a caricare documenti KB', 403, 'KB_FORBIDDEN');
        return;
    }

    $this->validateRequired(['title', 'text']);

    $title = trim((string)$this->getParam('title'));
    $text = (string)$this->getParam('text');
    $entityType = $this->getParam('entity_type', 'custom');
    $source = $this->getParam('source', $title);
    $orgId = (int)$this->getParam('organization_id', 0);

    // A missing or unknown scope falls back to SYSTEM, which only super_admin may use.
    $scope = strtoupper((string)$this->getParam('scope', 'SYSTEM'));
    if (!in_array($scope, ['SYSTEM', 'FIRM', 'ORG'], true)) {
        $scope = 'SYSTEM';
    }

    // Keep only non-zero integer ids, re-indexed as a list.
    $sharedWith = $this->getParam('shared_with_orgs', []);
    if (!is_array($sharedWith)) {
        $sharedWith = [];
    }
    $sharedWith = array_values(array_filter(array_map('intval', $sharedWith)));

    // shared_with_orgs is only validated for scope=FIRM and organization_id only
    // for scope=ORG. Drop them for every other scope so unvalidated, client-supplied
    // ids are never persisted in the vector payload or in the tracking row.
    if ($scope !== 'FIRM') {
        $sharedWith = [];
    }
    if ($scope !== 'ORG') {
        $orgId = 0;
    }

    // Text validation: the limits are advertised in characters, so count
    // characters rather than bytes (accented letters are multi-byte in UTF-8).
    $textLen = mb_strlen($text, 'UTF-8');
    if ($textLen < 50) {
        $this->jsonError('Testo troppo breve (min 50 caratteri)', 422, 'TEXT_TOO_SHORT');
        return;
    }
    if ($textLen > 50000) {
        $this->jsonError('Testo troppo lungo (max 50.000 caratteri)', 422, 'TEXT_TOO_LONG');
        return;
    }

    // Per-scope authorization
    if ($scope === 'SYSTEM' && $userRole !== 'super_admin') {
        $this->jsonError('Solo i super_admin possono caricare documenti SYSTEM', 403, 'KB_SYSTEM_FORBIDDEN');
        return;
    }

    if ($scope === 'FIRM') {
        if (!$userFirmId) {
            $this->jsonError('Solo i membri di uno studio possono caricare documenti FIRM', 403, 'KB_NO_FIRM');
            return;
        }
        // Check that every organization in shared_with really belongs to the firm.
        if (!empty($sharedWith)) {
            $placeholders = implode(',', array_fill(0, count($sharedWith), '?'));
            $valid = Database::fetchAll(
                "SELECT id FROM organizations WHERE id IN ($placeholders) AND consulting_firm_id = ?",
                array_merge($sharedWith, [(int)$userFirmId])
            );
            $validIds = array_map(static fn($r) => (int)$r['id'], $valid);
            $invalid = array_diff($sharedWith, $validIds);
            if (!empty($invalid)) {
                $this->jsonError('Alcune organizzazioni non appartengono al tuo studio: ' . implode(',', $invalid), 403, 'KB_INVALID_SHARE');
                return;
            }
            // Use the ids returned by the database (this also drops duplicates).
            $sharedWith = $validIds;
        }
    }

    if ($scope === 'ORG') {
        if ($orgId <= 0) {
            $this->jsonError('organization_id obbligatorio per scope=ORG', 422, 'KB_ORG_REQUIRED');
            return;
        }
        // Check the user's write access to the organization.
        if ($userRole !== 'super_admin') {
            $access = Database::fetchOne(
                "SELECT 1 FROM user_organizations WHERE user_id = ? AND organization_id = ? AND role IN ('org_admin','compliance_manager')",
                [$userId, $orgId]
            );
            if (!$access) {
                $this->jsonError('Non hai permessi di scrittura su questa organizzazione', 403, 'KB_ORG_FORBIDDEN');
                return;
            }
        }
    }

    // Same normalized firm id for the vector payload and for the SQL row.
    $firmIdValue = $userFirmId !== null ? (int)$userFirmId : null;
    $orgIdValue = $orgId > 0 ? $orgId : null;

    // Chunking: ~2000 chars with a 200-char overlap.
    $chunks = $this->chunkText($text, 2000, 200);

    try {
        $embed = new EmbedService();
        $vector = new VectorService();
        $vector->ensureCollection($embed->dims);

        // One uuid ties all chunks of this document together (used by delete()).
        $docUuid = $this->generateUuid();
        $points = [];
        foreach ($chunks as $i => $chunk) {
            $vec = $embed->embed($chunk);
            $points[] = [
                'id' => $this->generateUuid(),
                'vector' => $vec,
                'payload' => [
                    'doc_uuid' => $docUuid,
                    'title' => $title . ($i > 0 ? ' (parte ' . ($i + 1) . ')' : ''),
                    'chunk' => $chunk,
                    'entity_type' => $entityType,
                    'source' => $source,
                    'lang' => 'it',
                    'scope' => $scope,
                    'consulting_firm_id' => $firmIdValue,
                    'organization_id' => $orgIdValue,
                    'shared_with_orgs' => $sharedWith,
                    'uploaded_by' => $userId,
                ],
            ];
        }
        $vector->upsertBatch($points);
    } catch (Exception $e) {
        error_log('[KB] ingest failed: ' . $e->getMessage());
        $this->jsonError('Errore durante l\'indicizzazione: ' . $e->getMessage(), 500, 'KB_INGEST_ERROR');
        return;
    }

    // Tracking row in MySQL (best effort: a failure is logged, not reported).
    // NOTE(review): when this insert fails the chunks stay indexed but the
    // document does not appear in list() and cannot be removed via delete(),
    // which looks the document up by its tracking row. Confirm this is intended.
    try {
        $stmt = Database::getInstance()->prepare(
            "INSERT INTO kb_uploaded_documents
(qdrant_doc_uuid, scope, consulting_firm_id, organization_id, uploaded_by, title, entity_type, source, lang, chunk_count, shared_with_orgs, status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'ready')"
        );
        $stmt->execute([
            $docUuid,
            $scope,
            $firmIdValue,
            $orgIdValue,
            $userId,
            $title,
            $entityType,
            $source,
            'it',
            count($chunks),
            json_encode($sharedWith),
        ]);
    } catch (Exception $e) {
        error_log('[KB] kb_uploaded_documents insert failed: ' . $e->getMessage());
    }

    $this->jsonSuccess([
        'doc_uuid' => $docUuid,
        'title' => $title,
        'scope' => $scope,
        'chunks' => count($chunks),
        'shared_with_orgs' => $sharedWith,
    ], 'Documento indicizzato');
}
/**
 * GET /api/knowledgebase/list
 *
 * Lists the most recent (max 200) documents the caller may see:
 *   - every SYSTEM document;
 *   - FIRM documents of the caller's consulting firm;
 *   - FIRM documents shared with the caller's current organization;
 *   - ORG documents of the caller's current organization.
 *
 * Note: the list is read from kb_uploaded_documents (the tracking/audit table),
 * not from Qdrant.
 */
public function list(): void
{
    $this->requireAuth();

    $firmId = $this->currentUser['consulting_firm_id'] ?? null;
    $orgId = $this->resolveOrgId();

    $params = [];
    // SYSTEM documents are always visible.
    $clauses = ["scope = 'SYSTEM'"];

    if ($firmId) {
        $clauses[] = "(scope = 'FIRM' AND consulting_firm_id = ?)";
        $params[] = (int)$firmId;
    }

    if ($orgId) {
        // shared_with_orgs stores a JSON array of integers (see ingest()).
        // The candidate is passed as JSON text ("5") so it is compared as a JSON
        // number; JSON_ARRAY(?) with a string-bound parameter would build ["5"],
        // which never matches [5].
        $clauses[] = "(scope = 'FIRM' AND JSON_CONTAINS(shared_with_orgs, ?))";
        $params[] = (string)(int)$orgId;
        $clauses[] = "(scope = 'ORG' AND organization_id = ?)";
        $params[] = (int)$orgId;
    }

    $sql = 'SELECT id, qdrant_doc_uuid, scope, consulting_firm_id, organization_id, title, entity_type, source, lang, chunk_count, shared_with_orgs, status, created_at
FROM kb_uploaded_documents
WHERE ' . implode(' OR ', $clauses) . '
ORDER BY created_at DESC LIMIT 200';
    $rows = Database::fetchAll($sql, $params);

    // Decode the shared_with_orgs JSON column; anything that is missing or does
    // not decode to an array becomes an empty list.
    foreach ($rows as $i => $row) {
        $decoded = !empty($row['shared_with_orgs'])
            ? json_decode($row['shared_with_orgs'], true)
            : null;
        $rows[$i]['shared_with_orgs'] = is_array($decoded) ? $decoded : [];
    }

    $this->jsonSuccess(['documents' => $rows]);
}
/**
 * POST /api/knowledgebase/search
 * Body: { query, top_k? }
 *
 * Semantic search preview (useful for debugging and for a "find similar" UI).
 * The search is delegated to RagService::searchForUser() together with the
 * caller's user id, current organization and consulting firm.
 *
 * top_k defaults to 5 and is clamped to the range 1..50 so that a zero,
 * negative or very large client-supplied value never reaches the vector search.
 */
public function search(): void
{
    $this->requireAuth();
    $this->validateRequired(['query']);

    $query = (string)$this->getParam('query');

    // Upper bound for this preview endpoint; top_k comes from the client.
    $maxTopK = 50;
    $topK = max(1, min($maxTopK, (int)$this->getParam('top_k', 5)));

    $userContext = [
        'user_id' => (int)$this->currentUser['id'],
        'organization_id' => $this->resolveOrgId(),
        'consulting_firm_id' => $this->currentUser['consulting_firm_id'] ?? null,
    ];

    try {
        $rag = new RagService();
        $hits = $rag->searchForUser($query, $userContext, $topK);
        $this->jsonSuccess(['results' => $hits]);
    } catch (Exception $e) {
        $this->jsonError('Errore search: ' . $e->getMessage(), 500, 'KB_SEARCH_ERROR');
        return;
    }
}
/**
 * DELETE /api/knowledgebase/document/{id}
 *
 * Deletes a document: first all of its Qdrant chunks (matched by doc_uuid),
 * then its kb_uploaded_documents tracking row. Only the uploader or a
 * super_admin may delete.
 *
 * If the vector-store deletion fails, the tracking row is kept and an error is
 * returned. Otherwise the chunks would stay searchable while the document could
 * no longer be found (and therefore retried) through this endpoint.
 *
 * NOTE: jsonError() lives in BaseController, outside this file. The explicit
 * returns guarantee that a failed check never falls through to the deletion,
 * even if that helper does not terminate the request.
 *
 * @param int $id Primary key of the kb_uploaded_documents row.
 */
public function delete(int $id): void
{
    $this->requireAuth();

    $userRole = $this->currentUser['role'] ?? '';
    $userId = (int)$this->currentUser['id'];

    $doc = Database::fetchOne('SELECT * FROM kb_uploaded_documents WHERE id = ?', [$id]);
    if (!$doc) {
        $this->jsonError('Documento non trovato', 404, 'KB_NOT_FOUND');
        return;
    }

    // Only the uploader or a super_admin may delete.
    if ($userRole !== 'super_admin' && (int)$doc['uploaded_by'] !== $userId) {
        $this->jsonError('Non autorizzato a cancellare questo documento', 403, 'KB_DELETE_FORBIDDEN');
        return;
    }

    try {
        $vector = new VectorService();
        $vector->deleteByFilter([
            'must' => [
                ['key' => 'doc_uuid', 'match' => ['value' => $doc['qdrant_doc_uuid']]],
            ],
        ]);
    } catch (Exception $e) {
        // Keep the tracking row so the deletion can be retried later.
        error_log('[KB] qdrant delete failed: ' . $e->getMessage());
        $this->jsonError('Errore durante la cancellazione dei chunk indicizzati', 500, 'KB_DELETE_ERROR');
        return;
    }

    Database::query('DELETE FROM kb_uploaded_documents WHERE id = ?', [$id]);
    $this->jsonSuccess(null, 'Documento eliminato');
}
// ─── helpers ─────────────────────────────────────────
/**
 * Splits a text into consecutive chunks of at most $chunkSize characters,
 * where each chunk after the first starts $overlap characters before the end
 * of the previous one.
 *
 * Lengths are measured in UTF-8 characters, not bytes, so a multi-byte
 * character (e.g. an accented letter) is never cut in half; a byte-wise split
 * can produce invalid UTF-8 that later fails JSON encoding.
 *
 * @param string $text      Text to split; an empty string yields an empty list.
 * @param int    $chunkSize Maximum chunk length in characters (> 0).
 * @param int    $overlap   Characters shared by adjacent chunks (0 <= overlap < chunkSize).
 *
 * @return string[] Chunks in document order.
 *
 * @throws InvalidArgumentException When the size/overlap combination could not
 *                                  make progress through the text.
 */
private function chunkText(string $text, int $chunkSize = 2000, int $overlap = 200): array
{
    // With overlap >= chunkSize the start offset would never advance.
    if ($chunkSize <= 0 || $overlap < 0 || $overlap >= $chunkSize) {
        throw new InvalidArgumentException(
            'chunkText requires chunkSize > 0 and 0 <= overlap < chunkSize'
        );
    }

    $chunks = [];
    $length = mb_strlen($text, 'UTF-8');
    $step = $chunkSize - $overlap;

    for ($start = 0; $start < $length; $start += $step) {
        // mb_substr() returns the shorter remainder for the last chunk.
        $chunks[] = mb_substr($text, $start, $chunkSize, 'UTF-8');

        // Stop once this chunk has reached the end of the text, so no extra
        // chunk made only of overlap is emitted.
        if ($start + $chunkSize >= $length) {
            break;
        }
    }

    return $chunks;
}
/**
 * Builds a random (version 4) UUID string in the canonical
 * 8-4-4-4-12 lowercase hexadecimal layout.
 *
 * @return string The generated UUID.
 */
private function generateUuid(): string
{
    $bytes = random_bytes(16);

    // Force the version nibble to 4 and the variant bits to 10xxxxxx.
    $bytes[6] = chr((ord($bytes[6]) & 0x0f) | 0x40);
    $bytes[8] = chr((ord($bytes[8]) & 0x3f) | 0x80);

    $hex = bin2hex($bytes);

    return implode('-', [
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12),
    ]);
}
// resolveOrgId() is inherited from BaseController (line 351 at the time of writing)
}