- KnowledgeBaseController: ingest, list, firmOrgs, search, delete - VectorService (Qdrant + buildAuthzFilter), EmbedService (Voyage), RagService (pipeline) - AIService::askWithRag con fallback graceful - docker-compose: servizio qdrant + env Voyage (chiave da .env/vault, no hardcoded) - SQL 012 consulting_firms, 013 firm_assignments + kb_uploaded_documents - public/kb.html + kb.js (upload, lista, search preview) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
330 lines
13 KiB
PHP
330 lines
13 KiB
PHP
<?php
|
|
/**
|
|
* NIS2 Agile - Knowledge Base Controller
|
|
*
|
|
* Migration 012-014: KB multi-livello (SYSTEM/FIRM/ORG) con Qdrant + Voyage.
|
|
*
|
|
* Endpoint:
|
|
* POST /api/knowledgebase/ingest - Carica nuovo documento (testo)
|
|
* GET /api/knowledgebase/list - Lista documenti caricati visibili
|
|
* GET /api/knowledgebase/firm-orgs - Lista organizzazioni del firm dell'utente (per multi-select UI)
|
|
* POST /api/knowledgebase/search - Search semantica (debug/preview)
|
|
* DELETE /api/knowledgebase/document/{id}- Cancella documento + chunk Qdrant
|
|
*/
|
|
|
|
require_once __DIR__ . '/BaseController.php';
|
|
require_once APP_PATH . '/services/VectorService.php';
|
|
require_once APP_PATH . '/services/EmbedService.php';
|
|
require_once APP_PATH . '/services/RagService.php';
|
|
|
|
class KnowledgeBaseController extends BaseController
|
|
{
|
|
/**
 * GET /api/knowledgebase/firm-orgs
 *
 * Returns the active organizations attached to the authenticated user's
 * consulting firm, ordered by name. The upload form uses this list to fill
 * its multi-select when scope=FIRM. A user without a firm receives an empty
 * list instead of an error.
 */
public function firmOrgs(): void
{
    $this->requireAuth();

    $organizations    = [];
    $consultingFirmId = $this->currentUser['consulting_firm_id'] ?? null;

    // Only hit the database when the user actually belongs to a firm.
    if ($consultingFirmId) {
        $organizations = Database::fetchAll(
            'SELECT id, name, vat_number, sector
             FROM organizations
             WHERE consulting_firm_id = ? AND is_active = 1
             ORDER BY name',
            [(int)$consultingFirmId]
        );
    }

    $this->jsonSuccess(['organizations' => $organizations]);
}
|
|
|
|
/**
 * POST /api/knowledgebase/ingest
 *
 * JSON body:
 *   { title, text, entity_type?, source?, scope?, shared_with_orgs?, organization_id? }
 *
 * Splits the submitted text into overlapping chunks, embeds each chunk,
 * upserts the resulting points into the vector store and then records a
 * tracking row in kb_uploaded_documents.
 *
 * Authorization rules enforced here:
 *  - only super_admin, org_admin, compliance_manager and consultant may upload;
 *  - scope=SYSTEM (also the default and the fallback for unknown values)
 *    requires super_admin;
 *  - scope=FIRM requires the user to belong to a consulting firm, and every
 *    id in shared_with_orgs must belong to that firm;
 *  - scope=ORG requires organization_id and, unless the user is super_admin,
 *    an org_admin/compliance_manager membership in user_organizations.
 *
 * NOTE(review): the code after each jsonError() call relies on jsonError()
 * terminating the request - confirm in BaseController.
 */
public function ingest(): void
{
    $this->requireAuth();
    $userId     = (int)$this->currentUser['id'];
    $userRole   = $this->currentUser['role'] ?? '';
    $userFirmId = $this->currentUser['consulting_firm_id'] ?? null;

    // Only these roles may upload; employee/auditor are read-only.
    $allowedUploadRoles = ['super_admin', 'org_admin', 'compliance_manager', 'consultant'];
    if (!in_array($userRole, $allowedUploadRoles, true)) {
        $this->jsonError('Ruolo non autorizzato a caricare documenti KB', 403, 'KB_FORBIDDEN');
    }

    $this->validateRequired(['title', 'text']);

    $title      = trim((string)$this->getParam('title'));
    $text       = (string)$this->getParam('text');
    $entityType = $this->getParam('entity_type', 'custom');
    $source     = $this->getParam('source', $title);
    $orgId      = (int)$this->getParam('organization_id', 0);

    $scope = strtoupper((string)$this->getParam('scope', 'SYSTEM'));
    if (!in_array($scope, ['SYSTEM', 'FIRM', 'ORG'], true)) {
        $scope = 'SYSTEM';
    }

    // Normalise shared_with_orgs to a list of unique, non-zero integer ids.
    $sharedWith = $this->getParam('shared_with_orgs', []);
    if (!is_array($sharedWith)) {
        $sharedWith = [];
    }
    $sharedWith = array_values(array_unique(array_filter(array_map('intval', $sharedWith))));

    // Scope-specific fields are only validated for their own scope below
    // (shared_with_orgs for FIRM, organization_id for ORG). Drop them for any
    // other scope so client-supplied tenant ids are never persisted unchecked
    // into the vector payload or the tracking row.
    if ($scope !== 'FIRM') {
        $sharedWith = [];
    }
    if ($scope !== 'ORG') {
        $orgId = 0;
    }

    // Text validation: the limits are expressed in characters (see the error
    // messages), so count UTF-8 characters rather than bytes.
    $textLen = mb_strlen($text, 'UTF-8');
    if ($textLen < 50) {
        $this->jsonError('Testo troppo breve (min 50 caratteri)', 422, 'TEXT_TOO_SHORT');
    }
    if ($textLen > 50000) {
        $this->jsonError('Testo troppo lungo (max 50.000 caratteri)', 422, 'TEXT_TOO_LONG');
    }

    // Per-scope authorization
    if ($scope === 'SYSTEM' && !in_array($userRole, ['super_admin'], true)) {
        $this->jsonError('Solo i super_admin possono caricare documenti SYSTEM', 403, 'KB_SYSTEM_FORBIDDEN');
    }
    if ($scope === 'FIRM') {
        if (!$userFirmId) {
            $this->jsonError('Solo i membri di uno studio possono caricare documenti FIRM', 403, 'KB_NO_FIRM');
        }
        // Make sure every organization in shared_with_orgs really belongs to the firm.
        if (!empty($sharedWith)) {
            $placeholders = implode(',', array_fill(0, count($sharedWith), '?'));
            $valid = Database::fetchAll(
                "SELECT id FROM organizations WHERE id IN ($placeholders) AND consulting_firm_id = ?",
                array_merge($sharedWith, [(int)$userFirmId])
            );
            $validIds = array_map(fn($r) => (int)$r['id'], $valid);
            $invalid  = array_diff($sharedWith, $validIds);
            if (!empty($invalid)) {
                $this->jsonError('Alcune organizzazioni non appartengono al tuo studio: ' . implode(',', $invalid), 403, 'KB_INVALID_SHARE');
            }
            $sharedWith = $validIds;
        }
    }
    if ($scope === 'ORG') {
        if ($orgId <= 0) {
            $this->jsonError('organization_id obbligatorio per scope=ORG', 422, 'KB_ORG_REQUIRED');
        }
        // Check that the user has write access to the organization.
        if ($userRole !== 'super_admin') {
            $access = Database::fetchOne(
                "SELECT 1 FROM user_organizations WHERE user_id = ? AND organization_id = ? AND role IN ('org_admin','compliance_manager')",
                [$userId, $orgId]
            );
            if (!$access) {
                $this->jsonError('Non hai permessi di scrittura su questa organizzazione', 403, 'KB_ORG_FORBIDDEN');
            }
        }
    }

    // Chunking: ~2000 chars with a 200-char overlap.
    $chunks = $this->chunkText($text, 2000, 200);

    try {
        $embed  = new EmbedService();
        $vector = new VectorService();
        $vector->ensureCollection($embed->dims);

        // One doc_uuid ties all chunk points of this upload together; delete()
        // later removes them by filtering on this value.
        $docUuid = $this->generateUuid();
        $points  = [];
        foreach ($chunks as $i => $chunk) {
            $vec = $embed->embed($chunk);
            $points[] = [
                'id'      => $this->generateUuid(),
                'vector'  => $vec,
                'payload' => [
                    'doc_uuid'           => $docUuid,
                    'title'              => $title . ($i > 0 ? ' (parte ' . ($i + 1) . ')' : ''),
                    'chunk'              => $chunk,
                    'entity_type'        => $entityType,
                    'source'             => $source,
                    'lang'               => 'it',
                    'scope'              => $scope,
                    'consulting_firm_id' => $userFirmId !== null ? (int)$userFirmId : null,
                    'organization_id'    => $orgId > 0 ? $orgId : null,
                    'shared_with_orgs'   => $sharedWith,
                    'uploaded_by'        => $userId,
                ],
            ];
        }
        $vector->upsertBatch($points);
    } catch (Exception $e) {
        $this->jsonError('Errore durante l\'indicizzazione: ' . $e->getMessage(), 500, 'KB_INGEST_ERROR');
    }

    // Tracking row in MySQL. Best-effort: a failure is logged and the request
    // still reports success, because the chunks are already indexed.
    try {
        $stmt = Database::getInstance()->prepare(
            "INSERT INTO kb_uploaded_documents
             (qdrant_doc_uuid, scope, consulting_firm_id, organization_id, uploaded_by, title, entity_type, source, lang, chunk_count, shared_with_orgs, status)
             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'ready')"
        );
        $stmt->execute([
            $docUuid,
            $scope,
            $userFirmId,
            $orgId > 0 ? $orgId : null,
            $userId,
            $title,
            $entityType,
            $source,
            'it',
            count($chunks),
            json_encode($sharedWith),
        ]);
    } catch (Exception $e) {
        error_log('[KB] kb_uploaded_documents insert failed: ' . $e->getMessage());
    }

    $this->jsonSuccess([
        'doc_uuid'         => $docUuid,
        'title'            => $title,
        'scope'            => $scope,
        'chunks'           => count($chunks),
        'shared_with_orgs' => $sharedWith,
    ], 'Documento indicizzato');
}
|
|
|
|
/**
 * GET /api/knowledgebase/list
 *
 * Lists the uploaded documents the current user is allowed to see. The list
 * is read from the kb_uploaded_documents tracking table, not from Qdrant.
 *
 * Visibility rules, OR-ed together:
 *  - SYSTEM documents are always visible;
 *  - FIRM documents of the user's consulting firm, when the user has one;
 *  - FIRM documents whose shared_with_orgs contains the resolved organization;
 *  - ORG documents of the resolved organization.
 *
 * At most the 200 most recent rows are returned, with shared_with_orgs
 * decoded from JSON into an array.
 */
public function list(): void
{
    $this->requireAuth();
    $firmId = $this->currentUser['consulting_firm_id'] ?? null;
    $orgId  = $this->resolveOrgId();

    $params = [];

    // SYSTEM is always visible.
    $clauses = ["scope = 'SYSTEM'"];

    if ($firmId) {
        $clauses[] = "(scope = 'FIRM' AND consulting_firm_id = ?)";
        $params[]  = (int)$firmId;
    }
    if ($orgId) {
        $clauses[] = "(scope = 'FIRM' AND JSON_CONTAINS(shared_with_orgs, JSON_ARRAY(?)))";
        $params[]  = (int)$orgId;
        $clauses[] = "(scope = 'ORG' AND organization_id = ?)";
        $params[]  = (int)$orgId;
    }

    $sql = 'SELECT id, qdrant_doc_uuid, scope, consulting_firm_id, organization_id, title, entity_type, source, lang, chunk_count, shared_with_orgs, status, created_at
            FROM kb_uploaded_documents
            WHERE ' . implode(' OR ', $clauses) . '
            ORDER BY created_at DESC LIMIT 200';

    $rows = Database::fetchAll($sql, $params);

    // Decode the shared_with_orgs JSON column. Rows are updated by index
    // rather than by reference, so no dangling reference outlives the loop.
    foreach ($rows as $index => $row) {
        $raw = $row['shared_with_orgs'] ?? null;
        $rows[$index]['shared_with_orgs'] = !empty($raw)
            ? (json_decode($raw, true) ?: [])
            : [];
    }

    $this->jsonSuccess(['documents' => $rows]);
}
|
|
|
|
/**
 * POST /api/knowledgebase/search
 *
 * Body: { query, top_k? }
 *
 * Semantic search preview, useful for debugging and for a "find similar" UI.
 * The user id, resolved organization id and consulting firm id are passed to
 * RagService::searchForUser() as the user context (presumably used there to
 * restrict the hits to what the user may see - confirm in RagService).
 *
 * top_k defaults to 5 and is clamped to the range 1..50, because it comes
 * straight from the request body and would otherwise reach the vector
 * search unbounded (zero, negative or very large values).
 */
public function search(): void
{
    $this->requireAuth();
    $this->validateRequired(['query']);
    $query = (string)$this->getParam('query');

    // Upper bound for the preview endpoint; raise it here if the UI ever
    // needs more hits per request.
    $maxTopK = 50;
    $topK    = max(1, min((int)$this->getParam('top_k', 5), $maxTopK));

    $userContext = [
        'user_id'            => (int)$this->currentUser['id'],
        'organization_id'    => $this->resolveOrgId(),
        'consulting_firm_id' => $this->currentUser['consulting_firm_id'] ?? null,
    ];

    try {
        $rag  = new RagService();
        $hits = $rag->searchForUser($query, $userContext, $topK);
        $this->jsonSuccess(['results' => $hits]);
    } catch (Exception $e) {
        $this->jsonError('Errore search: ' . $e->getMessage(), 500, 'KB_SEARCH_ERROR');
    }
}
|
|
|
|
/**
 * DELETE /api/knowledgebase/document/{id}
 *
 * Removes a tracked document together with every Qdrant chunk that carries
 * its doc_uuid. Only the original uploader or a super_admin may delete.
 *
 * The Qdrant cleanup is best-effort: if it throws, the failure is logged and
 * the tracking row is removed anyway.
 *
 * @param int $id Primary key of the kb_uploaded_documents row.
 */
public function delete(int $id): void
{
    $this->requireAuth();

    $currentRole   = $this->currentUser['role'] ?? '';
    $currentUserId = (int)$this->currentUser['id'];

    $document = Database::fetchOne('SELECT * FROM kb_uploaded_documents WHERE id = ?', [$id]);
    if (!$document) {
        $this->jsonError('Documento non trovato', 404, 'KB_NOT_FOUND');
    }

    // Permission gate: super_admin always passes, everyone else must be the uploader.
    $mayDelete = $currentRole === 'super_admin'
        || (int)$document['uploaded_by'] === $currentUserId;
    if (!$mayDelete) {
        $this->jsonError('Non autorizzato a cancellare questo documento', 403, 'KB_DELETE_FORBIDDEN');
    }

    // Drop every chunk point whose payload doc_uuid matches this document.
    $chunkFilter = [
        'must' => [
            ['key' => 'doc_uuid', 'match' => ['value' => $document['qdrant_doc_uuid']]],
        ],
    ];
    try {
        (new VectorService())->deleteByFilter($chunkFilter);
    } catch (Exception $e) {
        error_log('[KB] qdrant delete failed: ' . $e->getMessage());
    }

    Database::query('DELETE FROM kb_uploaded_documents WHERE id = ?', [$id]);
    $this->jsonSuccess(null, 'Documento eliminato');
}
|
|
|
|
// ─── helpers ─────────────────────────────────────────
|
|
|
|
/**
 * Splits text into consecutive chunks of at most $chunkSize characters,
 * where each chunk after the first starts $overlap characters before the
 * end of the previous one.
 *
 * Sizes are measured in UTF-8 characters (mbstring), not bytes, so a
 * multibyte character is never cut in half and every chunk stays valid
 * UTF-8.
 *
 * @param string $text      Text to split; an empty string yields no chunks.
 * @param int    $chunkSize Maximum characters per chunk, must be > 0.
 * @param int    $overlap   Characters shared between neighbouring chunks,
 *                          must satisfy 0 <= $overlap < $chunkSize.
 *
 * @return string[] Chunks in document order.
 *
 * @throws InvalidArgumentException When the sizes would make the loop stall
 *                                  or skip text.
 */
private function chunkText(string $text, int $chunkSize = 2000, int $overlap = 200): array
{
    if ($chunkSize <= 0) {
        throw new InvalidArgumentException('chunkSize must be greater than zero');
    }
    if ($overlap < 0 || $overlap >= $chunkSize) {
        // overlap >= chunkSize would never advance the window (infinite loop).
        throw new InvalidArgumentException('overlap must be >= 0 and smaller than chunkSize');
    }

    $chunks = [];
    $length = mb_strlen($text, 'UTF-8');
    $step   = $chunkSize - $overlap;

    for ($start = 0; $start < $length; $start += $step) {
        $chunks[] = mb_substr($text, $start, $chunkSize, 'UTF-8');
        // The window already reached the end of the text: stop here so no
        // trailing chunk made only of overlap is emitted.
        if ($start + $chunkSize >= $length) {
            break;
        }
    }

    return $chunks;
}
|
|
|
|
/**
 * Builds a random (version 4) UUID string from 16 bytes of random_bytes().
 *
 * @return string Lower-case hex in the canonical 8-4-4-4-12 layout.
 */
private function generateUuid(): string
{
    $bytes = random_bytes(16);

    // Force the version nibble to 4 and the variant bits to binary 10.
    $bytes[6] = chr((ord($bytes[6]) & 0x0f) | 0x40);
    $bytes[8] = chr((ord($bytes[8]) & 0x3f) | 0x80);

    $hex = bin2hex($bytes);

    return sprintf(
        '%s-%s-%s-%s-%s',
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12)
    );
}
|
|
|
|
// resolveOrgId() e' ereditato da BaseController (riga 351)
|
|
}
|