- New 5-step onboarding wizard (onboarding.html) replacing setup-org.html - Step 1: Choose data source (Upload Visura / CertiSource / Manual) - Step 2: PDF upload with AI extraction or CertiSource P.IVA lookup - Step 3: Verify/complete company data with NIS2 sector mapping - Step 4: User profile completion - Step 5: NIS2 classification (Essential/Important) with summary - OnboardingController with upload-visura, fetch-company, complete endpoints - VisuraService with Claude AI PDF extraction and ATECO-to-NIS2 mapping - CertiSource API integration for automatic company data retrieval - Updated login/register redirects to point to new onboarding wizard Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
311 lines
13 KiB
PHP
311 lines
13 KiB
PHP
<?php
|
|
/**
 * NIS2 Agile - Visura Service
 *
 * Extracts company data from a "visura camerale" (Italian Chamber of Commerce
 * company report) PDF through the Claude API, and retrieves company data from
 * the CertiSource API. Both paths also suggest a NIS2 sector derived from the
 * company's ATECO activity code or description.
 */
class VisuraService
{
    /**
     * Instruction sent to the model together with the PDF. Kept verbatim:
     * the field names listed here are the keys callers read from the result.
     */
    private const EXTRACTION_PROMPT = "Analizza questa visura camerale italiana ed estrai i seguenti dati in formato JSON. Rispondi SOLO con il JSON, senza testo aggiuntivo, senza markdown code blocks.\n\nCampi da estrarre:\n- company_name: ragione sociale completa\n- vat_number: partita IVA (solo numeri, senza prefisso IT)\n- fiscal_code: codice fiscale\n- legal_form: forma giuridica (es. S.R.L., S.P.A., ecc.)\n- address: indirizzo sede legale (via/piazza e numero civico)\n- city: comune sede legale\n- province: sigla provincia (es. MI, RM, TO)\n- zip_code: CAP\n- pec: indirizzo PEC se presente\n- phone: telefono se presente\n- ateco_code: codice ATECO principale se presente\n- ateco_description: descrizione attività ATECO se presente\n- incorporation_date: data di costituzione (formato YYYY-MM-DD)\n- share_capital: capitale sociale in EUR (solo numero)\n- employees_range: stima range dipendenti se indicato (es. \"10-49\", \"50-249\", \"250+\")\n- legal_representative: nome e cognome del legale rappresentante\n\nSe un campo non è presente nella visura, usa null come valore.";

    /**
     * Extract company data from a PDF visura camerale using Claude AI.
     *
     * @param string $filePath Path of the uploaded PDF on local disk.
     *
     * @return array Decoded JSON object returned by the model (keys as listed
     *               in the extraction prompt) plus a `suggested_sector` key.
     *
     * @throws RuntimeException If the file is missing or unreadable, the API
     *                          key is not configured, the API call fails, or
     *                          the answer is not a JSON object/array.
     */
    public function extractFromPdf(string $filePath): array
    {
        // is_file() (rather than file_exists()) also rejects directories.
        if (!is_file($filePath)) {
            throw new RuntimeException('File visura non trovato');
        }

        if (!ANTHROPIC_API_KEY) {
            throw new RuntimeException('Chiave API Anthropic non configurata');
        }

        // Read the PDF; a failed or empty read must not be sent to the API.
        $pdfContent = file_get_contents($filePath);
        if ($pdfContent === false || $pdfContent === '') {
            throw new RuntimeException('Impossibile leggere il file visura');
        }

        // The PDF is sent as a base64 "document" block followed by the
        // text block carrying the extraction instructions.
        $response = $this->callClaudeApi([
            [
                'type' => 'document',
                'source' => [
                    'type' => 'base64',
                    'media_type' => 'application/pdf',
                    'data' => base64_encode($pdfContent),
                ],
            ],
            [
                'type' => 'text',
                'text' => self::EXTRACTION_PROMPT,
            ],
        ]);

        if (!$response) {
            throw new RuntimeException('Nessuna risposta dall\'AI');
        }

        $data = $this->decodeAiJson($response);

        // Suggest a NIS2 sector from the ATECO code, falling back to the
        // description. The model may return numbers or nulls here, so the
        // values are normalised to strings first.
        $data['suggested_sector'] = $this->mapAtecoToNis2Sector(
            self::textOrEmpty($data['ateco_code'] ?? null),
            self::textOrEmpty($data['ateco_description'] ?? null)
        );

        $this->logAiInteraction('visura_extraction', 'Estrazione dati da visura camerale PDF');

        return $data;
    }

    /**
     * Fetch company data from the CertiSource API and normalise it.
     *
     * @param string $vatNumber VAT number (partita IVA) to look up; it is
     *                          URL-encoded and sent as the `vat` query parameter.
     *
     * @return array Normalised company data (same field names produced by
     *               extractFromPdf()), plus `suggested_sector` and
     *               `source => 'certisource'`.
     *
     * @throws RuntimeException On transport errors, non-200 responses or an
     *                          undecodable/empty response body.
     */
    public function fetchFromCertiSource(string $vatNumber): array
    {
        $baseUrl = $this->getCertiSourceBaseUrl();
        $endpoint = $baseUrl . '/api/company/enrich?vat=' . urlencode($vatNumber);

        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Impossibile contattare CertiSource: curl_init failed');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $endpoint,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json',
                'Accept: application/json',
                'X-Internal-Service: nis2-agile',
            ],
            // Certificate verification is skipped only for genuine loopback
            // calls. The default base URL is a public host name, where
            // disabling verification would allow the response to be spoofed.
            CURLOPT_SSL_VERIFYPEER => !$this->isLoopbackUrl($baseUrl),
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($error) {
            error_log("[CERTISOURCE_CURL_ERROR] $error");
            throw new RuntimeException('Impossibile contattare CertiSource: ' . $error);
        }

        if ($httpCode !== 200) {
            error_log("[CERTISOURCE_HTTP_ERROR] HTTP $httpCode: $response");
            throw new RuntimeException('CertiSource ha restituito un errore (HTTP ' . $httpCode . ')');
        }

        $result = json_decode((string) $response, true);
        if (!is_array($result) || $result === []) {
            throw new RuntimeException('Risposta CertiSource non valida');
        }

        // The payload may be wrapped in a `data` envelope or returned flat.
        $companyData = $result['data'] ?? $result;
        if (!is_array($companyData)) {
            throw new RuntimeException('Risposta CertiSource non valida');
        }

        // Resolve the ATECO fields once, so the returned values and the
        // sector suggestion are always derived from the same keys
        // (Italian key first, English key as fallback).
        $atecoCode = $companyData['codice_ateco'] ?? $companyData['ateco_code'] ?? null;
        $atecoDescription = $companyData['descrizione_ateco'] ?? $companyData['ateco_description'] ?? null;

        return [
            'company_name' => $companyData['ragione_sociale'] ?? $companyData['denominazione'] ?? $companyData['company_name'] ?? null,
            'vat_number' => $companyData['partita_iva'] ?? $companyData['vat_number'] ?? $vatNumber,
            'fiscal_code' => $companyData['codice_fiscale'] ?? $companyData['fiscal_code'] ?? null,
            'legal_form' => $companyData['forma_giuridica'] ?? $companyData['legal_form'] ?? null,
            'address' => $companyData['indirizzo'] ?? $companyData['address'] ?? null,
            'city' => $companyData['comune'] ?? $companyData['city'] ?? null,
            'province' => $companyData['provincia'] ?? $companyData['province'] ?? null,
            'zip_code' => $companyData['cap'] ?? $companyData['zip_code'] ?? null,
            'pec' => $companyData['pec'] ?? null,
            'phone' => $companyData['telefono'] ?? $companyData['phone'] ?? null,
            'ateco_code' => $atecoCode,
            'ateco_description' => $atecoDescription,
            'suggested_sector' => $this->mapAtecoToNis2Sector(
                self::textOrEmpty($atecoCode),
                self::textOrEmpty($atecoDescription)
            ),
            'source' => 'certisource',
        ];
    }

    /**
     * Decode the model's textual answer into an array.
     *
     * Strips an optional surrounding markdown code fence before decoding.
     *
     * @param string $response Raw text returned by the model.
     *
     * @return array Decoded JSON.
     *
     * @throws RuntimeException If the text is not valid JSON or does not
     *                          decode to an array.
     */
    private function decodeAiJson(string $response): array
    {
        $jsonStr = trim($response);
        // The prompt asks for bare JSON, but a fenced block is tolerated.
        $jsonStr = preg_replace('/^```(?:json)?\s*/i', '', $jsonStr) ?? $jsonStr;
        $jsonStr = preg_replace('/\s*```$/', '', $jsonStr) ?? $jsonStr;

        $data = json_decode($jsonStr, true);

        // Valid JSON scalars ("ok", 42, null) are rejected as well: callers
        // index the result by field name.
        if (json_last_error() !== JSON_ERROR_NONE || !is_array($data)) {
            error_log('[VISURA_PARSE_ERROR] Could not parse AI response: ' . $jsonStr);
            throw new RuntimeException('Impossibile interpretare i dati estratti dalla visura');
        }

        return $data;
    }

    /**
     * Map an ATECO code (or, failing that, its description) to a NIS2 sector.
     *
     * The first two digits of the code (the ATECO division) are looked up
     * first; if there is no match, the description is searched for Italian
     * keyword stems. The mapping is approximate.
     *
     * @param string $atecoCode ATECO code such as "62.01.00"; may be empty.
     * @param string $atecoDesc Activity description; may be empty.
     *
     * @return string|null Sector identifier, or null when nothing matches.
     */
    private function mapAtecoToNis2Sector(string $atecoCode, string $atecoDesc): ?string
    {
        // ATECO division (first two digits) to NIS2 sector, approximate.
        $divisionToSector = [
            '35' => 'energy_electricity',       // Electricity, gas, steam
            '49' => 'transport_road',           // Land transport
            '50' => 'transport_water',          // Water transport
            '51' => 'transport_air',            // Air transport
            '64' => 'banking',                  // Financial services
            '65' => 'banking',                  // Insurance
            '66' => 'financial_markets',        // Financial auxiliaries
            '86' => 'health',                   // Health
            '36' => 'drinking_water',           // Water supply
            '37' => 'waste_water',              // Sewerage
            '38' => 'waste_management',         // Waste management
            '61' => 'digital_infrastructure',   // Telecommunications
            '62' => 'ict_service_management',   // IT services
            '63' => 'digital_providers',        // Information services
            '84' => 'public_administration',    // Public admin
            '53' => 'postal_courier',           // Postal services
            '20' => 'chemicals',                // Chemicals manufacturing
            '10' => 'food',                     // Food manufacturing
            '11' => 'food',                     // Beverages
            '21' => 'manufacturing_medical',    // Pharma/medical
            '26' => 'manufacturing_computers',  // Electronics
            '27' => 'manufacturing_electrical', // Electrical equipment
            '28' => 'manufacturing_machinery',  // Machinery
            '29' => 'manufacturing_vehicles',   // Motor vehicles
            '30' => 'manufacturing_transport',  // Other transport
            '72' => 'research',                 // Scientific research
        ];

        // Take the first two digits, tolerating leading whitespace or a
        // non-numeric label in front of the code (e.g. " 62.01").
        if (preg_match('/^\D*(\d{2})/', $atecoCode, $match) === 1
            && isset($divisionToSector[$match[1]])
        ) {
            return $divisionToSector[$match[1]];
        }

        // Keyword stems searched in the lower-cased description, in order.
        // The specific transport stems must come before the generic
        // "trasport", otherwise "trasporto ferroviario" would be classified
        // as road transport.
        $keywordToSector = [
            'energia' => 'energy_electricity',
            'elettric' => 'energy_electricity',
            'gas' => 'energy_gas',
            'petroli' => 'energy_oil',
            'ferrov' => 'transport_rail',
            'maritt' => 'transport_water',
            'aere' => 'transport_air',
            'trasport' => 'transport_road',
            'banc' => 'banking',
            'finanz' => 'financial_markets',
            'sanit' => 'health',
            'osped' => 'health',
            'farm' => 'manufacturing_medical',
            'acqua' => 'drinking_water',
            'rifiut' => 'waste_management',
            'telecom' => 'digital_infrastructure',
            'informatica' => 'ict_service_management',
            'software' => 'ict_service_management',
            'digital' => 'digital_providers',
            'postale' => 'postal_courier',
            'corriere' => 'postal_courier',
            'chimic' => 'chemicals',
            'alimentar' => 'food',
            'ricerca' => 'research',
        ];

        // All stems are plain ASCII, so byte-wise lower-casing is sufficient.
        $haystack = strtolower($atecoDesc);

        foreach ($keywordToSector as $stem => $sector) {
            if (str_contains($haystack, $stem)) {
                return $sector;
            }
        }

        return null;
    }

    /**
     * Send a single user message to the Claude Messages API.
     *
     * @param array $content List of content blocks for the user message.
     *
     * @return string|null Text of the first content block that carries text,
     *                     or null when the response contains none.
     *
     * @throws RuntimeException On encoding failure, transport error or a
     *                          non-200 HTTP status.
     */
    private function callClaudeApi(array $content): ?string
    {
        $body = json_encode([
            'model' => ANTHROPIC_MODEL,
            'max_tokens' => ANTHROPIC_MAX_TOKENS,
            'messages' => [
                [
                    'role' => 'user',
                    'content' => $content,
                ],
            ],
        ]);

        if ($body === false) {
            throw new RuntimeException('Claude API error: ' . json_last_error_msg());
        }

        $ch = curl_init('https://api.anthropic.com/v1/messages');
        if ($ch === false) {
            throw new RuntimeException('Claude API error: curl_init failed');
        }

        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json',
                'x-api-key: ' . ANTHROPIC_API_KEY,
                'anthropic-version: 2023-06-01',
            ],
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($error) {
            throw new RuntimeException('Claude API error: ' . $error);
        }

        if ($httpCode !== 200) {
            error_log("[CLAUDE_API_ERROR] HTTP $httpCode: $response");
            throw new RuntimeException('Claude API returned HTTP ' . $httpCode);
        }

        $result = json_decode((string) $response, true);
        $blocks = is_array($result) ? ($result['content'] ?? null) : null;
        if (!is_array($blocks)) {
            return null;
        }

        // Return the first block that actually carries text rather than
        // assuming it is always the one at index 0.
        foreach ($blocks as $block) {
            if (is_array($block) && isset($block['text']) && is_string($block['text'])) {
                return $block['text'];
            }
        }

        return null;
    }

    /**
     * Get the CertiSource API base URL, without a trailing slash.
     *
     * @return string The CERTISOURCE_API_URL constant when defined, otherwise
     *                the default public CertiSource URL.
     */
    private function getCertiSourceBaseUrl(): string
    {
        $baseUrl = defined('CERTISOURCE_API_URL')
            ? (string) CERTISOURCE_API_URL
            : 'https://certisource.it/certisource';

        // Callers append "/api/..."; avoid producing a double slash.
        return rtrim($baseUrl, '/');
    }

    /**
     * Tell whether a URL points at the local machine.
     *
     * @param string $url Absolute URL.
     *
     * @return bool True for the hosts localhost, 127.0.0.1 and ::1.
     */
    private function isLoopbackUrl(string $url): bool
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host)) {
            return false;
        }

        // IPv6 literals are bracketed in URLs ("[::1]").
        $host = strtolower(trim($host, '[]'));

        return in_array($host, ['localhost', '127.0.0.1', '::1'], true);
    }

    /**
     * Convert a decoded JSON value to a string for the sector mapping.
     *
     * @param mixed $value Any decoded value.
     *
     * @return string The value as a string when it is a string or a number,
     *                otherwise an empty string.
     */
    private static function textOrEmpty($value): string
    {
        return (is_string($value) || is_int($value) || is_float($value)) ? (string) $value : '';
    }

    /**
     * Record an AI interaction in the `ai_interactions` table (best effort).
     *
     * The user id is read from the `sub` claim of the bearer token in the
     * Authorization header; nothing is written when no user id is found.
     * The token signature is NOT verified here.
     * NOTE(review): presumably the request was authenticated upstream - confirm.
     *
     * @param string $type    Interaction label supplied by the caller. It is
     *                        currently not persisted: rows are always stored
     *                        with interaction_type 'qa'.
     *                        NOTE(review): confirm which values the column accepts
     *                        before storing $type instead.
     * @param string $summary Short description stored as `prompt_summary`.
     */
    private function logAiInteraction(string $type, string $summary): void
    {
        try {
            $userId = null;
            $authHeader = $_SERVER['HTTP_AUTHORIZATION'] ?? '';

            if (preg_match('/Bearer\s+(.+)$/i', $authHeader, $matches)) {
                $segments = explode('.', $matches[1]);
                if (count($segments) === 3) {
                    // JWT segments are base64url encoded; map back to the
                    // standard alphabet before decoding.
                    $claims = json_decode(base64_decode(strtr($segments[1], '-_', '+/')), true);
                    if (is_array($claims)) {
                        $userId = $claims['sub'] ?? null;
                    }
                }
            }

            if ($userId) {
                Database::insert('ai_interactions', [
                    'organization_id' => 0, // Not yet created during onboarding
                    'user_id' => $userId,
                    'interaction_type' => 'qa',
                    'prompt_summary' => $summary,
                    'response_summary' => 'Dati estratti',
                    'tokens_used' => 0,
                    'model_used' => ANTHROPIC_MODEL,
                ]);
            }
        } catch (Throwable $e) {
            // Deliberately non-fatal: a logging failure must not break the
            // onboarding flow, but it is still reported to the error log.
            error_log('[AI_LOG_ERROR] ' . $e->getMessage());
        }
    }
}
|