nis2-agile/application/services/VisuraService.php
Cristiano Benassati 9aa2788c68 [FEAT] Add onboarding wizard with visura camerale and CertiSource integration
- New 5-step onboarding wizard (onboarding.html) replacing setup-org.html
- Step 1: Choose data source (Upload Visura / CertiSource / Manual)
- Step 2: PDF upload with AI extraction or CertiSource P.IVA lookup
- Step 3: Verify/complete company data with NIS2 sector mapping
- Step 4: User profile completion
- Step 5: NIS2 classification (Essential/Important) with summary
- OnboardingController with upload-visura, fetch-company, complete endpoints
- VisuraService with Claude AI PDF extraction and ATECO-to-NIS2 mapping
- CertiSource API integration for automatic company data retrieval
- Updated login/register redirects to point to new onboarding wizard

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 19:01:34 +01:00

311 lines
13 KiB
PHP

<?php
/**
 * NIS2 Agile - Visura Service
 *
 * Extracts company data from an Italian chamber-of-commerce report
 * ("visura camerale") PDF through the Claude API, and retrieves company
 * data from the CertiSource API. Both public entry points return a flat
 * associative array that also carries a `suggested_sector` derived from
 * the ATECO code / description.
 *
 * Uses the global constants ANTHROPIC_API_KEY, ANTHROPIC_MODEL and
 * ANTHROPIC_MAX_TOKENS, the optional CERTISOURCE_API_URL, and a `Database`
 * class exposing a static `insert()`; none of them is declared in this file.
 */
class VisuraService
{
    /** Claude Messages API endpoint used for the PDF extraction. */
    private const CLAUDE_API_URL = 'https://api.anthropic.com/v1/messages';

    /** CertiSource base URL used when CERTISOURCE_API_URL is not defined. */
    private const CERTISOURCE_DEFAULT_URL = 'https://certisource.it/certisource';

    /** Upper bound (in bytes) of an unparseable AI reply copied to the error log. */
    private const LOG_EXCERPT_BYTES = 500;

    /** Instruction sent to the model together with the PDF document. */
    private const EXTRACTION_PROMPT = "Analizza questa visura camerale italiana ed estrai i seguenti dati in formato JSON. Rispondi SOLO con il JSON, senza testo aggiuntivo, senza markdown code blocks.\n\nCampi da estrarre:\n- company_name: ragione sociale completa\n- vat_number: partita IVA (solo numeri, senza prefisso IT)\n- fiscal_code: codice fiscale\n- legal_form: forma giuridica (es. S.R.L., S.P.A., ecc.)\n- address: indirizzo sede legale (via/piazza e numero civico)\n- city: comune sede legale\n- province: sigla provincia (es. MI, RM, TO)\n- zip_code: CAP\n- pec: indirizzo PEC se presente\n- phone: telefono se presente\n- ateco_code: codice ATECO principale se presente\n- ateco_description: descrizione attività ATECO se presente\n- incorporation_date: data di costituzione (formato YYYY-MM-DD)\n- share_capital: capitale sociale in EUR (solo numero)\n- employees_range: stima range dipendenti se indicato (es. \"10-49\", \"50-249\", \"250+\")\n- legal_representative: nome e cognome del legale rappresentante\n\nSe un campo non è presente nella visura, usa null come valore.";

    /**
     * Three-digit ATECO groups whose sector differs from the one assigned to
     * their two-digit division. Checked before ATECO_DIVISION_SECTORS.
     */
    private const ATECO_GROUP_SECTORS = [
        '352' => 'energy_gas',     // 35.2 Gas manufacture and distribution
        '491' => 'transport_rail', // 49.1 Passenger rail transport
        '492' => 'transport_rail', // 49.2 Freight rail transport
    ];

    /** Two-digit ATECO division => NIS2 sector (approximate mapping). */
    private const ATECO_DIVISION_SECTORS = [
        '35' => 'energy_electricity',       // Electricity, gas, steam
        '49' => 'transport_road',           // Land transport
        '50' => 'transport_water',          // Water transport
        '51' => 'transport_air',            // Air transport
        '64' => 'banking',                  // Financial services
        '65' => 'banking',                  // Insurance
        '66' => 'financial_markets',        // Financial auxiliaries
        '86' => 'health',                   // Health
        '36' => 'drinking_water',           // Water supply
        '37' => 'waste_water',              // Sewerage
        '38' => 'waste_management',         // Waste management
        '61' => 'digital_infrastructure',   // Telecommunications
        '62' => 'ict_service_management',   // IT services
        '63' => 'digital_providers',        // Information services
        '84' => 'public_administration',    // Public admin
        '53' => 'postal_courier',           // Postal services
        '20' => 'chemicals',                // Chemicals manufacturing
        '10' => 'food',                     // Food manufacturing
        '11' => 'food',                     // Beverages
        '21' => 'manufacturing_medical',    // Pharma/medical
        '26' => 'manufacturing_computers',  // Electronics
        '27' => 'manufacturing_electrical', // Electrical equipment
        '28' => 'manufacturing_machinery',  // Machinery
        '29' => 'manufacturing_vehicles',   // Motor vehicles
        '30' => 'manufacturing_transport',  // Other transport
        '72' => 'research',                 // Scientific research
    ];

    /**
     * Regex fragment => NIS2 sector, tested in order against the lower-cased
     * ATECO description; the first hit wins. Mode-specific transport words
     * come before the generic "trasport" so they are not shadowed by it, and
     * "gas" needs word boundaries so that e.g. "gastronomia" does not match.
     */
    private const DESCRIPTION_PATTERNS = [
        'energia'     => 'energy_electricity',
        'elettric'    => 'energy_electricity',
        '\bgas\b'     => 'energy_gas',
        'petroli'     => 'energy_oil',
        'ferrov'      => 'transport_rail',
        'maritt'      => 'transport_water',
        'aere'        => 'transport_air',
        'trasport'    => 'transport_road',
        'banc'        => 'banking',
        'finanz'      => 'financial_markets',
        'sanit'       => 'health',
        'osped'       => 'health',
        'farm'        => 'manufacturing_medical',
        'acqua'       => 'drinking_water',
        'rifiut'      => 'waste_management',
        'telecom'     => 'digital_infrastructure',
        'informatica' => 'ict_service_management',
        'software'    => 'ict_service_management',
        'digital'     => 'digital_providers',
        'postale'     => 'postal_courier',
        'corriere'    => 'postal_courier',
        'chimic'      => 'chemicals',
        'alimentar'   => 'food',
        'ricerca'     => 'research',
    ];

    /**
     * Extract company data from a PDF visura camerale using Claude AI.
     *
     * The PDF is sent base64-encoded as a `document` block together with
     * EXTRACTION_PROMPT; the textual reply is decoded as a JSON object and
     * enriched with `suggested_sector`.
     *
     * @param string $filePath Path of the PDF on the local filesystem.
     *
     * @return array<string, mixed> Fields returned by the model plus `suggested_sector`.
     *
     * @throws RuntimeException If the file is missing or unreadable, the API key
     *                          is not configured, the API call fails or returns
     *                          no text, or the reply is not a JSON object/array.
     */
    public function extractFromPdf(string $filePath): array
    {
        // is_file() also rejects directories, which file_exists() would accept.
        if (!is_file($filePath) || !is_readable($filePath)) {
            throw new RuntimeException('File visura non trovato');
        }
        // defined() first: reading an undefined constant raises Error, not the
        // RuntimeException callers of this method are told to expect.
        if (!defined('ANTHROPIC_API_KEY') || !ANTHROPIC_API_KEY) {
            throw new RuntimeException('Chiave API Anthropic non configurata');
        }

        $pdfContent = file_get_contents($filePath);
        if ($pdfContent === false || $pdfContent === '') {
            throw new RuntimeException('Impossibile leggere il file visura');
        }

        $response = $this->callClaudeApi([
            [
                'type' => 'document',
                'source' => [
                    'type' => 'base64',
                    'media_type' => 'application/pdf',
                    'data' => base64_encode($pdfContent),
                ],
            ],
            [
                'type' => 'text',
                'text' => self::EXTRACTION_PROMPT,
            ],
        ]);
        if ($response === null || trim($response) === '') {
            throw new RuntimeException('Nessuna risposta dall\'AI');
        }

        $data = $this->decodeAiJson($response);
        if ($data === null) {
            // Only an excerpt is logged: the reply may contain personal data
            // taken from the visura (e.g. the legal representative's name).
            error_log(
                '[VISURA_PARSE_ERROR] Could not parse AI response: '
                . substr(trim($response), 0, self::LOG_EXCERPT_BYTES)
            );
            throw new RuntimeException('Impossibile interpretare i dati estratti dalla visura');
        }

        // The model may emit numbers or null for these fields; normalise to
        // strings before handing them to the string-typed mapper.
        $data['suggested_sector'] = $this->mapAtecoToNis2Sector(
            $this->scalarToString($data['ateco_code'] ?? null),
            $this->scalarToString($data['ateco_description'] ?? null)
        );

        $this->logAiInteraction('visura_extraction', 'Estrazione dati da visura camerale PDF');

        return $data;
    }

    /**
     * Fetch company data from the CertiSource API and normalise it.
     *
     * Performs a GET on `<base>/api/company/enrich?vat=<vat>` and maps either
     * the Italian or the English field names of the reply to this service's
     * output keys.
     *
     * @param string $vatNumber VAT number to look up; surrounding whitespace is ignored.
     *
     * @return array<string, mixed> Normalised company fields, `suggested_sector`
     *                              and `source` = 'certisource'.
     *
     * @throws RuntimeException If the VAT number is blank, the request fails,
     *                          the HTTP status is not 200, or the body is not a
     *                          non-empty JSON object/array.
     */
    public function fetchFromCertiSource(string $vatNumber): array
    {
        $vatNumber = trim($vatNumber);
        if ($vatNumber === '') {
            throw new RuntimeException('Partita IVA mancante');
        }

        $baseUrl = $this->getCertiSourceBaseUrl();

        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Impossibile contattare CertiSource: curl_init failed');
        }
        curl_setopt_array($ch, [
            CURLOPT_URL => $baseUrl . '/api/company/enrich?vat=' . urlencode($vatNumber),
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json',
                'Accept: application/json',
                'X-Internal-Service: nis2-agile',
            ],
            // Certificate verification is skipped only for loopback hosts.
            // The default base URL is a public hostname, so turning it off
            // unconditionally would leave the call open to interception.
            CURLOPT_SSL_VERIFYPEER => !$this->isLoopbackUrl($baseUrl),
        ]);
        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($response === false || $error !== '') {
            error_log("[CERTISOURCE_CURL_ERROR] $error");
            throw new RuntimeException('Impossibile contattare CertiSource: ' . $error);
        }
        if ($httpCode !== 200) {
            error_log("[CERTISOURCE_HTTP_ERROR] HTTP $httpCode: $response");
            throw new RuntimeException('CertiSource ha restituito un errore (HTTP ' . $httpCode . ')');
        }

        $result = json_decode((string) $response, true);
        if (!is_array($result) || $result === []) {
            throw new RuntimeException('Risposta CertiSource non valida');
        }

        // The payload may be wrapped in a `data` envelope or returned bare.
        $companyData = $result['data'] ?? $result;
        if (!is_array($companyData)) {
            throw new RuntimeException('Risposta CertiSource non valida');
        }

        // Resolved once so that the returned ATECO fields and the suggested
        // sector are always derived from the same values, whichever key
        // naming (Italian or English) CertiSource used.
        $atecoCode = $companyData['codice_ateco'] ?? $companyData['ateco_code'] ?? null;
        $atecoDescription = $companyData['descrizione_ateco'] ?? $companyData['ateco_description'] ?? null;

        return [
            'company_name' => $companyData['ragione_sociale'] ?? $companyData['denominazione'] ?? $companyData['company_name'] ?? null,
            'vat_number' => $companyData['partita_iva'] ?? $companyData['vat_number'] ?? $vatNumber,
            'fiscal_code' => $companyData['codice_fiscale'] ?? $companyData['fiscal_code'] ?? null,
            'legal_form' => $companyData['forma_giuridica'] ?? $companyData['legal_form'] ?? null,
            'address' => $companyData['indirizzo'] ?? $companyData['address'] ?? null,
            'city' => $companyData['comune'] ?? $companyData['city'] ?? null,
            'province' => $companyData['provincia'] ?? $companyData['province'] ?? null,
            'zip_code' => $companyData['cap'] ?? $companyData['zip_code'] ?? null,
            'pec' => $companyData['pec'] ?? null,
            'phone' => $companyData['telefono'] ?? $companyData['phone'] ?? null,
            'ateco_code' => $atecoCode,
            'ateco_description' => $atecoDescription,
            'suggested_sector' => $this->mapAtecoToNis2Sector(
                $this->scalarToString($atecoCode),
                $this->scalarToString($atecoDescription)
            ),
            'source' => 'certisource',
        ];
    }

    /**
     * Map an ATECO code (and, failing that, its description) to a NIS2 sector.
     *
     * Lookup order: three-digit group overrides, two-digit division table,
     * then keyword patterns on the lower-cased description.
     *
     * @param string $atecoCode ATECO code such as "62.01", "62.01.00" or "6201";
     *                          leading non-digit characters are ignored.
     * @param string $atecoDesc Free-text activity description (Italian keywords).
     *
     * @return string|null Sector identifier, or null when nothing matches.
     */
    private function mapAtecoToNis2Sector(string $atecoCode, string $atecoDesc): ?string
    {
        // Anchored on the first two *consecutive* digits: "1.11" must not be
        // read as division 11. An optional separator and third digit give
        // the group used by the override table.
        if (preg_match('/^\D*(\d{2})(?:\D?(\d))?/', $atecoCode, $parts) === 1) {
            $division = $parts[1];
            $group = isset($parts[2]) ? $division . $parts[2] : null;

            $sector = ($group !== null ? (self::ATECO_GROUP_SECTORS[$group] ?? null) : null)
                ?? (self::ATECO_DIVISION_SECTORS[$division] ?? null);
            if ($sector !== null) {
                return $sector;
            }
        }

        // All patterns are plain ASCII, so byte-wise lower-casing is enough.
        $description = strtolower($atecoDesc);
        foreach (self::DESCRIPTION_PATTERNS as $pattern => $sector) {
            if (preg_match('/' . $pattern . '/', $description) === 1) {
                return $sector;
            }
        }

        return null;
    }

    /**
     * Send a single user message to the Claude Messages API.
     *
     * @param array $content Content blocks of the user message.
     *
     * @return string|null Text of the first content block that carries a
     *                     string `text`, or null when the reply has none.
     *
     * @throws RuntimeException If the payload cannot be encoded, the transfer
     *                          fails, or the HTTP status is not 200.
     */
    private function callClaudeApi(array $content): ?string
    {
        $body = json_encode([
            'model' => ANTHROPIC_MODEL,
            'max_tokens' => ANTHROPIC_MAX_TOKENS,
            'messages' => [
                [
                    'role' => 'user',
                    'content' => $content,
                ],
            ],
        ]);
        if ($body === false) {
            throw new RuntimeException('Claude API error: ' . json_last_error_msg());
        }

        $ch = curl_init(self::CLAUDE_API_URL);
        if ($ch === false) {
            throw new RuntimeException('Claude API error: curl_init failed');
        }
        curl_setopt_array($ch, [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json',
                'x-api-key: ' . ANTHROPIC_API_KEY,
                'anthropic-version: 2023-06-01',
            ],
        ]);
        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($response === false || $error !== '') {
            throw new RuntimeException('Claude API error: ' . $error);
        }
        if ($httpCode !== 200) {
            error_log("[CLAUDE_API_ERROR] HTTP $httpCode: $response");
            throw new RuntimeException('Claude API returned HTTP ' . $httpCode);
        }

        $result = json_decode((string) $response, true);
        $blocks = is_array($result) ? ($result['content'] ?? null) : null;
        if (!is_array($blocks)) {
            return null;
        }
        // Do not assume the text block is at index 0.
        foreach ($blocks as $block) {
            if (is_array($block) && isset($block['text']) && is_string($block['text'])) {
                return $block['text'];
            }
        }

        return null;
    }

    /**
     * Get the CertiSource API base URL, without a trailing slash.
     *
     * CERTISOURCE_API_URL takes precedence when it is defined and non-empty;
     * otherwise the public default CERTISOURCE_DEFAULT_URL is used.
     */
    private function getCertiSourceBaseUrl(): string
    {
        if (defined('CERTISOURCE_API_URL')) {
            // Trailing slashes are dropped so that appending "/api/..." never
            // produces a double slash.
            $configured = rtrim(trim((string) CERTISOURCE_API_URL), '/');
            if ($configured !== '') {
                return $configured;
            }
        }

        return self::CERTISOURCE_DEFAULT_URL;
    }

    /**
     * Record the AI interaction in the `ai_interactions` table (best effort).
     *
     * The user id is read from the `sub` claim of the Bearer token in the
     * Authorization header. Nothing is written when no user id is found, and
     * any failure is logged and swallowed so it cannot break the caller.
     *
     * NOTE(review): the token payload is decoded here WITHOUT verifying its
     * signature; this presumably relies on the request having been
     * authenticated earlier - confirm against the controller.
     * NOTE(review): $type is accepted but the stored `interaction_type` is
     * the fixed value 'qa'; possibly a column constraint - confirm before
     * wiring $type through.
     *
     * @param string $type    Interaction label supplied by the caller (currently not stored).
     * @param string $summary Stored as `prompt_summary`.
     */
    private function logAiInteraction(string $type, string $summary): void
    {
        try {
            // Some server setups expose the header only under the REDIRECT_ name.
            $header = $_SERVER['HTTP_AUTHORIZATION'] ?? $_SERVER['REDIRECT_HTTP_AUTHORIZATION'] ?? '';

            $userId = null;
            if (is_string($header) && preg_match('/Bearer\s+(.+)$/i', $header, $matches)) {
                $segments = explode('.', $matches[1]);
                if (count($segments) === 3) {
                    // JWT segments are base64url; translate to the standard alphabet.
                    $claims = json_decode(base64_decode(strtr($segments[1], '-_', '+/')), true);
                    $userId = is_array($claims) ? ($claims['sub'] ?? null) : null;
                }
            }

            if ($userId) {
                Database::insert('ai_interactions', [
                    'organization_id' => 0, // Not yet created during onboarding
                    'user_id' => $userId,
                    'interaction_type' => 'qa',
                    'prompt_summary' => $summary,
                    'response_summary' => 'Dati estratti',
                    'tokens_used' => 0,
                    'model_used' => ANTHROPIC_MODEL,
                ]);
            }
        } catch (Throwable $e) {
            // Deliberately non-fatal: logging must not break the extraction flow.
            error_log('[AI_LOG_ERROR] ' . $e->getMessage());
        }
    }

    /**
     * Decode the model's reply into an array.
     *
     * Strips an optional Markdown code fence; if the remainder is still not
     * valid JSON, retries on the span between the first "{" and the last "}"
     * to tolerate prose around the object.
     *
     * @return array<mixed>|null Decoded array, or null when no JSON array/object is found.
     */
    private function decodeAiJson(string $raw): ?array
    {
        $json = trim($raw);
        $json = preg_replace('/^```(?:json)?\s*/i', '', $json) ?? $json;
        $json = preg_replace('/\s*```$/', '', $json) ?? $json;

        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            $start = strpos($json, '{');
            $end = strrpos($json, '}');
            if ($start !== false && $end !== false && $end > $start) {
                $decoded = json_decode(substr($json, $start, $end - $start + 1), true);
            }
        }

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Convert a decoded JSON value to a trimmed string.
     *
     * @param mixed $value Any decoded value.
     *
     * @return string The trimmed string form of a scalar, '' for anything else.
     */
    private function scalarToString($value): string
    {
        return is_scalar($value) ? trim((string) $value) : '';
    }

    /**
     * Tell whether the URL's host is a loopback address
     * (localhost, 127.x.x.x or ::1).
     */
    private function isLoopbackUrl(string $url): bool
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return false;
        }
        // IPv6 literals are returned with their surrounding brackets.
        $host = strtolower(trim($host, '[]'));

        return $host === 'localhost'
            || $host === '::1'
            || preg_match('/^127(?:\.\d{1,3}){3}$/', $host) === 1;
    }
}