invoice-processing-google-d.../services/invoice_processor_service.py
2025-08-26 12:26:03 +02:00

56 lines
2.3 KiB
Python

# services/invoice_processor_service.py
import logging
from typing import Dict, List, Any
# Importamos nuestro nuevo cliente GCP de forma local y limpia
from .gcp_document_ai_client import process_document_gcp
# (Opcional, si tienes utilidades) from .utils import data_cleaner
# Importamos la configuración centralizada
from core.config import settings
# --- Lógica de negocio extraída del antiguo processing.py ---
def _get_confidence_threshold_for_field(field_type: str) -> float:
    """Return the minimum confidence required to accept ``field_type``.

    The value is read from ``settings.CONFIDENCE_THRESHOLDS``. Field types
    without an entry of their own use the ``"__default__"`` entry.

    Args:
        field_type: Entity type name used as the lookup key.

    Returns:
        The threshold stored for ``field_type``, or the default threshold.

    Raises:
        KeyError: If ``settings.CONFIDENCE_THRESHOLDS`` has no
            ``"__default__"`` entry. The fallback is resolved on every call,
            even when ``field_type`` has its own entry.
    """
    thresholds = settings.CONFIDENCE_THRESHOLDS
    fallback = thresholds["__default__"]
    return thresholds.get(field_type, fallback)
def _extract_specific_fields(entities: List[Any]) -> Dict[str, str]:
    """Pick the required invoice fields out of a list of extracted entities.

    Every field named in ``settings.REQUIRED_FIELDS`` starts out with the
    placeholder ``"Not found or low confidence"``. An entity replaces the
    placeholder when its type is a required field and its confidence reaches
    the threshold for that type.

    When several qualifying entities share the same type, the one with the
    highest confidence is kept (on a tie the later entity wins). A weaker
    duplicate therefore cannot overwrite a stronger match merely because it
    appears later in the list.

    Args:
        entities: Objects exposing ``type_``, ``confidence`` and
            ``mention_text`` attributes.

    Returns:
        A dict with one entry per required field, holding either the cleaned
        entity text (newlines replaced by spaces, surrounding whitespace
        stripped) or the placeholder.
    """
    extracted_data = {
        field: "Not found or low confidence" for field in settings.REQUIRED_FIELDS
    }
    # Confidence of the entity currently stored for each field.
    best_confidence: Dict[str, float] = {}

    for entity in entities:
        entity_type = entity.type_
        # The result dict is keyed by exactly the required fields, so this is
        # an O(1) membership test; irrelevant entities are skipped before the
        # threshold lookup.
        if entity_type not in extracted_data:
            continue

        confidence = entity.confidence
        threshold = _get_confidence_threshold_for_field(entity_type)
        # The stored best defaults to the threshold, so the first qualifying
        # entity of a type is always accepted.
        if confidence >= threshold and confidence >= best_confidence.get(
            entity_type, threshold
        ):
            best_confidence[entity_type] = confidence
            # TODO: normalize 'invoice_date' values once a date-cleaning
            # utility is available.
            extracted_data[entity_type] = (
                entity.mention_text.replace("\n", " ").strip()
            )

    return extracted_data
# --- Función principal del servicio ---
def process_invoice_from_bytes(file_bytes: bytes, mime_type: str) -> Dict[str, str]:
    """Run one invoice through Document AI and return its validated fields.

    The raw file is sent to the processor configured in ``settings`` through
    ``process_document_gcp``. The entities of the returned document are then
    filtered by ``_extract_specific_fields``.

    Args:
        file_bytes: Raw content of the invoice file.
        mime_type: MIME type forwarded to the Document AI client.

    Returns:
        The field mapping produced by ``_extract_specific_fields``.

    Raises:
        Exception: Any error raised along the way is logged with its
            traceback and then re-raised unchanged.
    """
    try:
        # Step 1: delegate the Google API call to the dedicated client.
        parsed_document = process_document_gcp(
            project_id=settings.GCP_PROJECT_ID,
            location=settings.GCP_LOCATION,
            processor_id=settings.DOCAI_PROCESSOR_ID,
            file_bytes=file_bytes,
            mime_type=mime_type,
        )

        # Step 2: apply the business rules that select and validate fields.
        document_entities = parsed_document.entities
        invoice_fields = _extract_specific_fields(document_entities)

        logging.info("Documento procesado con éxito y datos validados.")
        return invoice_fields
    except Exception as e:
        logging.error(f"Error en el flujo de procesamiento de factura: {e}", exc_info=True)
        # Re-raise unchanged; per the original note, the API endpoint is
        # expected to catch this and answer with a 500.
        raise