version 1 - with FastAPI

This commit is contained in:
Daniel Oscar Zamo 2025-08-27 00:00:34 +02:00
parent ea67f3c30b
commit db8680baa2
7 changed files with 311 additions and 94 deletions

View File

@ -1,35 +1,43 @@
# api/routers/invoices.py # api/routers/invoices.py
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException from fastapi import APIRouter, Depends, UploadFile, File, HTTPException, status, Form
from typing import Dict from typing import Dict, Optional
from api.dependencies import get_current_active_user from api.dependencies import get_current_active_user
from services import invoice_processor_service from services import invoice_processor_service
from core.config import settings
from db.models import User from db.models import User
router = APIRouter() router = APIRouter()
ALLOWED_CONTENT_TYPES = ["application/pdf", "image/jpeg", "image/png", "image/tiff"]
@router.post("/upload", response_model=Dict[str, str])
async def upload_invoice(
    current_user: User = Depends(get_current_active_user),
    file: UploadFile = File(...),
    # Optional form field; FastAPI rejects values outside [0.0, 1.0].
    confidence_threshold: Optional[float] = Form(None, ge=0.0, le=1.0),
):
    """
    Upload an invoice, run it through the processing service and return the
    extracted fields.

    Args:
        current_user: Resolved by ``get_current_active_user``; declaring the
            dependency is what makes the endpoint require authentication.
        file: The uploaded document. Its content type must be one of
            ``ALLOWED_CONTENT_TYPES``.
        confidence_threshold: Optional value between 0.0 and 1.0 forwarded to
            the service as ``override_threshold``. ``None`` lets the service
            use its configured thresholds.

    Returns:
        The mapping produced by
        ``invoice_processor_service.process_invoice_from_bytes``.

    Raises:
        HTTPException: 400 for an unsupported content type or an empty file,
            500 when reading or processing the file fails.
    """
    # Imported locally so this endpoint does not depend on a change to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    if file.content_type not in ALLOWED_CONTENT_TYPES:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Tipo de archivo no soportado. Permitidos: {', '.join(ALLOWED_CONTENT_TYPES)}"
        )

    try:
        file_bytes = await file.read()
        if not file_bytes:
            # An empty upload can never be processed; fail fast with a client
            # error instead of letting the service call turn it into a 500.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="El archivo está vacío."
            )

        # process_invoice_from_bytes is a regular (blocking) function. Running
        # it in the threadpool keeps the event loop free for other requests.
        extracted_data = await run_in_threadpool(
            invoice_processor_service.process_invoice_from_bytes,
            file_bytes=file_bytes,
            mime_type=file.content_type,
            override_threshold=confidence_threshold,
        )
        return extracted_data
    except HTTPException:
        # Deliberate client errors raised above must not be rewritten as 500s.
        raise
    except Exception as e:
        # NOTE(review): the raw exception text is sent to the client, which
        # may expose internal details. Kept for compatibility with the
        # dashboard, which displays `detail`; consider a generic message.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error al procesar la factura: {e}"
        ) from e

Binary file not shown.

View File

@ -1,46 +1,35 @@
# requirements.txt # requirements.txt
# Dependencias de la aplicación y del toolkit unificadas
# Framework Web y Servidor # --- Core Web Framework & Server ---
# El corazón de nuestra API y el servidor para ejecutarla.
# [standard] incluye extras de alto rendimiento como uvloop.
fastapi fastapi
uvicorn[standard] uvicorn[standard]
# Base de datos (ORM) # --- Database & ORM ---
# Para la interacción con nuestra base de datos SQLite.
sqlalchemy sqlalchemy
# Para usar SQLite (simple para empezar)
pydantic-settings
# Autenticación y Seguridad # --- Authentication & Security ---
python-jose[cryptography] # Hashing de contraseñas y manejo de tokens JWT.
# [bcrypt] y [cryptography] son los backends recomendados.
passlib[bcrypt] passlib[bcrypt]
python-multipart # Para subida de archivos python-jose[cryptography]
# Plantillas HTML # --- Data Validation & Configuration ---
# Validación de datos en la API y carga de configuración desde .env
pydantic-settings
email-validator # Dependencia explícita para Pydantic EmailStr
# --- Frontend & File Handling ---
# Motor de plantillas para HTML y manejo de subida de archivos.
jinja2 jinja2
python-multipart
# Dependencias directas # --- Google Cloud Services ---
google-cloud-documentai==3.5.0 # El cliente oficial para interactuar con Document AI.
python-dotenv google-cloud-documentai
pyinstaller
# --- Utilities ---
# Herramienta robusta para el parsing de fechas.
python-dateutil python-dateutil
# Dependencias transitivas (fijadas para consistencia, tomadas del toolkit)
cachetools==5.5.2
certifi==2025.8.3
charset-normalizer==3.4.3
google-api-core==2.25.1
google-auth==2.40.3
googleapis-common-protos==1.70.0
grpcio==1.74.0
grpcio-status==1.74.0
idna==3.10
proto-plus==1.26.1
protobuf==6.32.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
requests==2.32.4
rsa==4.9.1
urllib3==2.5.0
# Validación de email para Pydantic
email-validator

46
requirements.txt.v1 Normal file
View File

@ -0,0 +1,46 @@
# requirements.txt
# Dependencias de la aplicación y del toolkit unificadas
# Framework Web y Servidor
fastapi
uvicorn[standard]
# Base de datos (ORM)
sqlalchemy
# Para usar SQLite (simple para empezar)
pydantic-settings
# Autenticación y Seguridad
python-jose[cryptography]
passlib[bcrypt]
python-multipart # Para subida de archivos
# Plantillas HTML
jinja2
# Dependencias directas
google-cloud-documentai==3.5.0
python-dotenv
pyinstaller
python-dateutil
# Dependencias transitivas (fijadas para consistencia, tomadas del toolkit)
cachetools==5.5.2
certifi==2025.8.3
charset-normalizer==3.4.3
google-api-core==2.25.1
google-auth==2.40.3
googleapis-common-protos==1.70.0
grpcio==1.74.0
grpcio-status==1.74.0
idna==3.10
proto-plus==1.26.1
protobuf==6.32.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
requests==2.32.4
rsa==4.9.1
urllib3==2.5.0
# Validación de email para Pydantic
email-validator

57
requirements.txt.v2 Normal file
View File

@ -0,0 +1,57 @@
altgraph==0.17.4
annotated-types==0.7.0
anyio==4.10.0
bcrypt==4.3.0
cachetools==5.5.2
certifi==2025.8.3
cffi==1.17.1
charset-normalizer==3.4.3
click==8.2.1
cryptography==45.0.6
dnspython==2.7.0
ecdsa==0.19.1
email_validator==2.2.0
fastapi==0.116.1
google-api-core==2.25.1
google-auth==2.40.3
google-cloud-documentai==3.5.0
googleapis-common-protos==1.70.0
greenlet==3.2.4
grpcio==1.74.0
grpcio-status==1.74.0
h11==0.16.0
httptools==0.6.4
idna==3.10
Jinja2==3.1.6
MarkupSafe==3.0.2
packaging==25.0
passlib==1.7.4
proto-plus==1.26.1
protobuf==6.32.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycparser==2.22
pydantic==2.11.7
pydantic-settings==2.10.1
pydantic_core==2.33.2
pyinstaller==6.15.0
pyinstaller-hooks-contrib==2025.8
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
python-jose==3.5.0
python-multipart==0.0.20
PyYAML==6.0.2
requests==2.32.4
rsa==4.9.1
setuptools==80.9.0
six==1.17.0
sniffio==1.3.1
SQLAlchemy==2.0.43
starlette==0.47.3
typing-inspection==0.4.1
typing_extensions==4.15.0
urllib3==2.5.0
uvicorn==0.35.0
uvloop==0.21.0
watchfiles==1.1.0
websockets==15.0.1

View File

@ -1,40 +1,59 @@
# services/invoice_processor_service.py # services/invoice_processor_service.py
import logging import logging
from typing import Dict, List, Any from typing import Dict, List, Any, Optional
# Importamos nuestro nuevo cliente GCP de forma local y limpia
from .gcp_document_ai_client import process_document_gcp from .gcp_document_ai_client import process_document_gcp
# Importamos nuestro limpiador de datos para usarlo
# (Opcional, si tienes utilidades) from .utils import data_cleaner from .utils import data_cleaner
# Importamos la configuración centralizada
from core.config import settings from core.config import settings
# --- Lógica de negocio extraída del antiguo processing.py --- # --- Lógica de negocio refactorizada ---
def _get_confidence_threshold_for_field(field_type: str) -> float:
    """
    Return the configured confidence threshold for ``field_type``.

    Falls back to the ``"__default__"`` entry of
    ``settings.CONFIDENCE_THRESHOLDS`` when the field has no entry of its own.
    """
    thresholds = settings.CONFIDENCE_THRESHOLDS
    fallback = thresholds["__default__"]
    return thresholds.get(field_type, fallback)
def _extract_specific_fields(
    entities: List[Any],
    override_threshold: Optional[float] = None
) -> Dict[str, str]:
    """
    Select the required fields from ``entities`` and normalise their text.

    Every name in ``settings.REQUIRED_FIELDS`` starts with a placeholder value
    and is replaced by the text of a matching entity whose confidence reaches
    the threshold. When several entities share a type, the last qualifying one
    wins.

    Args:
        entities: Objects exposing ``type_``, ``confidence`` and
            ``mention_text``.
        override_threshold: If given, used as the threshold for every field.
            Otherwise the per-field value from
            ``settings.CONFIDENCE_THRESHOLDS`` applies, falling back to its
            ``"__default__"`` entry.

    Returns:
        A dict mapping each required field name to its extracted text or to
        the placeholder.
    """
    results = dict.fromkeys(settings.REQUIRED_FIELDS, "Not found or low confidence")

    for item in entities:
        kind = item.type_

        # Resolve the threshold first, for every entity, before any filtering.
        if override_threshold is None:
            min_confidence = settings.CONFIDENCE_THRESHOLDS.get(
                kind, settings.CONFIDENCE_THRESHOLDS["__default__"]
            )
        else:
            min_confidence = override_threshold

        if kind not in settings.REQUIRED_FIELDS:
            continue

        if item.confidence >= min_confidence:
            text = item.mention_text.replace('\n', ' ').strip()

            if kind == 'invoice_date':
                # Keep the raw text visible when the date cannot be parsed.
                text = data_cleaner.normalize_date(text) or f"Unparseable Date: '{text}'"

            results[kind] = text

    return results
# --- Función principal del servicio --- # --- Función principal del servicio actualizada ---
def process_invoice_from_bytes(file_bytes: bytes, mime_type: str) -> Dict[str, str]:
def process_invoice_from_bytes(
file_bytes: bytes,
mime_type: str,
override_threshold: Optional[float] = None # Nuevo parámetro opcional
) -> Dict[str, str]:
""" """
Orquesta el proceso completo: llama a Document AI, extrae y limpia los datos. Orquesta el proceso completo. Ahora pasa el umbral de confianza
opcional a la capa de lógica de negocio.
""" """
try: try:
# 1. Llamar a la API de Google a través de nuestro cliente dedicado
document = process_document_gcp( document = process_document_gcp(
project_id=settings.GCP_PROJECT_ID, project_id=settings.GCP_PROJECT_ID,
location=settings.GCP_LOCATION, location=settings.GCP_LOCATION,
@ -43,13 +62,12 @@ def process_invoice_from_bytes(file_bytes: bytes, mime_type: str) -> Dict[str, s
mime_type=mime_type, mime_type=mime_type,
) )
# 2. Aplicar la lógica de negocio para extraer y validar los campos # Pasamos el umbral opcional a la función de extracción
validated_data = _extract_specific_fields(document.entities) validated_data = _extract_specific_fields(document.entities, override_threshold)
logging.info("Documento procesado con éxito y datos validados.") logging.info(f"Documento procesado con éxito con un umbral de {override_threshold or 'default'}.")
return validated_data return validated_data
except Exception as e: except Exception as e:
logging.error(f"Error en el flujo de procesamiento de factura: {e}", exc_info=True) logging.error(f"Error en el flujo de procesamiento de factura: {e}", exc_info=True)
# Re-lanzamos la excepción para que el endpoint de la API la capture y devuelva un 500
raise raise

View File

@ -5,34 +5,38 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Dashboard - ACME Invoice Processor</title> <title>Dashboard - ACME Invoice Processor</title>
<!-- ======================= INICIO DEL GUARDIA DE SEGURIDAD ======================= -->
<script> <script>
const token = localStorage.getItem('accessToken'); const token = localStorage.getItem('accessToken');
if (!token) { if (!token) { window.location.href = '/'; }
// Si NO hay token, no cargues esta página. ¡Redirige al login!
window.location.href = '/';
}
</script> </script>
<!-- ======================== FIN DEL GUARDIA DE SEGURIDAD ========================= -->
<style> <style>
/* ... (tus estilos CSS no cambian) ... */ body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; margin: 0; background-color: #f4f4f9; color: #333; }
body { font-family: sans-serif; margin: 0; background-color: #f4f4f9; } .navbar { background-color: #2c3e50; overflow: hidden; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
.navbar { background-color: #333; overflow: hidden; } .navbar a { float: left; display: block; color: white; text-align: center; padding: 14px 20px; text-decoration: none; font-weight: bold; }
.navbar a { float: left; display: block; color: white; text-align: center; padding: 14px 16px; text-decoration: none; }
.navbar .logout { float: right; cursor: pointer; } .navbar .logout { float: right; cursor: pointer; }
.container { max-width: 800px; margin: 2rem auto; padding: 2rem; background: white; border-radius: 8px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); } .container { max-width: 800px; margin: 2rem auto; padding: 2rem; background: white; border-radius: 8px; box-shadow: 0 4px 8px rgba(0,0,0,0.05); }
h1 { color: #333; } h1, h2 { color: #2c3e50; }
.upload-form { margin-top: 2rem; border: 2px dashed #ccc; padding: 2rem; border-radius: 8px; text-align: center; } .upload-area { margin-top: 1rem; border: 2px dashed #bdc3c7; padding: 2rem; border-radius: 8px; text-align: center; background-color: #ecf0f1; }
.upload-form input[type="file"] { border: none; } .upload-form button { background-color: #3498db; color: white; border: none; padding: 12px 24px; border-radius: 5px; cursor: pointer; font-size: 16px; font-weight: bold; margin-top: 1rem; }
.results { margin-top: 2rem; background-color: #e9ecef; padding: 1rem; border-radius: 8px; display: none; } .upload-form button:disabled { background-color: #95a5a6; cursor: not-allowed; }
.results pre { white-space: pre-wrap; word-wrap: break-word; } /* --- Nuevo: Estilos para la configuración avanzada --- */
.settings { margin-top: 2rem; padding: 1.5rem; background-color: #f8f9fa; border-radius: 8px; }
.slider-container { display: flex; align-items: center; gap: 15px; }
.slider-container label { font-weight: bold; }
.slider-container input[type="range"] { flex-grow: 1; }
.slider-container #confidence-value { font-weight: bold; color: #3498db; min-width: 45px; }
/* --- Fin de nuevos estilos --- */
.results { margin-top: 2rem; background-color: #f8f9fa; border: 1px solid #dee2e6; padding: 1.5rem; border-radius: 8px; display: none; }
.results-table { width: 100%; border-collapse: collapse; }
.results-table td { padding: 12px; border-bottom: 1px solid #dee2e6; }
.results-table td:first-child { font-weight: bold; color: #495057; width: 30%; }
.message { text-align: center; padding: 1rem; margin-top: 1rem; border-radius: 4px; display: none; } .message { text-align: center; padding: 1rem; margin-top: 1rem; border-radius: 4px; display: none; }
.error { background-color: #f8d7da; color: #721c24; } .error { background-color: #f8d7da; color: #721c24; }
.info { background-color: #d1ecf1; color: #0c5460; }
</style> </style>
</head> </head>
<body> <body>
<!-- El resto del HTML no cambia -->
<div class="navbar"> <div class="navbar">
<a href="/dashboard">Dashboard</a> <a href="/dashboard">Dashboard</a>
<a id="logout-button" class="logout">Cerrar Sesión</a> <a id="logout-button" class="logout">Cerrar Sesión</a>
@ -40,25 +44,120 @@
<div class="container"> <div class="container">
<h1>Sube una Factura para Procesar</h1> <h1>Sube una Factura para Procesar</h1>
<form id="upload-form" class="upload-form">
<input type="file" id="invoice-file" name="invoice" required> <form id="upload-form">
<button type="submit">Procesar Factura</button> <div class="settings">
<h2>Configuración Avanzada</h2>
<div class="slider-container">
<label for="confidence-slider">Confianza Mínima:</label>
<input type="range" id="confidence-slider" min="0" max="100" value="85">
<span id="confidence-value">85%</span>
</div>
</div>
<div class="upload-area">
<input type="file" id="invoice-file" name="invoice" required accept="application/pdf,image/jpeg,image/png,image/tiff">
<button type="submit" id="submit-button">Procesar Factura</button>
</div>
</form> </form>
<div id="message-container" class="message"></div> <div id="message-container" class="message"></div>
<div id="results-container" class="results"> <div id="results-container" class="results">
<h2>Resultados de la Extracción:</h2> <h2>Resultados de la Extracción</h2>
<pre id="results-data"></pre> <table id="results-table" class="results-table"></table>
</div> </div>
</div> </div>
<script> <script>
// --- El resto del script no cambia --- const uploadForm = document.getElementById('upload-form');
const submitButton = document.getElementById('submit-button');
const messageContainer = document.getElementById('message-container');
const resultsContainer = document.getElementById('results-container');
const resultsTable = document.getElementById('results-table');
const confidenceSlider = document.getElementById('confidence-slider');
const confidenceValue = document.getElementById('confidence-value');
// --- Lógica para actualizar el valor del slider en la UI ---
confidenceSlider.addEventListener('input', () => {
confidenceValue.textContent = `${confidenceSlider.value}%`;
});
document.getElementById('logout-button').addEventListener('click', () => { document.getElementById('logout-button').addEventListener('click', () => {
localStorage.removeItem('accessToken'); localStorage.removeItem('accessToken');
window.location.href = '/'; window.location.href = '/';
}); });
const uploadForm = document.getElementById('upload-form');
// Handles the upload form: validates the selection, sends the file plus the
// chosen confidence threshold to the API and renders the response.
uploadForm.addEventListener('submit', async (event) => {
    event.preventDefault();

    const fileInput = document.getElementById('invoice-file');
    const file = fileInput.files[0];
    const token = localStorage.getItem('accessToken');

    // The guards run before the button is disabled so an early return can
    // never leave the form locked.
    if (!file) {
        showMessage('Selecciona un archivo antes de continuar.', 'error');
        return;
    }
    if (!token) {
        // No session: send the user back to the login page, like the guard
        // in the page head does.
        window.location.href = '/';
        return;
    }

    submitButton.disabled = true;
    showMessage('Procesando, por favor espera...');
    resultsContainer.style.display = 'none';

    // The slider works in percent (0-100); the API expects 0.0-1.0.
    const thresholdFloat = parseFloat(confidenceSlider.value) / 100.0;

    const formData = new FormData();
    formData.append('file', file);
    formData.append('confidence_threshold', thresholdFloat);

    try {
        const response = await fetch('/api/invoices/upload', {
            method: 'POST',
            headers: { 'Authorization': `Bearer ${token}` },
            body: formData,
        });

        // An error response may not carry a JSON body; treat that as
        // "no data" rather than reporting it as a connection failure.
        const data = await response.json().catch(() => null);

        if (response.ok && data) {
            displayResults(data);
            messageContainer.style.display = 'none';
        } else {
            // `detail` may not be a string (e.g. a list of validation
            // errors); only show it when it is readable text.
            const detail = data && typeof data.detail === 'string' ? data.detail : '';
            showMessage(detail || 'Ocurrió un error.', 'error');
        }
    } catch (error) {
        showMessage('Error de conexión.', 'error');
    } finally {
        submitButton.disabled = false;
        fileInput.value = '';
    }
});
// --- Funciones de Ayuda para la UI ---
/**
 * Show a status banner in the message container.
 *
 * @param {string} text - Text to display.
 * @param {string} [type='info'] - CSS modifier class added next to `message`
 *     (the stylesheet defines `info` and `error`).
 */
function showMessage(text, type = 'info') {
    const banner = messageContainer;
    banner.textContent = text;
    banner.className = ['message', type].join(' ');
    banner.style.display = 'block';
}
/**
 * Render the extracted fields as rows of the results table and reveal it.
 *
 * @param {Object} data - Field name to value mapping returned by the API.
 */
function displayResults(data) {
    // Remove rows left over from a previous upload.
    resultsTable.innerHTML = '';

    for (const fieldName in data) {
        const tr = resultsTable.insertRow();
        const labelCell = tr.insertCell(0);
        const contentCell = tr.insertCell(1);

        // Human-readable label, e.g. 'supplier_name' -> 'Supplier Name'.
        const spaced = fieldName.replace(/_/g, ' ');
        labelCell.textContent = spaced.replace(/\b\w/g, (ch) => ch.toUpperCase());

        // textContent keeps the extracted value from being parsed as HTML.
        contentCell.textContent = data[fieldName];
    }

    resultsContainer.style.display = 'block';
}
</script> </script>
</body> </body>
</html> </html>