# Source: Hugging Face Space file app.py, uploaded by vcasas.
# Commit d40cd6c (verified): "Update app.py"
import os
import requests
from llama_index.core import VectorStoreIndex, Settings
from llama_index.readers.file import PDFReader
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Disable the default LLM by clearing the global llama_index setting.
# NOTE(review): presumably done so the app relies only on the embedding
# model configured below and needs no LLM credentials -- confirm.
Settings.llm = None
def download_pdf(url, destination, timeout=60):
    """Download the file at ``url`` and save it to ``destination``.

    Args:
        url: HTTP(S) address of the file to fetch.
        destination: Local path the response body is written to. Missing
            parent directories are created first.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        OSError: If the directory or file cannot be created.
    """
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Without a timeout requests may block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .pdf name.
    response.raise_for_status()

    with open(destination, 'wb') as f:
        f.write(response.content)
def create_index_from_pdf(pdf_path):
    """Build a vector index over a PDF and return a query engine for it.

    Args:
        pdf_path: Path of the PDF file handed to ``PDFReader.load_data``.

    Returns:
        The object returned by ``index.as_query_engine`` when called with
        ``similarity_top_k=3`` and ``response_mode="compact"``.
    """
    docs = PDFReader().load_data(file=pdf_path)
    embedder = HuggingFaceEmbedding(model_name='BAAI/bge-large-es')
    vector_index = VectorStoreIndex.from_documents(docs, embed_model=embedder)
    # top_k was raised to 3 so that more relevant context is found.
    return vector_index.as_query_engine(
        similarity_top_k=3,
        response_mode="compact",
    )
# Remote PDF (hosted on boe.es) to index, and the local path it is saved to.
pdf_url = 'https://www.boe.es/buscar/pdf/1995/BOE-A-1995-25444-consolidado.pdf'
pdf_path = './BOE-A-1995-25444-consolidado.pdf'
# These two calls run at import time: the PDF is fetched on every start-up,
# then indexed. `query_engine` is the module-level object search_pdf queries.
download_pdf(pdf_url, pdf_path)
query_engine = create_index_from_pdf(pdf_path)
def search_pdf(query):
    """Ask the module-level query engine for the penalty of an offence.

    Args:
        query: Text entered by the user; it is interpolated into a Spanish
            prompt as the name of the offence.

    Returns:
        The ``response`` attribute of the object returned by
        ``query_engine.query``.
    """
    # Rewrite the query so the search targets penalties specifically.
    result = query_engine.query(f"Pena para el delito de {query}")
    return result.response
# Build the Gradio UI: one text input and one text output wired to
# search_pdf, with Spanish title and description. Then start serving it.
demo = gr.Interface(
    fn=search_pdf,
    inputs="text",
    outputs="text",
    title="Buscador de Penas en Código Penal",
    description="Introduce un tipo de delito para encontrar su pena correspondiente",
)
demo.launch()