# Source: app.py, commit a5ddab0 ("Update app.py", verified), author sammarigarcia.
from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import os
import shutil
from tqdm import tqdm
def load_pdf_and_split(pdf_path, *, chunk_size=2500, chunk_overlap=400):
    """Load a PDF file and split its contents into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file to read.
        chunk_size: Maximum size of each chunk handed to the text splitter.
            Defaults to 2500, the value that used to be hard-coded.
        chunk_overlap: Amount shared between consecutive chunks. Defaults
            to 400, the value that used to be hard-coded.

    Returns:
        The chunk documents produced by the splitter's ``split_documents``.
    """
    # PyPDFLoader and RecursiveCharacterTextSplitter are imported further
    # down in this module; that is fine because the names are only looked up
    # when this function is called, which happens after those imports ran.
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)
# LangChain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
# ====================
# FastAPI setup
# ====================
# Application object; the route decorators further down register on it.
app = FastAPI()
# Serve the current working directory under the /static URL prefix.
# NOTE(review): directory="." looks like it makes every file in the working
# directory downloadable (presumably including this script, the PDF and the
# saved "faiss_index" folder) — confirm this is intended, or move the public
# assets into a dedicated folder.
app.mount("/static", StaticFiles(directory="."), name="static")
# Templates (index.html) are also looked up in the current working directory.
templates = Jinja2Templates(directory=".")
# ====================
# Remove the previous FAISS index
# ====================
# Delete the "faiss_index" directory left behind by an earlier run, if any;
# a freshly built index is saved under the same name later in this file.
_stale_index_present = os.path.exists("faiss_index")
if _stale_index_present:
    shutil.rmtree("faiss_index")
# ====================
# Load the PDF documents
# ====================
# Read the source PDF and cut it into chunks ready for embedding.
split_docs = load_pdf_and_split("1 مساعد ممارس ملف المحور.pdf")

# Debug: show the first 300 characters of (at most) the first five chunks,
# each followed by a 40-dash separator line.
_preview_chunks = split_docs[:5]
for i, doc in enumerate(_preview_chunks):
    print(f"Chunk {i+1}:\n{doc.page_content[:300]}\n{'-'*40}")
# ====================
# Embeddings
# ====================
# NOTE(review): this section was originally headed "multilingual embeddings",
# but the model name below points at an Arabic BERT checkpoint — confirm which
# one is intended, since questions are embedded with the same model.
model_name = "asafaya/bert-base-arabic"
# Embedding object used to build the vector store below.
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# ====================
# Build the FAISS vector store
# ====================
# Build the store from the split chunks using the embeddings object above.
vectorstore = FAISS.from_documents(split_docs, embeddings)
# NOTE(review): nothing in this file loads "faiss_index" back, and the startup
# step earlier in the file deletes it on every run, so this save does not
# currently act as a cache — confirm whether it is still needed.
vectorstore.save_local("faiss_index") # Optional, for caching
# ====================
# QA chain
# ====================
# Chat model that writes the answers.
_llm = ChatOpenAI(temperature=0.2, model_name="gpt-4o-mini")

# Retriever over the vector store: similarity search with k=5.
_retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Retrieval QA chain using the "refine" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=_llm,
    chain_type="refine",
    retriever=_retriever,
)
# ====================
# FastAPI routes
# ====================
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the ``index.html`` template for the root URL.

    Args:
        request: The incoming request; passed to the template under the
            ``"request"`` key.

    Returns:
        The template response for ``index.html``.
    """
    template_context = {"request": request}
    return templates.TemplateResponse("index.html", template_context)
@app.post("/preguntar")
async def preguntar(request: Request, pregunta: str = Form(...)):
    """Answer a question submitted as form data with the QA chain.

    Args:
        request: The incoming request (not used by the handler; kept so the
            signature stays unchanged).
        pregunta: Question text taken from the required ``pregunta`` form
            field.

    Returns:
        A JSON response of the form ``{"respuesta": <answer>}``.
    """
    # Local import keeps this fix self-contained; fastapi is already a
    # dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    # qa_chain.run is a synchronous call. Calling it directly inside this
    # coroutine would block the event loop (and every other request) until it
    # returns, so it is awaited in a worker thread instead.
    respuesta = await run_in_threadpool(qa_chain.run, pregunta)
    return JSONResponse({"respuesta": respuesta})
# ====================
# Run locally
# ====================
if __name__ == "__main__":
    import uvicorn

    # Hand uvicorn the app object rather than the "app:app" import string.
    # With the string, uvicorn imports this file a second time as module
    # "app", which re-runs every module-level step (index deletion, PDF
    # loading, embedding, FAISS build) and also ties the script to the file
    # name app.py.
    uvicorn.run(app, host="0.0.0.0", port=7860)