"""FastAPI app that answers questions about one PDF through a FAISS-backed RetrievalQA chain."""

import os
import shutil

from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from tqdm import tqdm

# LangChain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings


def load_pdf_and_split(pdf_path, chunk_size=2500, chunk_overlap=400):
    """Load a PDF and split its contents into overlapping text chunks.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to the same splitter.

    Returns:
        The result of ``split_documents`` applied to the loaded documents.
    """
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)


# ====================
# FastAPI setup
# ====================
app = FastAPI()
# NOTE(review): mounting "." publishes every file in the working directory
# under /static (this source file, the PDF, the saved index). Consider a
# dedicated assets directory once the template's asset paths are confirmed.
app.mount("/static", StaticFiles(directory="."), name="static")
templates = Jinja2Templates(directory=".")

# ====================
# Remove any previous FAISS index
# ====================
if os.path.exists("faiss_index"):
    shutil.rmtree("faiss_index")

# ====================
# Load the PDF documents
# ====================
split_docs = load_pdf_and_split("1 مساعد ممارس ملف المحور.pdf")

# Debug: print the first 300 characters of the first five chunks.
for i, doc in enumerate(split_docs[:5]):
    print(f"Chunk {i+1}:\n{doc.page_content[:300]}\n{'-'*40}")

# ====================
# Embeddings
# ====================
# NOTE(review): the original comment called these "multilingual" embeddings,
# but the model name suggests an Arabic BERT checkpoint - confirm intent.
model_name = "asafaya/bert-base-arabic"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# ====================
# Build the FAISS vector store
# ====================
vectorstore = FAISS.from_documents(split_docs, embeddings)
# Optional, intended for caching. NOTE(review): the directory is deleted at
# startup above and nothing visible here loads it back, so the saved copy is
# currently never reused.
vectorstore.save_local("faiss_index")

# ====================
# QA chain
# ====================
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(temperature=0.2, model_name="gpt-4o-mini"),
    chain_type="refine",
    retriever=vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    ),
)
# ====================
# FastAPI routes
# ====================
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the ``index.html`` template."""
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/preguntar")
async def preguntar(request: Request, pregunta: str = Form(...)):
    """Run the submitted question through the QA chain and return the answer as JSON.

    Args:
        request: Incoming request (not used by the handler body).
        pregunta: Question text taken from the required ``pregunta`` form field.

    Returns:
        A ``JSONResponse`` whose ``respuesta`` key holds the chain's output.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    # qa_chain.run is a blocking call; awaiting it in a worker thread keeps
    # the event loop free to serve other requests in the meantime.
    respuesta = await run_in_threadpool(qa_chain.run, pregunta)
    return JSONResponse({"respuesta": respuesta})


# ====================
# Run locally
# ====================
if __name__ == "__main__":
    import uvicorn

    # Pass the app object instead of the "app:app" import string: the string
    # form imports this module a second time (repeating all of the module-level
    # setup) and only works when the file is named app.py.
    uvicorn.run(app, host="0.0.0.0", port=7860)