File size: 2,635 Bytes
b068b13 25aecaf b068b13 82da546 0905dfa e4b217c a5ddab0 e4b217c 82da546 e95e88b b068b13 0905dfa b068b13 82da546 0905dfa 82da546 25aecaf 82da546 42d1929 0905dfa 82da546 7876697 82da546 a5ddab0 0905dfa b068b13 82da546 3f394e4 82da546 b068b13 9a87d6c a5ddab0 e2b6573 b068b13 82da546 25aecaf b068b13 82da546 dd9bc92 82da546 dd9bc92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import shutil

from fastapi import FastAPI, Request, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from tqdm import tqdm
def load_pdf_and_split(pdf_path, *, chunk_size=2500, chunk_overlap=400):
    """Load a PDF and split its contents into overlapping text chunks.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 2500.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 400.

    Returns:
        Whatever ``split_documents`` returns for the loaded documents.
    """
    # PyPDFLoader and RecursiveCharacterTextSplitter are imported further down
    # in this module; the names are looked up when the function is called,
    # not when it is defined, so the late import is fine.
    documents = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
# LangChain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
# ====================
# FastAPI application setup
# ====================
app = FastAPI()

# Templates and static assets are both read from the working directory.
templates = Jinja2Templates(directory=".")
# NOTE(review): mounting "." makes every file in the working directory
# reachable under /static — confirm this is intended.
app.mount("/static", StaticFiles(directory="."), name="static")
# ====================
# Remove the previous FAISS index
# ====================
_stale_index = "faiss_index"
if os.path.exists(_stale_index):
    shutil.rmtree(_stale_index)
# ====================
# Load the PDF documents
# ====================
split_docs = load_pdf_and_split("1 مساعد ممارس ملف المحور.pdf")

# Debug: print the first 300 characters of each of the first five chunks
for i, doc in enumerate(split_docs[:5]):
    preview = doc.page_content[:300]
    print(f"Chunk {i+1}:\n{preview}\n{'-'*40}")
# ====================
# Embedding model
# ====================
# NOTE(review): the original heading described these as multilingual
# embeddings, but the model id below names an Arabic BERT checkpoint —
# confirm which is intended.
model_name = "asafaya/bert-base-arabic"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# ====================
# Build the FAISS vector store
# ====================
# Index the split chunks using the embedding model.
vectorstore = FAISS.from_documents(split_docs, embeddings)
# Optional: persist the index to disk for caching.
# NOTE(review): the startup code deletes "faiss_index" on every launch and
# nothing visible in this file loads it back, so the saved copy is not
# reused yet — confirm whether caching is still wanted.
vectorstore.save_local("faiss_index")
# ====================
# Question-answering chain
# ====================
# Chat model that writes the answers
_llm = ChatOpenAI(temperature=0.2, model_name="gpt-4o-mini")

# Similarity search over the vector store, requesting k=5 results per query
_retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

qa_chain = RetrievalQA.from_chain_type(
    llm=_llm,
    chain_type="refine",
    retriever=_retriever,
)
# ====================
# FastAPI routes
# ====================
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the ``index.html`` template for the root path."""
    context = {"request": request}
    page = templates.TemplateResponse("index.html", context)
    return page
@app.post("/preguntar")
async def preguntar(request: Request, pregunta: str = Form(...)):
    """Answer a question submitted as the ``pregunta`` form field.

    Runs the module-level ``qa_chain`` on the question and returns the
    result as JSON under the ``respuesta`` key.
    """
    # qa_chain.run is a synchronous call; invoking it directly inside this
    # coroutine would stall the event loop (and every other request) until it
    # returns, so it is handed to the worker thread pool instead.
    respuesta = await run_in_threadpool(qa_chain.run, pregunta)
    return JSONResponse({"respuesta": respuesta})
# ====================
# Run locally
# ====================
if __name__ == "__main__":
    import uvicorn

    # Pass the app object rather than the "app:app" import string: the string
    # form makes uvicorn import this file a second time as module ``app``,
    # re-running all the module-level setup (PDF loading, embedding, index
    # build), and it only works when the file is named app.py.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|