|
import os
import shutil

from fastapi import FastAPI, Request, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from tqdm import tqdm
|
|
|
def load_pdf_and_split(pdf_path, chunk_size=2500, chunk_overlap=400):
    """Load a PDF and split its contents into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: Upper bound on the size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 2500, the value
            that used to be hard-coded here.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 400, the value
            that used to be hard-coded here.

    Returns:
        Whatever ``split_documents`` returns for the loaded documents
        (the chunked documents).
    """
    documents = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
|
|
|
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_community.chat_models import ChatOpenAI |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains import RetrievalQA |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
|
|
|
|
|
|
|
# Application object, template engine and static-file mount.
# NOTE(review): both the templates and the /static mount point at "." (the
# process working directory), so files located there are reachable under
# /static -- confirm that this exposure is intended.
app = FastAPI()
templates = Jinja2Templates(directory=".")
app.mount(path="/static", app=StaticFiles(directory="."), name="static")
|
|
|
|
|
|
|
|
|
# Remove any existing "faiss_index" directory so the index saved later in
# this module starts from an empty location. The removal is attempted
# directly (instead of checking for existence first) so there is no window
# between the check and the delete.
try:
    shutil.rmtree("faiss_index")
except FileNotFoundError:
    pass  # Nothing to clean up.
|
|
|
|
|
|
|
|
|
# PDF that backs the question-answering index.
PDF_PATH = "1 مساعد ممارس ملف المحور.pdf"
split_docs = load_pdf_and_split(PDF_PATH)

# Print the first 300 characters of (at most) the first five chunks as a
# quick sanity check of the split.
preview_docs = split_docs[:5]
for i, doc in enumerate(preview_docs):
    print(f"Chunk {i+1}:\n{doc.page_content[:300]}\n{'-'*40}")
|
|
|
|
|
|
|
|
|
# Embedding model used to vectorize the chunks.
model_name = "asafaya/bert-base-arabic"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Build the FAISS index from the chunks, then persist it to "faiss_index".
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embeddings,
)
vectorstore.save_local(folder_path="faiss_index")
|
|
|
|
|
|
|
|
|
# Chat model that writes the answers.
chat_model = ChatOpenAI(temperature=0.2, model_name="gpt-4o-mini")

# Similarity search over the FAISS index, returning the top 5 chunks.
doc_retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Retrieval QA chain using the "refine" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="refine",
    retriever=doc_retriever,
)
|
|
|
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the ``index.html`` template for the site root.

    Args:
        request: Incoming request, passed to the template context under
            the ``"request"`` key.

    Returns:
        The template response produced for ``index.html``.
    """
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
|
|
|
@app.post("/preguntar")
async def preguntar(request: Request, pregunta: str = Form(...)):
    """Answer a question submitted in the ``pregunta`` form field.

    The chain call is synchronous and may take a while, so it is executed
    in a worker thread; calling it directly inside this coroutine would
    block the event loop and stall every other request until it returned.

    Args:
        request: Incoming request. Unused, kept so the signature is
            unchanged.
        pregunta: Question text taken from the form body (required).

    Returns:
        A JSON response of the form ``{"respuesta": <answer text>}``.
    """
    # RetrievalQA reads the question from "query" and writes the answer
    # to "result".
    salida = await run_in_threadpool(qa_chain.invoke, {"query": pregunta})
    return JSONResponse({"respuesta": salida["result"]})
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here because it is only needed when this file is executed
    # directly as a script.
    import uvicorn

    # Listen on every interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run("app:app", **server_options)
|
|