Spaces:

ohalkhateeb
/

Test3

Sleeping

App Files Files Community

ohalkhateeb committed 17 days ago

Commit

309766f

verified ·

1 Parent(s): aa8c5f5

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -35

app.py CHANGED Viewed

@@ -1,43 +1,61 @@
-import os
-from bs4 import BeautifulSoup
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import pickle
-# Function to load documents
-def load_documents(directory):
-    documents = []
-    for filename in os.listdir(directory):
-        if filename.endswith(".html"):
-            file_path = os.path.join(directory, filename)
-            with open(file_path, "r", encoding="latin-1") as f:
-                soup = BeautifulSoup(f, "html.parser")
-                text = soup.get_text(separator=" ", strip=True)
-                documents.append(text)
-    return documents
-# Load and split documents
-print("Loading and splitting documents...")
-documents = load_documents("./legislation")
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-chunks = []
-for doc in documents:
-    chunks.extend(text_splitter.split_text(doc))
-# Create embeddings and FAISS index
-print("Generating embeddings...")
 embedding_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
-embeddings = embedding_model.encode(chunks, show_progress_bar=True)
-dimension = embeddings.shape[1]
-index = faiss.IndexFlatL2(dimension)
-index.add(np.array(embeddings))
-# Save chunks and index
-print("Saving precomputed data...")
-with open("chunks.pkl", "wb") as f:
-    pickle.dump(chunks, f)
-faiss.write_index(index, "index.faiss")
-print("Preprocessing complete!")

+import gradio as gr
 import faiss
 import numpy as np
 import pickle
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load precomputed chunks and FAISS index
+print("Loading precomputed data...")
+# NOTE: pickle.load can execute arbitrary code embedded in the file, so
+# chunks.pkl must only ever be the artifact produced by this project's own
+# preprocessing step, never a file obtained from an untrusted source.
+with open("chunks.pkl", "rb") as f:
+    chunks = pickle.load(f)
+index = faiss.read_index("index.faiss")
+# Load embedding model (for queries only)
 embedding_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
+# Load Jais model and tokenizer
+model_name = "inceptionai/jais-13b"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Jais is distributed with custom modelling code on the Hub; per its model
+# card it has to be loaded with trust_remote_code=True. This runs code from
+# that repository, so keep model_name pointing at a trusted source.
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+# RAG function
+def get_response(query, k=3):
+    """Answer ``query`` using the ``k`` nearest stored chunks as context.
+
+    The query is embedded, the FAISS index is searched for the ``k`` closest
+    chunks, and those chunks plus the question are sent to the language model
+    as a single Arabic prompt.
+
+    Args:
+        query: The user's question.
+        k: Number of chunks to retrieve from the index.
+
+    Returns:
+        The generated answer text, or an empty string when the query is
+        empty or contains only whitespace.
+    """
+    query = (query or "").strip()
+    if not query:
+        # Nothing to retrieve or answer; skip the expensive generation call.
+        return ""
+    query_embedding = embedding_model.encode([query])
+    _, indices = index.search(np.asarray(query_embedding, dtype="float32"), k)
+    # FAISS pads the result with -1 when the index holds fewer than k vectors;
+    # chunks[-1] would silently return the last chunk, so drop those slots.
+    retrieved_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
+    context = " ".join(retrieved_chunks)
+    prompt = f"استنادًا إلى الوثائق التالية: {context}، أجب على السؤال: {query}"
+    # NOTE(review): the question sits at the end of the prompt, so if the
+    # tokenizer truncates on the right (the usual default) a long context can
+    # push the question out of the 512-token window -- confirm and consider
+    # budgeting the context separately.
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=200,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9,
+    )
+    # The output sequence starts with the prompt tokens. Decode only what was
+    # generated after them instead of splitting the text on the query, which
+    # fails when the query was truncated away or is repeated in the answer.
+    prompt_length = inputs["input_ids"].shape[1]
+    generated_tokens = outputs[0][prompt_length:]
+    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
+# Gradio interface: a chat window, a right-to-left question box and a clear button.
+with gr.Blocks(title="Dubai Legislation Chatbot") as demo:
+    gr.Markdown("# Dubai Legislation Chatbot\nاسأل أي سؤال حول تشريعات دبي")
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(placeholder="اكتب سؤالك هنا...", rtl=True)
+    clear = gr.Button("مسح")
+
+    def user(user_message, history):
+        """Clear the textbox and append the question as an unanswered turn."""
+        pending_turn = [user_message, None]
+        return "", history + [pending_turn]
+
+    def bot(history):
+        """Fill in the answer for the most recent turn and return the history."""
+        latest_turn = history[-1]
+        latest_turn[1] = get_response(latest_turn[0])
+        return history
+
+    # First record the question without queueing, then generate the answer.
+    submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
+    submit_event.then(bot, chatbot, chatbot)
+    # Returning None resets the chat history.
+    clear.click(lambda: None, None, chatbot, queue=False)
+demo.launch()