# Spaces: Running
import gradio as gr | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer, util | |
# Read the two Excel workbooks that back the search.
df = pd.read_excel("IslamWeb_output.xlsx")
df2 = pd.read_excel("JordanFatwas_all.xlsx")

# Stop at startup if either workbook lacks the columns read by the search.
for name, d in {"IslamWeb": df, "JordanFatwas": df2}.items():
    if {"question", "link"} - set(d.columns):
        raise ValueError(f"❌ Missing required columns in {name}")

# Sentence-embedding model shared by the corpus encoding and the queries.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")


def _encode_questions(frame):
    """Encode the ``question`` column of *frame* into a single tensor.

    Missing questions are replaced by the empty string rather than dropped,
    so the i-th embedding still corresponds to the i-th row of *frame*.
    """
    questions = frame["question"].fillna('').tolist()
    return model.encode(questions, convert_to_tensor=True)


embeddings = _encode_questions(df)
embeddings2 = _encode_questions(df2)
# Search helpers and the Gradio callback.
def _json_safe(value):
    """Return *value*, mapping a missing cell (NaN/NaT/None) to ``None``.

    The frames may still hold NaN for empty spreadsheet cells, and NaN is not
    valid JSON. Assumes *value* is a scalar cell value.
    """
    return None if pd.isna(value) else value


def _best_match(query_embedding, corpus_embeddings, frame):
    """Return ``(question, link)`` for the row of *frame* closest to the query.

    *corpus_embeddings* is expected to be row-aligned with *frame* (the i-th
    embedding belongs to the i-th row). Returns ``(None, None)`` when *frame*
    has no rows, because ``argmax`` over an empty score vector would raise.
    """
    if len(frame) == 0:
        return None, None
    # cos_sim returns a (1, n_rows) matrix for a single query; take its row.
    scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]
    best_row = frame.iloc[int(scores.argmax())]
    return _json_safe(best_row["question"]), _json_safe(best_row["link"])


def search_fatwa(query):
    """Find the closest question in each dataset for a free-text query.

    Args:
        query: Text typed by the user. ``None``, empty, or whitespace-only
            input is treated as "no query".

    Returns:
        A dict with the keys ``question1``/``link1`` (best match from ``df``)
        and ``question2``/``link2`` (best match from ``df2``). All four values
        are ``None`` when there is no query; an individual value is ``None``
        when the matched cell is empty in the spreadsheet.
    """
    # A blank query has no meaning to match on; encoding it would still
    # produce an arbitrary "best" row, so answer with explicit nulls instead.
    if query is None or not str(query).strip():
        return dict.fromkeys(("question1", "link1", "question2", "link2"))

    query_embedding = model.encode(query, convert_to_tensor=True)
    question1, link1 = _best_match(query_embedding, embeddings, df)
    question2, link2 = _best_match(query_embedding, embeddings2, df2)
    return {
        "question1": question1,
        "link1": link1,
        "question2": question2,
        "link2": link2,
    }
# Web UI: one text box in, the JSON result of search_fatwa out.
_INTERFACE_OPTIONS = {
    "fn": search_fatwa,
    "inputs": "text",
    "outputs": "json",
    "title": "Fatwa Search (Dual Source)",
    "description": "Get the most relevant fatwas from both datasets",
    "allow_flagging": "never",
}
iface = gr.Interface(**_INTERFACE_OPTIONS)

# Start serving the interface.
iface.launch()