mohbay committed on
Commit
1641ca7
·
verified ·
1 Parent(s): 2bb71b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -32
app.py CHANGED
@@ -1,46 +1,72 @@
1
- import gradio as gr
2
- import pandas as pd
3
- from sentence_transformers import SentenceTransformer, util
4
 
5
- # Load files
6
- df = pd.read_excel("IslamWeb_output.xlsx")
7
- df2 = pd.read_excel("JordanFatwas_all.xlsx")
8
 
9
- # Validate
10
- for d, name in [(df, "IslamWeb"), (df2, "JordanFatwas")]:
11
- if not {"question", "link"}.issubset(d.columns):
12
- raise ValueError(f"❌ Missing required columns in {name}")
13
 
14
- # Load model + encode
15
- model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
16
- embeddings = model.encode(df["question"].fillna('').tolist(), convert_to_tensor=True)
17
- embeddings2 = model.encode(df2["question"].fillna('').tolist(), convert_to_tensor=True)
18
 
19
- # Define function
20
- def search_fatwa(query):
21
- query_embedding = model.encode(query, convert_to_tensor=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
24
- top_idx = int(scores.argmax())
25
 
26
- scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
27
- top_idx2 = int(scores2.argmax())
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  return {
30
  "question1": df.iloc[top_idx]["question"],
31
  "link1": df.iloc[top_idx]["link"],
32
  "question2": df2.iloc[top_idx2]["question"],
33
- "link2": df2.iloc[top_idx2]["link"],
34
  }
35
 
36
- # Interface
37
- iface = gr.Interface(
38
- fn=search_fatwa,
39
- inputs="text",
40
- outputs="json",
41
- allow_flagging="never",
42
- title="Fatwa Search (Dual Source)",
43
- description="Get the most relevant fatwas from both datasets"
44
- )
45
-
46
  iface.launch()
 
1
+ # import gradio as gr
2
+ # import pandas as pd
3
+ # from sentence_transformers import SentenceTransformer, util
4
 
5
+ # # Load files
6
+ # df = pd.read_excel("IslamWeb_output.xlsx")
7
+ # df2 = pd.read_excel("JordanFatwas_all.xlsx")
8
 
9
+ # # Validate
10
+ # for d, name in [(df, "IslamWeb"), (df2, "JordanFatwas")]:
11
+ # if not {"question", "link"}.issubset(d.columns):
12
+ # raise ValueError(f"❌ Missing required columns in {name}")
13
 
14
+ # # Load model + encode
15
+ # model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
16
+ # embeddings = model.encode(df["question"].fillna('').tolist(), convert_to_tensor=True)
17
+ # embeddings2 = model.encode(df2["question"].fillna('').tolist(), convert_to_tensor=True)
18
 
19
+ # # Define function
20
+ # def search_fatwa(query):
21
+ # query_embedding = model.encode(query, convert_to_tensor=True)
22
+
23
+ # scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
24
+ # top_idx = int(scores.argmax())
25
+
26
+ # scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
27
+ # top_idx2 = int(scores2.argmax())
28
+
29
+ # return {
30
+ # "question1": df.iloc[top_idx]["question"],
31
+ # "link1": df.iloc[top_idx]["link"],
32
+ # "question2": df2.iloc[top_idx2]["question"],
33
+ # "link2": df2.iloc[top_idx2]["link"],
34
+ # }
35
+
36
+ # # Interface
37
+ # iface = gr.Interface(
38
+ # fn=search_fatwa,
39
+ # inputs="text",
40
+ # outputs="json",
41
+ # allow_flagging="never",
42
+ # title="Fatwa Search (Dual Source)",
43
+ # description="Get the most relevant fatwas from both datasets"
44
+ # )
45
 
46
+ # iface.launch()
 
47
 
 
 
48
 
49
+ import torch
50
+ import pandas as pd
51
+ from sentence_transformers import SentenceTransformer, util
52
+ import gradio as gr
53
+
54
+ model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
55
+ df = pd.read_csv("cleaned1.csv")
56
+ df2 = pd.read_csv("cleaned2.csv")
57
+ embeddings = torch.load("embeddings1.pt")
58
+ embeddings2 = torch.load("embeddings2.pt")
59
+
60
+ def search_fatwa(query):
61
+ query_embedding = model.encode(query, convert_to_tensor=True)
62
+ top_idx = int(util.pytorch_cos_sim(query_embedding, embeddings)[0].argmax())
63
+ top_idx2 = int(util.pytorch_cos_sim(query_embedding, embeddings2)[0].argmax())
64
  return {
65
  "question1": df.iloc[top_idx]["question"],
66
  "link1": df.iloc[top_idx]["link"],
67
  "question2": df2.iloc[top_idx2]["question"],
68
+ "link2": df2.iloc[top_idx2]["link"]
69
  }
70
 
71
+ iface = gr.Interface(fn=search_fatwa, inputs="text", outputs="json")
 
 
 
 
 
 
 
 
 
72
  iface.launch()