Update app.py
Browse files
Added fast lookup for hadith
app.py
CHANGED
@@ -10,7 +10,9 @@ from datasets import Value
|
|
10 |
from datasets import Dataset
|
11 |
import matplotlib.pyplot as plt
|
12 |
import re
|
13 |
-
|
|
|
|
|
14 |
pattern = r'"(.*?)"'
|
15 |
# This pattern lazily captures the text between a pair of double quotes.
|
16 |
|
@@ -56,6 +58,22 @@ matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split(
|
|
56 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
|
57 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to ``value`` as ``#RRGGBB``.

    ``value`` is handed to the module-level ``cmap`` callable (presumably a
    matplotlib colormap -- confirm where ``cmap`` is defined). Components 0, 1
    and 2 of the result are each multiplied by 255, truncated with ``int`` and
    written as two uppercase hex digits. Any further component is ignored.
    """
    rgba = cmap(value)
    channels = [int(rgba[idx] * 255) for idx in range(3)]
    return "#" + "".join(f"{channel:02X}" for channel in channels)
|
@@ -72,37 +90,52 @@ def get_node_info(node):
|
|
72 |
|
73 |
|
74 |
def visualize_isnad(taraf_num, yaxis):
|
|
|
75 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
76 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
taraf_author = taraf['Author'].to_list()
|
81 |
-
taraf_hadith_number = taraf['Hadith Number'].to_list()
|
82 |
-
lst_hadith = []
|
83 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
|
84 |
isnad_hadith = isnad_info[hadith_cleaned]
|
85 |
-
for i in range(len(taraf_hadith_split)):
|
86 |
-
# This checks each hadith in the Taraf, is that book id hadith id found in each of the edges of isnad_info
|
87 |
-
#This loop get the end transmitter of each Hadith in the Taraf
|
88 |
-
isnad_in_hadith1 = isnad_hadith['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
|
89 |
-
isnad_hadith1 = isnad_hadith[isnad_in_hadith1][['Source', 'Destination']]
|
90 |
-
G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
|
91 |
-
node = [int(n) for n, d in G.out_degree() if d == 0]
|
92 |
-
for n in node:
|
93 |
-
gen_node = narrator_bios[narrator_bios['Rawi ID']==n]['Generation'].to_list()
|
94 |
-
if len(gen_node):
|
95 |
-
gen_node = gen_node[0]
|
96 |
-
else:
|
97 |
-
gen_node = -1
|
98 |
-
name_node = narrator_bios[narrator_bios['Rawi ID']==n]['Famous Name'].to_list()
|
99 |
-
if len(name_node):
|
100 |
-
name_node = name_node[0]
|
101 |
-
else:
|
102 |
-
name_node = 'ููุงู'
|
103 |
-
lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i], str(n), i])
|
104 |
-
df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
107 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
108 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ููุงู')
|
|
|
10 |
from datasets import Dataset
|
11 |
import matplotlib.pyplot as plt
|
12 |
import re
|
13 |
+
from collections import defaultdict
|
14 |
+
from huggingface_hub import hf_hub_download
|
15 |
+
|
16 |
pattern = r'"(.*?)"'
|
17 |
# This pattern lazily captures the text between a pair of double quotes.
|
18 |
|
|
|
58 |
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
|
59 |
matn_info = pd.merge(matn_info, books, on='Book_ID')
|
60 |
|
61 |
+
|
62 |
+
from huggingface_hub import hf_hub_download
|
63 |
+
|
64 |
+
# Fetch the precomputed hadith lookup table (JSON) from the Hub dataset repo.
# visualize_isnad indexes it by 'bookid_hadithid' and passes the result to
# isnad_info.iloc, so each value is presumably a list of row positions in
# isnad_info -- TODO confirm against whatever builds hadith_lookup.json.
file_path = hf_hub_download(
    repo_id="FDSRashid/hadith_info",
    filename="hadith_lookup.json",
    repo_type="dataset",
    token=Secret_token,
)

# JSON text is UTF-8; name the encoding explicitly so decoding does not depend
# on the platform's locale default.
with open(file_path, 'r', encoding='utf-8') as f:
    hadith_lookup_dict = json.load(f)

# defaultdict(list) makes an unknown id resolve to [] (an empty selection)
# instead of raising KeyError. Each such miss also stores the key in the dict.
hadith_lookup = defaultdict(list, hadith_lookup_dict)
|
75 |
+
|
76 |
+
|
77 |
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to ``value`` as ``#RRGGBB``.

    ``value`` is handed to the module-level ``cmap`` callable (presumably a
    matplotlib colormap -- confirm where ``cmap`` is defined). Components 0, 1
    and 2 of the result are each multiplied by 255, truncated with ``int`` and
    written as two uppercase hex digits. Any further component is ignored.
    """
    rgba = cmap(value)
    channels = [int(rgba[idx] * 255) for idx in range(3)]
    return "#" + "".join(f"{channel:02X}" for channel in channels)
|
|
|
90 |
|
91 |
|
92 |
def visualize_isnad(taraf_num, yaxis):
|
93 |
+
# Precompute filtered dataframes
|
94 |
taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
|
95 |
taraf_hadith = taraf['bookid_hadithid'].to_list()
|
96 |
+
|
97 |
+
|
98 |
+
# Precompute hadiths where taraf_num exists
|
|
|
|
|
|
|
99 |
hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
|
100 |
isnad_hadith = isnad_info[hadith_cleaned]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
+
lst_hadith = []
|
103 |
+
|
104 |
+
for i, hadith_parts in enumerate(taraf_hadith):
|
105 |
+
# look up hadith for each bookid_hadithid
|
106 |
+
isnad_hadith1 = isnad_info.iloc[hadith_lookup[taraf_hadith[i]]][['Source', 'Destination']]
|
107 |
+
|
108 |
+
# Create graph and find end nodes
|
109 |
+
G = nx.from_pandas_edgelist(isnad_hadith1, source='Source', target='Destination', create_using=nx.DiGraph())
|
110 |
+
nodes = [int(n) for n, d in G.out_degree() if d == 0]
|
111 |
+
|
112 |
+
if nodes:
|
113 |
+
# Batch fetch data from narrator_bios for efficiency
|
114 |
+
bio_data = narrator_bios[narrator_bios['Rawi ID'].isin(nodes)]
|
115 |
+
|
116 |
+
for n in nodes:
|
117 |
+
gen_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Generation'].squeeze()
|
118 |
+
gen_node = gen_node if pd.notna(gen_node) else -1
|
119 |
+
|
120 |
+
name_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Famous Name'].squeeze()
|
121 |
+
name_node = name_node if pd.notna(name_node) else 'ููุงู'
|
122 |
+
|
123 |
+
# Append result for each node
|
124 |
+
lst_hadith.append([
|
125 |
+
taraf.iloc[i]['matn'],
|
126 |
+
gen_node,
|
127 |
+
name_node,
|
128 |
+
taraf.iloc[i]['Book_Name'],
|
129 |
+
taraf.iloc[i]['Author'],
|
130 |
+
taraf.iloc[i]['Hadith Number'],
|
131 |
+
str(n),
|
132 |
+
i
|
133 |
+
])
|
134 |
+
|
135 |
+
# Convert to DataFrame
|
136 |
+
df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
|
137 |
+
|
138 |
+
|
139 |
isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
140 |
isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
|
141 |
isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ููุงู')
|