FDSRashid committed on
Commit
f59ce12
·
verified ·
1 Parent(s): a77555e

Update app.py

Browse files

added fast lookup for hadith

Files changed (1) hide show
  1. app.py +60 -27
app.py CHANGED
@@ -10,7 +10,9 @@ from datasets import Value
10
  from datasets import Dataset
11
  import matplotlib.pyplot as plt
12
  import re
13
-
 
 
14
  pattern = r'"(.*?)"'
15
  # this pattern captures anything in a double quotes.
16
 
@@ -56,6 +58,22 @@ matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split(
56
  matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
57
  matn_info = pd.merge(matn_info, books, on='Book_ID')
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def value_to_hex(value):
60
  rgba_color = cmap(value)
61
  return "#{:02X}{:02X}{:02X}".format(int(rgba_color[0] * 255), int(rgba_color[1] * 255), int(rgba_color[2] * 255))
@@ -72,37 +90,52 @@ def get_node_info(node):
72
 
73
 
74
  def visualize_isnad(taraf_num, yaxis):
 
75
  taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
76
  taraf_hadith = taraf['bookid_hadithid'].to_list()
77
- taraf_matns = taraf['matn'].to_list()
78
- taraf_hadith_split = [i.split('_') for i in taraf_hadith]
79
- taraf_book = taraf['Book_Name'].to_list()
80
- taraf_author = taraf['Author'].to_list()
81
- taraf_hadith_number = taraf['Hadith Number'].to_list()
82
- lst_hadith = []
83
  hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
84
  isnad_hadith = isnad_info[hadith_cleaned]
85
- for i in range(len(taraf_hadith_split)):
86
- # This checks each hadith in the Taraf, is that book id hadith id found in each of the edges of isnad_info
87
- #This loop get the end transmitter of each Hadith in the Taraf
88
- isnad_in_hadith1 = isnad_hadith['Hadiths Cleaned'].apply(lambda x: taraf_hadith_split[i] in x )
89
- isnad_hadith1 = isnad_hadith[isnad_in_hadith1][['Source', 'Destination']]
90
- G = nx.from_pandas_edgelist(isnad_hadith1, source = 'Source', target = 'Destination', create_using = nx.DiGraph())
91
- node = [int(n) for n, d in G.out_degree() if d == 0]
92
- for n in node:
93
- gen_node = narrator_bios[narrator_bios['Rawi ID']==n]['Generation'].to_list()
94
- if len(gen_node):
95
- gen_node = gen_node[0]
96
- else:
97
- gen_node = -1
98
- name_node = narrator_bios[narrator_bios['Rawi ID']==n]['Famous Name'].to_list()
99
- if len(name_node):
100
- name_node = name_node[0]
101
- else:
102
- name_node = 'ูู„ุงู†'
103
- lst_hadith.append([taraf_matns[i], gen_node, name_node, taraf_book[i], taraf_author[i], taraf_hadith_number[i], str(n), i])
104
- df = pd.DataFrame(lst_hadith, columns = ['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
107
  isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
108
  isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ูู„ุงู†')
 
10
  from datasets import Dataset
11
  import matplotlib.pyplot as plt
12
  import re
13
+ from collections import defaultdict
14
+ from huggingface_hub import hf_hub_download
15
+
16
  pattern = r'"(.*?)"'
17
  # this pattern captures anything in a double quotes.
18
 
 
58
  matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
59
  matn_info = pd.merge(matn_info, books, on='Book_ID')
60
 
61
+
62
+ from huggingface_hub import hf_hub_download
63
+
64
+ # Download and read a file
65
+ file_path = hf_hub_download(
66
+ repo_id="FDSRashid/hadith_info", # read in fast lookup data structure
67
+ filename="hadith_lookup.json",
68
+ repo_type="dataset",
69
+ token=Secret_token,
70
+ )
71
+
72
+ with open(file_path, 'r') as f:
73
+ hadith_lookup_dict = json.load(f)
74
+ hadith_lookup = defaultdict(list, hadith_lookup_dict)
75
+
76
+
77
  def value_to_hex(value):
78
  rgba_color = cmap(value)
79
  return "#{:02X}{:02X}{:02X}".format(int(rgba_color[0] * 255), int(rgba_color[1] * 255), int(rgba_color[2] * 255))
 
90
 
91
 
92
  def visualize_isnad(taraf_num, yaxis):
93
+ # Precompute filtered dataframes
94
  taraf = matn_info[matn_info['taraf_ID'] == taraf_num]
95
  taraf_hadith = taraf['bookid_hadithid'].to_list()
96
+
97
+
98
+ # Precompute hadiths where taraf_num exists
 
 
 
99
  hadith_cleaned = isnad_info['Tarafs Cleaned'].apply(lambda x: taraf_num in x)
100
  isnad_hadith = isnad_info[hadith_cleaned]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ lst_hadith = []
103
+
104
+ for i, hadith_parts in enumerate(taraf_hadith):
105
+ # look up hadith for each bookid_hadithid
106
+ isnad_hadith1 = isnad_info.iloc[hadith_lookup[taraf_hadith[i]]][['Source', 'Destination']]
107
+
108
+ # Create graph and find end nodes
109
+ G = nx.from_pandas_edgelist(isnad_hadith1, source='Source', target='Destination', create_using=nx.DiGraph())
110
+ nodes = [int(n) for n, d in G.out_degree() if d == 0]
111
+
112
+ if nodes:
113
+ # Batch fetch data from narrator_bios for efficiency
114
+ bio_data = narrator_bios[narrator_bios['Rawi ID'].isin(nodes)]
115
+
116
+ for n in nodes:
117
+ gen_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Generation'].squeeze()
118
+ gen_node = gen_node if pd.notna(gen_node) else -1
119
+
120
+ name_node = bio_data.loc[bio_data['Rawi ID'] == n, 'Famous Name'].squeeze()
121
+ name_node = name_node if pd.notna(name_node) else 'ูู„ุงู†'
122
+
123
+ # Append result for each node
124
+ lst_hadith.append([
125
+ taraf.iloc[i]['matn'],
126
+ gen_node,
127
+ name_node,
128
+ taraf.iloc[i]['Book_Name'],
129
+ taraf.iloc[i]['Author'],
130
+ taraf.iloc[i]['Hadith Number'],
131
+ str(n),
132
+ i
133
+ ])
134
+
135
+ # Convert to DataFrame
136
+ df = pd.DataFrame(lst_hadith, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
137
+
138
+
139
  isnad_hadith['Teacher'] = isnad_hadith['Source'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
140
  isnad_hadith['Student'] = isnad_hadith['Destination'].apply(lambda x: narrator_bios[narrator_bios['Rawi ID'].astype(int) == int(x)]['Famous Name'].to_list())
141
  isnad_hadith['Teacher'] = isnad_hadith['Teacher'].apply(lambda x: x[0] if len(x)==1 else 'ูู„ุงู†')