FDSRashid's picture
testing hierarchal graph format
7d2cacf verified
raw
history blame
17.3 kB
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt
import re
# Regex that captures (non-greedily) whatever sits between a pair of double quotes.
# It is used below to pull the quoted "BookNum_HadithNum" items out of the
# 'Hadiths' column of isnad_info.
pattern = r'"(.*?)"'
# Access token for the Hugging Face dataset repo, read from the HF_token
# environment variable (None when the variable is unset).
Secret_token = os.getenv('HF_token')
# Edge table loaded from Basic_Edge_Information.csv.
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
edge_info = dataset.to_pandas()
# Narrator biographies. Everything except 'Rawi ID' is loaded as a string and
# converted to int further down, after the special cases have been patched.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'), 'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()
# Manual override of the row labelled 49845.
# NOTE(review): the rank literal below looks like UTF-8 Arabic text that was
# mis-decoded through a single-byte code page - confirm against the original file.
narrator_bios.loc[49845, 'Narrator Rank'] = 'ุฑุณูˆู„ ุงู„ู„ู‡'
# The count for that row is set to 0 before the int cast and to its final value
# afterwards - presumably the stored value cannot be parsed as an int; TODO confirm.
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 narrators have no Generation (listed in the dataset as None); they are
# given the sentinel -1 so the column can be cast to int.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
# Hadith texts (matn), one row per 'bookid_hadithid', with the taraf each belongs to.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Two rows are removed by index label; the reason is not recorded here
# (NOTE(review): presumably malformed rows - confirm).
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# Hadiths without a taraf are marked 'KeyAbsent' in the data; map them to -1
# so that taraf_ID can be cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
# The 'Hadiths' column of isnad_info is structured like {"BookNum_HadithNum", ...}
# for each edge.
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# Strip the first and last character, split on commas, take the first quoted
# item of every piece and split it on '_'.
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# 'Hadiths Cleaned' is a list of lists; each sub-list is [book id, hadith id],
# both still strings.
# Largest taraf id present; used as the upper bound of the taraf sliders in the UI.
taraf_max = np.max(matn_info['taraf_ID'].unique())
# 'Tarafs' is parsed the same way (first/last character stripped, comma split)
# into a numpy array of ints per edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
# Colormap used by value_to_hex to colour nodes and edges.
cmap = plt.colormaps['cool']
# Book metadata.
# NOTE(review): this repo id is spelled 'Hadith_info' while every other call
# uses 'hadith_info' - confirm both resolve to the same dataset.
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()
# Split 'bookid_hadithid' ("<book>_<hadith>") into two integer columns, then
# attach the book metadata via Book_ID (pd.merge defaults to an inner join, so
# hadiths whose book is missing from Books.csv are dropped).
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
matn_info = pd.merge(matn_info, books, on='Book_ID')
def value_to_hex(value):
    """Map ``value`` through the module-level colormap and return '#RRGGBB'.

    The colormap's alpha channel is ignored. Each of the remaining channels
    is scaled by 255 and truncated (not rounded) to an integer before being
    written as two upper-case hex digits.
    """
    red, green, blue = (int(channel * 255) for channel in cmap(value)[:3])
    return f"#{red:02X}{green:02X}{blue:02X}"
#edge_info, matn_info, narrator_bios, isnad_info
def _first_bio_value(bio_rows, column, default):
    """Return the first value of ``column`` in ``bio_rows``, or ``default`` when there are no rows."""
    values = bio_rows[column].to_list()
    return values[0] if values else default


def visualize_isnad(taraf_num, yaxis):
    """Draw the full isnad (transmission) graph of one taraf.

    Args:
        taraf_num: taraf id; hadiths are selected by ``matn_info['taraf_ID']``
            and edges by membership in ``isnad_info['Tarafs Cleaned']``.
        yaxis: prefix of the ``'<yaxis> Count'`` column of ``isnad_info`` that
            supplies each edge's colour and label (the UI offers 'Taraf',
            'Hadith', 'Isnad' and 'Book').

    Returns:
        A tuple ``(iframe_html, df)``. ``iframe_html`` is an ``<iframe>`` whose
        ``srcdoc`` holds the pyvis page; nodes are placed with the graphviz
        'dot' layout and physics is turned off. ``df`` has one row per
        (hadith, end transmitter) pair, where an end transmitter is a node
        with no outgoing edge in that hadith's own chain.
    """
    # NOTE(review): this placeholder literal looks like UTF-8 Arabic text that
    # was mis-decoded through a single-byte code page - confirm against the
    # original file. It is reproduced unchanged here.
    unknown_label = 'ูู„ุงู†'
    # The node with this id is drawn larger, in black, and without a rank line.
    special_node_id = '99999'
    x_stretch = 4
    y_stretch = 4

    # Filter the hadith table once instead of once per column.
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]
    edge_in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    isnad_hadith = isnad_info[edge_in_taraf.astype(bool)]

    # For every hadith of the taraf, find the end transmitter(s) of its chain.
    hadith_columns = ['bookid_hadithid', 'matn', 'Book_Name', 'Author', 'Hadith Number']
    hadith_fields = zip(*(taraf_rows[column].to_list() for column in hadith_columns))
    records = []
    for position, (hadith_id, matn, book, author, number) in enumerate(hadith_fields):
        book_hadith = hadith_id.split('_')
        edge_in_hadith = isnad_hadith['Hadiths Cleaned'].apply(
            lambda pairs, wanted=book_hadith: wanted in pairs)
        # astype(bool) keeps the mask usable when no edge belongs to the taraf
        # (apply() on an empty column does not produce a boolean Series).
        chain_edges = isnad_hadith.loc[edge_in_hadith.astype(bool), ['Source', 'Destination']]
        chain = nx.from_pandas_edgelist(chain_edges, source='Source', target='Destination',
                                        create_using=nx.DiGraph())
        for rawi_id in (int(n) for n, out_degree in chain.out_degree() if out_degree == 0):
            bio = narrator_bios[narrator_bios['Rawi ID'] == rawi_id]
            # Narrators missing from narrator_bios get the same fallbacks the
            # node labels below use, instead of raising IndexError.
            records.append([
                matn,
                _first_bio_value(bio, 'Generation', -1),
                _first_bio_value(bio, 'Famous Name', unknown_label),
                book,
                author,
                number,
                str(rawi_id),
                str(position),
            ])
    df = pd.DataFrame(records, columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author',
                                        'Book Hadith Number', 'End Transmitter ID', 'Hadith Number'])
    end_nodes = set(df['End Transmitter ID'])

    # Lay the whole taraf out hierarchically and copy the positions into pyvis.
    G = nx.from_pandas_edgelist(isnad_hadith, source='Source', target='Destination',
                                create_using=nx.DiGraph())
    isnad_pos = nx.nx_agraph.graphviz_layout(G, prog='dot')
    net = Network(directed=True)
    for node, pos in isnad_pos.items():
        node_info = narrator_bios[narrator_bios['Rawi ID'] == int(node)]
        student_narrations = _first_bio_value(node_info, 'Number of Narrations', 1)
        student_gen = _first_bio_value(node_info, 'Generation', -1)
        student_rank = _first_bio_value(node_info, 'Narrator Rank', unknown_label)
        node_name = _first_bio_value(node_info, 'Famous Name', unknown_label)
        # End-transmitter ids are stored as strings, so compare on str(node)
        # whatever type the edge table uses for its ids.
        node_key = str(node)
        x = pos[0] * x_stretch
        y = -1 * pos[1] * y_stretch  # graphviz y grows upwards; flip it for the canvas
        if node_key == special_node_id:
            net.add_node(node, font={'size': 50, 'color': 'black'}, color='#000000',
                         label=f'{node_name} \n ID: {node} - Gen {student_gen}',
                         x=x, y=y, size=70)
            continue
        label = f'{node_name} \n {student_rank} \n ID: {node} - Gen {student_gen}'
        if node_key in end_nodes:
            # Append the positions (within the taraf) of the hadiths that end
            # at this narrator. The original filtered on `source`, a name that
            # is only assigned in the edge loop below, and raised NameError.
            hadith_positions = df.loc[df['End Transmitter ID'] == node_key, 'Hadith Number'].tolist()
            label += f' \n Hadith {" ".join(hadith_positions)}'
        net.add_node(node, font={'size': 30, 'color': 'red'},
                     color=value_to_hex(student_narrations), label=label,
                     x=x, y=y, size=50)

    count_column = f'{yaxis} Count'
    for _, row in isnad_hadith.iterrows():
        net.add_edge(row['Source'], row['Destination'],
                     color=value_to_hex(int(row[count_column])),
                     label=f"{row[count_column]}")

    net.toggle_physics(False)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so it must not
    # contain single quotes itself.
    html = html.replace("'", "\"")
    iframe = f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
    return iframe, df
# for _, row in isnad_hadith.iterrows():
# source = row['Source']
# target = row['Destination']
# teacher_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Source'])]
# student_info = narrator_bios[narrator_bios['Rawi ID'] == int(row['Destination'])]
# teacher_narrations = teacher_info['Number of Narrations'].to_list()
# if len(teacher_narrations):
# teacher_narrations = teacher_narrations[0]
# else:
# teacher_narrations = row['Hadith Count']
# student_narrations = student_info['Number of Narrations'].to_list()
# if len(student_narrations):
# student_narrations = student_narrations[0]
# else:
# student_narrations = row['Hadith Count']
# teacher_gen = teacher_info['Generation'].to_list()
# if len(teacher_gen):
# teacher_gen = teacher_gen[0]
# else:
# teacher_gen = -1
# student_gen = student_info['Generation'].to_list()
# if len(student_gen):
# student_gen = student_gen[0]
# else:
# student_gen = -1
# teacher_rank = teacher_info["Narrator Rank"].to_list()
# if len(teacher_rank):
# teacher_rank = teacher_rank[0]
# else:
# teacher_rank = 'ูู„ุงู†'
# student_rank = student_info["Narrator Rank"].to_list()
# if len(student_rank):
# student_rank = student_rank[0]
# else:
# student_rank = 'ูู„ุงู†'
# if row['Source'] == '99999':
# net.add_node(source, font = {'size':50, 'color': 'Black'}, color = '#000000', label = f'{row["Teacher"]}')
# elif source in end_nodes:
# end_matn_info = df[df["End Transmitter ID"] == source]
# net.add_node(source, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{row["Teacher"]} \n {teacher_rank} \n ID: {row["Source"]} - Gen {teacher_gen} \n Hadith {" ".join(end_matn_info["Hadith Number"].tolist())}')
# else:
# net.add_node(source, font = {'size':30, 'color': 'red'}, color = value_to_hex(teacher_narrations), label = f'{row["Teacher"]} \n {teacher_rank} \n ID: {row["Source"]} - Gen {teacher_gen}')
# if target in end_nodes:
# end_matn_info = df[df["End Transmitter ID"] == target]
# net.add_node(target, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{row["Student"]} \n{student_rank} \n ID: {row["Destination"]} - Gen {student_gen} \n Hadith {" ".join(end_matn_info["Hadith Number"].tolist())}')
# else:
# net.add_node(target, font = {'size': 30, 'color': 'red'}, color = value_to_hex(student_narrations), label = f'{row["Student"]} \n{student_rank} \n ID: {row["Destination"]} - Gen {student_gen}')
# net.add_edge(source, target, color = value_to_hex(int(row[f'{yaxis} Count'])), label = f"{row[f'{yaxis} Count']}")
# net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
# html = net.generate_html()
# html = html.replace("'", "\"")
# return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
# display-capture; encrypted-media;" sandbox="allow-modals allow-forms
# allow-scripts allow-same-origin allow-popups
# allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
# allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>""" , df
def taraf_booknum(taraf_num):
    """Return text, book id, hadith number, book name and author for every hadith of a taraf.

    Rows are selected from ``matn_info`` where ``taraf_ID`` equals
    ``taraf_num``; the result keeps the original index of ``matn_info``.
    """
    wanted_columns = ['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author']
    belongs_to_taraf = matn_info['taraf_ID'] == taraf_num
    return matn_info[belongs_to_taraf][wanted_columns]
def _narrator_summary(rawi_id, unknown_label):
    """Look one narrator up in ``narrator_bios`` and return the fields used in node labels.

    'narrations' is None when the narrator is unknown so that the caller can
    choose its own fallback. 'name' is only taken from the table when exactly
    one row matches; otherwise ``unknown_label`` is used.
    """
    bio = narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]
    names = bio['Famous Name'].to_list()
    narrations = bio['Number of Narrations'].to_list()
    generations = bio['Generation'].to_list()
    ranks = bio['Narrator Rank'].to_list()
    return {
        'name': names[0] if len(names) == 1 else unknown_label,
        'narrations': narrations[0] if narrations else None,
        'generation': generations[0] if generations else -1,
        'rank': ranks[0] if ranks else unknown_label,
    }


def visualize_subTaraf(df, yaxis):
    """Draw the isnad graph of a hand-picked set of hadiths.

    Args:
        df: DataFrame with 'Book_ID' and 'Hadith Number' columns, one row per
            requested hadith. Cells are coerced to integers, so 12, 12.0 and
            '12' are equivalent; rows that cannot be parsed (blank or
            non-numeric) are ignored. ``df`` itself is not modified.
        yaxis: prefix of the ``'<yaxis> Count'`` column of ``isnad_info`` that
            supplies each edge's colour and label.

    Returns:
        An ``<iframe>`` string whose ``srcdoc`` holds the pyvis page, laid out
        with the Barnes-Hut physics model.
    """
    # NOTE(review): this placeholder literal looks like UTF-8 Arabic text that
    # was mis-decoded through a single-byte code page - confirm against the
    # original file. It is reproduced unchanged here.
    unknown_label = 'ูู„ุงู†'
    # A source node with this id is drawn in black and labelled with its name only.
    special_node_id = '99999'

    # Normalise the selection to integers on a copy: float or blank cells would
    # otherwise build ids like '1.0_5.0' or 'nan_nan' that never match, and the
    # caller's frame would gain an extra column.
    selection = (df[['Book_ID', 'Hadith Number']]
                 .apply(pd.to_numeric, errors='coerce')
                 .dropna()
                 .astype(int))
    wanted_ids = selection['Book_ID'].astype(str) + '_' + selection['Hadith Number'].astype(str)
    hadith = matn_info[matn_info['bookid_hadithid'].isin(wanted_ids)]
    taraf_hadith_split = [hadith_id.split('_') for hadith_id in hadith['bookid_hadithid'].to_list()]

    # Keep every edge that carries at least one of the selected hadiths.
    edge_selected = isnad_info['Hadiths Cleaned'].apply(
        lambda pairs: any(wanted in pairs for wanted in taraf_hadith_split))
    isnad_hadith = isnad_info[edge_selected.astype(bool)]

    net = Network(directed=True)
    count_column = f'{yaxis} Count'
    summaries = {}  # narrator id -> label fields, so each narrator is looked up once
    for _, row in isnad_hadith.iterrows():
        source = row['Source']
        target = row['Destination']
        for rawi_id in (source, target):
            if rawi_id not in summaries:
                summaries[rawi_id] = _narrator_summary(rawi_id, unknown_label)
        teacher = summaries[source]
        student = summaries[target]
        # Narrators missing from narrator_bios are coloured by the edge's hadith count.
        teacher_narrations = teacher['narrations'] if teacher['narrations'] is not None else row['Hadith Count']
        student_narrations = student['narrations'] if student['narrations'] is not None else row['Hadith Count']

        if str(source) == special_node_id:
            net.add_node(source, font={'size': 50, 'color': 'Black'}, color='#000000',
                         label=f'{teacher["name"]}')
        else:
            net.add_node(source, font={'size': 30, 'color': 'red'},
                         color=value_to_hex(teacher_narrations),
                         label=f'{teacher["name"]} \n {teacher["rank"]} \n ID: {source} - Gen {teacher["generation"]}')
        net.add_node(target, font={'size': 30, 'color': 'red'},
                     color=value_to_hex(student_narrations),
                     label=f'{student["name"]} \n{student["rank"]} \n ID: {target} - Gen {student["generation"]}')
        net.add_edge(source, target, color=value_to_hex(int(row[count_column])),
                     label=f"{row[count_column]}")

    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    html = net.generate_html()
    # The page is embedded in a single-quoted srcdoc attribute, so it must not
    # contain single quotes itself.
    html = html.replace("'", "\"")
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
# Three-tab Gradio UI. Components are created in the order they should appear
# inside each tab, so the output widgets are instantiated right after the
# button that fills them.
count_choices = ['Taraf', 'Hadith', 'Isnad', 'Book']

with gr.Blocks() as demo:
    # Tab 1: whole-taraf graph plus the table of end transmitters.
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=list(count_choices),
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1, taraf_max, value=10000, label="Taraf", info="Choose the Taraf to Input", step=1
        )
        btn = gr.Button('Submit')
        isnad_graph = gr.HTML()
        isnad_table = gr.DataFrame(wrap=True)
        btn.click(
            fn=visualize_isnad,
            inputs=[taraf_number, Yaxis],
            outputs=[isnad_graph, isnad_table],
        )

    # Tab 2: list the book id / hadith number of every hadith in a taraf.
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1, taraf_max, value=10000, label="Taraf", info="Choose the Taraf to Input", step=1
        )
        btn_num = gr.Button('Retrieve')
        booknum_table = gr.DataFrame(wrap=True)
        btn_num.click(fn=taraf_booknum, inputs=[taraf_num], outputs=[booknum_table])

    # Tab 3: graph for a hand-entered list of (Book_ID, Hadith Number) pairs.
    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(
            choices=list(count_choices),
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        hadith_selection = gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"),
        )
        btn_hadith = gr.Button('Visualize')
        selection_graph = gr.HTML()
        btn_hadith.click(
            fn=visualize_subTaraf,
            inputs=[hadith_selection, yyaxis],
            outputs=[selection_graph],
        )

demo.launch()