FDSRashid's picture
Update app.py
248b1ce verified
raw
history blame
13.6 kB
import gradio as gr
from pyvis.network import Network
import networkx as nx
import numpy as np
import pandas as pd
import os
from datasets import load_dataset
from datasets import Features
from datasets import Value
from datasets import Dataset
import matplotlib.pyplot as plt
import re
# Module-level data preparation: downloads the CSV files of the
# 'FDSRashid/hadith_info' dataset and reshapes them into the pandas DataFrames
# (matn_info, narrator_bios, isnad_info, books) that the functions below read.

# Regex applied to the 'Hadiths' column further down.
pattern = r'"(.*?)"'
# The non-greedy group captures the text found between a pair of double quotes.
# Access token read from the HF_token environment variable; it is passed as
# `token` to every load_dataset call below.
Secret_token = os.getenv('HF_token')
dataset = load_dataset('FDSRashid/hadith_info',data_files = 'Basic_Edge_Information.csv', token = Secret_token, split = 'train')
# NOTE(review): edge_info is not referenced again in the visible code.
edge_info = dataset.to_pandas()
# Narrator biographies. Every column except 'Rawi ID' is loaded as a string and
# converted explicitly below.
features = Features({'Rawi ID': Value('int32'), 'Famous Name': Value('string'), 'Narrator Rank': Value('string'), 'Number of Narrations': Value('string'), 'Generation': Value('string')})
narrator_bios = load_dataset("FDSRashid/hadith_info", data_files = 'Teacher_Bios.csv', token = Secret_token,features=features )
narrator_bios = narrator_bios['train'].to_pandas()
# Manual patch of the row labelled 49845.
# NOTE(review): the literal below looks mis-encoded (probably Arabic text that
# was decoded with the wrong codec) -- confirm against the original file.
narrator_bios.loc[49845, 'Narrator Rank'] = 'ุฑุณูˆู„ ุงู„ู„ู‡'
# The count of that row is set to 0 before the integer cast and to 327512 after
# it; presumably its raw value cannot be parsed by astype(int) -- TODO confirm.
narrator_bios.loc[49845, 'Number of Narrations'] = 0
narrator_bios['Number of Narrations'] = narrator_bios['Number of Narrations'].astype(int)
narrator_bios.loc[49845, 'Number of Narrations'] = 327512
# 8125 narrators have no Generation (None in the dataset); they are mapped to -1
# so that the column can be cast to int.
narrator_bios['Generation'] = narrator_bios['Generation'].replace([None], [-1])
narrator_bios['Generation'] = narrator_bios['Generation'].astype(int)
# Hadith texts (matn), loaded with every column as a string.
features = Features({'matn': Value('string'), 'taraf_ID': Value('string'), 'bookid_hadithid': Value('string')})
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
matn_info = dataset['train'].to_pandas()
# Drop the rows labelled 97550 and 307206. The reason is not visible here
# (presumably malformed rows that would break the parsing below -- TODO confirm).
matn_info = matn_info.drop(97550)
matn_info = matn_info.drop(307206)
# 'KeyAbsent' marks a hadith without a taraf; it becomes -1 so the column can be
# cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1)
matn_info['taraf_ID'] = matn_info['taraf_ID'].astype(int)
# The 'Hadiths' column of isnad_info is structured like
# {"BookNum_HadithNum", ...} for each edge.
isnad_info = load_dataset('FDSRashid/hadith_info',token = Secret_token, data_files = 'isnad_info.csv', split = 'train').to_pandas()
# For every edge: strip the first and last character, split on commas, take the
# first double-quoted match of each piece and split it on "_".
isnad_info['Hadiths Cleaned'] = isnad_info['Hadiths'].apply(lambda x: [re.findall(pattern, string)[0].split("_") for string in x[1:-1].split(',')])
# 'Hadiths Cleaned' is a list of lists; each sub-list is [Book ID, Hadith ID],
# both kept as strings.
# Largest taraf_ID; used as the upper bound of the Taraf sliders in the UI.
taraf_max = np.max(matn_info['taraf_ID'].unique())
# 'Tarafs' is parsed the same way (strip first/last character, split on commas)
# into a numpy array of ints per edge.
isnad_info['Tarafs Cleaned'] = isnad_info['Tarafs'].apply(lambda x: np.array([int(i.strip(' ')) for i in x[1:-1].split(',')]))
# Colormap used by value_to_hex to colour nodes and edges.
cmap = plt.colormaps['cool']
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token = Secret_token)['train'].to_pandas()
# Split 'bookid_hadithid' ("<book>_<hadith>") into two integer columns.
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[0]))
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(lambda x: int(x.split('_')[1]))
# Attach the Books.csv columns to every hadith. pd.merge defaults to an inner
# join, so hadiths whose Book_ID is missing from Books.csv are dropped.
matn_info = pd.merge(matn_info, books, on='Book_ID')
def value_to_hex(value):
    """Return the colour that ``cmap`` assigns to *value* as ``#RRGGBB``.

    The module-level colormap ``cmap`` is called with *value*. The first three
    components of the result (red, green, blue) are each multiplied by 255,
    truncated to an integer and written as two uppercase hex digits. The
    fourth component of the colormap result is not used.
    """
    red, green, blue = cmap(value)[:3]
    hex_pairs = (f"{int(channel * 255):02X}" for channel in (red, green, blue))
    return "#" + "".join(hex_pairs)
#edge_info, matn_info, narrator_bios, isnad_info
# Placeholder used wherever a narrator cannot be resolved in narrator_bios
# ("fulan", i.e. "so-and-so"). The literal had been stored as mojibake (UTF-8
# bytes decoded with a single-byte codec); the Arabic text is restored here so
# that node labels render correctly.
_UNKNOWN_NARRATOR = 'فلان'
# Value stored as Generation when it is unknown (same convention as the
# narrator_bios cleaning at module level).
_UNKNOWN_GENERATION = -1
# Source ID whose node is drawn large and black, with the name as its only label.
_ROOT_SOURCE_ID = '99999'


def _narrator_profile(rawi_id, fallback_narrations):
    """Look up one narrator and return ``(name, rank, narrations, generation)``.

    The name is only used when exactly one row of ``narrator_bios`` matches;
    rank, narration count and generation are taken from the first matching
    row. When nothing matches, the placeholder name/rank, *fallback_narrations*
    and generation -1 are returned.
    """
    matches = narrator_bios[narrator_bios['Rawi ID'] == int(rawi_id)]
    names = matches['Famous Name'].to_list()
    if not names:
        return (_UNKNOWN_NARRATOR, _UNKNOWN_NARRATOR,
                fallback_narrations, _UNKNOWN_GENERATION)
    name = names[0] if len(names) == 1 else _UNKNOWN_NARRATOR
    return (
        name,
        matches['Narrator Rank'].to_list()[0],
        matches['Number of Narrations'].to_list()[0],
        matches['Generation'].to_list()[0],
    )


def _last_transmitter(hadith_id):
    """Return ``(generation, name)`` of the last transmitter of one hadith.

    All edges of ``isnad_info`` that list *hadith_id* ("<book>_<hadith>") are
    assembled into a directed graph; the first node without outgoing edges is
    taken as the end of the chain. If the hadith has no edges, or that narrator
    is missing from ``narrator_bios``, generation -1 and the placeholder name
    are returned instead of raising IndexError.
    """
    book_and_number = hadith_id.split('_')
    carries_hadith = isnad_info['Hadiths Cleaned'].apply(
        lambda pairs: book_and_number in pairs)
    chain = isnad_info.loc[carries_hadith, ['Source', 'Destination']]
    graph = nx.from_pandas_edgelist(
        chain, source='Source', target='Destination', create_using=nx.DiGraph())
    sinks = [int(node) for node, out_degree in graph.out_degree() if out_degree == 0]
    if not sinks:
        return _UNKNOWN_GENERATION, _UNKNOWN_NARRATOR
    bio = narrator_bios[narrator_bios['Rawi ID'] == sinks[0]]
    if bio.empty:
        return _UNKNOWN_GENERATION, _UNKNOWN_NARRATOR
    return bio['Generation'].iloc[0], bio['Famous Name'].iloc[0]


def _render_isnad_network(edges, yaxis):
    """Draw *edges* (rows of ``isnad_info``) with pyvis and wrap them in an iframe.

    Every row contributes a teacher node ('Source'), a student node
    ('Destination') and a directed edge between them. Nodes are coloured by the
    narrator's number of narrations (falling back to the row's 'Hadith Count'
    when the narrator is unknown); edges are coloured and labelled by the
    '<yaxis> Count' column. Returns the iframe markup as a string.
    """
    count_column = f'{yaxis} Count'
    net = Network(directed=True)
    for _, edge in edges.iterrows():
        source = edge['Source']
        target = edge['Destination']
        fallback = edge['Hadith Count']
        teacher, teacher_rank, teacher_narrations, teacher_gen = _narrator_profile(source, fallback)
        student, student_rank, student_narrations, student_gen = _narrator_profile(target, fallback)
        # Compare as text: the ID column may be loaded as int or as str, and a
        # plain `== '99999'` never matches an integer ID.
        if str(source) == _ROOT_SOURCE_ID:
            net.add_node(source, font={'size': 50, 'color': 'Black'}, color='#000000',
                         label=f'{teacher}')
        else:
            net.add_node(source, font={'size': 30, 'color': 'red'},
                         color=value_to_hex(teacher_narrations),
                         label=f'{teacher} \n {teacher_rank} \n ID: {source} - Gen {teacher_gen}')
        net.add_node(target, font={'size': 30, 'color': 'red'},
                     color=value_to_hex(student_narrations),
                     label=f'{student} \n{student_rank} \n ID: {target} - Gen {student_gen}')
        net.add_edge(source, target, color=value_to_hex(int(edge[count_column])),
                     label=f"{edge[count_column]}")
    net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=200)
    # The page is embedded in a single-quoted srcdoc attribute, so every single
    # quote in the generated HTML is turned into a double quote first.
    page = net.generate_html().replace("'", "\"")
    # The continuation lines of this literal are deliberately not indented so
    # the emitted markup stays exactly as before.
    return f"""<iframe style="width: 100%; height: 600px;margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{page}'></iframe>"""


def visualize_isnad(taraf_num, yaxis):
    """Visualize every chain of transmission (isnad) of one taraf.

    Args:
        taraf_num: taraf_ID to display.
        yaxis: prefix of the '<yaxis> Count' column of ``isnad_info`` used to
            colour and label the edges (the UI offers 'Taraf', 'Hadith',
            'Isnad' and 'Book').

    Returns:
        A tuple ``(iframe_html, df)``. ``iframe_html`` embeds the pyvis network
        of all edges whose 'Tarafs Cleaned' contains *taraf_num*. ``df`` has one
        row per hadith of the taraf with the columns 'Matn', 'Generation',
        'Name' (generation and name of the hadith's last transmitter),
        'Book_Name', 'Author' and 'Hadith Number'.
    """
    # Filter matn_info once instead of once per column.
    taraf_rows = matn_info[matn_info['taraf_ID'] == taraf_num]
    records = []
    for hadith_id, matn, book, author, number in zip(
            taraf_rows['bookid_hadithid'].to_list(),
            taraf_rows['matn'].to_list(),
            taraf_rows['Book_Name'].to_list(),
            taraf_rows['Author'].to_list(),
            taraf_rows['Hadith Number'].to_list()):
        generation, name = _last_transmitter(hadith_id)
        records.append([matn, generation, name, book, author, number])
    df = pd.DataFrame(
        records,
        columns=['Matn', 'Generation', 'Name', 'Book_Name', 'Author', 'Hadith Number'])
    in_taraf = isnad_info['Tarafs Cleaned'].apply(lambda tarafs: taraf_num in tarafs)
    return _render_isnad_network(isnad_info[in_taraf], yaxis), df


def taraf_booknum(taraf_num):
    """Return text, book and hadith number of every hadith of a taraf.

    Args:
        taraf_num: taraf_ID to look up in ``matn_info``.

    Returns:
        The matching rows of ``matn_info`` restricted to the columns 'matn',
        'Book_ID', 'Hadith Number', 'Book_Name' and 'Author'.
    """
    in_taraf = matn_info['taraf_ID'] == taraf_num
    return matn_info.loc[in_taraf, ['matn', 'Book_ID', 'Hadith Number', 'Book_Name', 'Author']]


def _selected_hadith_ids(df):
    """Turn the user's (Book_ID, Hadith Number) rows into "<book>_<hadith>" keys.

    Values are coerced to numbers first, so 1, 1.0 and "1" all give "1"; rows
    with a blank, non-numeric or non-whole value are skipped. The caller's
    DataFrame is not modified.
    """
    book_ids = pd.to_numeric(df['Book_ID'], errors='coerce')
    hadith_numbers = pd.to_numeric(df['Hadith Number'], errors='coerce')
    # NaN % 1 is NaN, so this one test also removes the missing values.
    usable = (book_ids % 1 == 0) & (hadith_numbers % 1 == 0)
    keys = (book_ids[usable].astype(int).astype(str)
            + '_'
            + hadith_numbers[usable].astype(int).astype(str))
    return set(keys)


def visualize_subTaraf(df, yaxis):
    """Visualize the chains of transmission of hand-picked hadiths.

    Args:
        df: DataFrame with the columns 'Book_ID' and 'Hadith Number', one row
            per hadith to show.
        yaxis: prefix of the '<yaxis> Count' column of ``isnad_info`` used to
            colour and label the edges.

    Returns:
        The iframe markup (str) embedding the pyvis network of every edge that
        lists at least one selected hadith that also exists in ``matn_info``.
    """
    requested = _selected_hadith_ids(df)
    known_ids = set(
        matn_info.loc[matn_info['bookid_hadithid'].isin(requested), 'bookid_hadithid'])
    # 'Hadiths Cleaned' holds [book, hadith] string pairs; joining a pair with
    # "_" restores the original key, which allows constant-time set lookups.
    carries_selected = isnad_info['Hadiths Cleaned'].apply(
        lambda pairs: any('_'.join(pair) in known_ids for pair in pairs))
    return _render_isnad_network(isnad_info[carries_selected], yaxis)
# Gradio front end: three tabs, each wiring one button to one of the functions
# defined above. Components are created in the order in which they are shown.
with gr.Blocks() as demo:
    # Tab 1: network of a whole taraf plus a table of its hadiths.
    with gr.Tab("Whole Taraf Visualizer"):
        Yaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        taraf_number = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn = gr.Button('Submit')
        isnad_graph = gr.HTML()
        isnad_table = gr.DataFrame(wrap=True)
        btn.click(
            fn=visualize_isnad,
            inputs=[taraf_number, Yaxis],
            outputs=[isnad_graph, isnad_table],
        )

    # Tab 2: list the book and hadith numbers that belong to a taraf.
    with gr.Tab("Book and Hadith Number Retriever"):
        taraf_num = gr.Slider(
            1,
            taraf_max,
            value=10000,
            label="Taraf",
            info="Choose the Taraf to Input",
            step=1,
        )
        btn_num = gr.Button('Retrieve')
        taraf_table = gr.DataFrame(wrap=True)
        btn_num.click(fn=taraf_booknum, inputs=[taraf_num], outputs=[taraf_table])

    # Tab 3: network restricted to hadiths typed in by the user.
    with gr.Tab('Select Hadith Isnad Visualizer'):
        yyaxis = gr.Dropdown(
            choices=['Taraf', 'Hadith', 'Isnad', 'Book'],
            value='Taraf',
            label='Variable to Display',
            info='Choose the variable to visualize.',
        )
        hadith_selection = gr.Dataframe(
            headers=["Book_ID", "Hadith Number"],
            datatype=["number", "number"],
            row_count=5,
            col_count=(2, "fixed"),
        )
        btn_hadith = gr.Button('Search')
        selection_graph = gr.HTML()
        btn_hadith.click(
            fn=visualize_subTaraf,
            inputs=[hadith_selection, yyaxis],
            outputs=[selection_graph],
        )

demo.launch()