import glob, os, sys; |
sys.path.append('../utils') |
import seaborn as sns |
import matplotlib.pyplot as plt |
import numpy as np |
import pandas as pd |
import streamlit as st |
from utils.vulnerability_classifier import load_vulnerabilityClassifier, vulnerability_classification |
import logging |
logger = logging.getLogger(__name__) |
from utils.config import get_classifier_params |
from utils.preprocessing import paraLengthCheck |
from io import BytesIO |
import xlsxwriter |
import plotly.express as px |
import plotly.graph_objects as go |
from utils.vulnerability_classifier import label_dict |
# Name of this classifier; used to look up its configuration below and to
# build the session-state key under which the loaded model is stored.
classifier_identifier = 'vulnerability'

# Classifier settings, resolved once at import time. The rest of this module
# reads params['model_name'] and params['threshold'] from it.
params = get_classifier_params(classifier_identifier)
@st.cache_data
def to_excel(df, sectorlist):
    """Serialise ``df`` to an in-memory ``.xlsx`` file with dropdown validation.

    The frame is written to a sheet called ``Sheet1`` (no index column) and
    list-type data validation is attached to fixed columns:

    * column ``S``: one of ``No`` / ``Yes`` / ``Discard``
    * columns ``T`` to ``X``: one of ``sectorlist`` or ``Blank``

    Args:
        df: DataFrame to export.
        sectorlist: list of allowed sector names; ``'Blank'`` is appended as
            an extra choice (the argument itself is not modified).

    Returns:
        bytes: the content of the generated workbook.
    """
    # The header occupies Excel row 1, so data lives in rows 2 .. len(df) + 1.
    last_row = len(df) + 1
    sector_choices = sectorlist + ['Blank']

    output = BytesIO()
    # Leaving the ``with`` block saves and closes the workbook. This replaces
    # ``writer.save()``, which was deprecated in pandas 1.5 and removed in 2.0.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        worksheet = writer.sheets['Sheet1']

        # With no data rows there is nothing to validate, and "S2:S1" would
        # be an inverted range.
        if last_row >= 2:
            worksheet.data_validation(
                'S2:S{}'.format(last_row),
                {'validate': 'list',
                 'source': ['No', 'Yes', 'Discard']},
            )
            # Same sector dropdown on each of the five columns T..X.
            for column in ('X', 'T', 'U', 'V', 'W'):
                worksheet.data_validation(
                    '{0}2:{0}{1}'.format(column, last_row),
                    {'validate': 'list',
                     'source': sector_choices},
                )

    return output.getvalue()
def app():
    """Run the vulnerability classifier on the data held in session state.

    When ``st.session_state`` contains ``key0``, this function:

    1. loads the classifier named by ``params['model_name']``,
    2. stores it in session state under ``'<classifier_identifier>_classifier'``,
    3. calls ``vulnerability_classification`` on the ``key0`` object with
       ``params['threshold']``,
    4. writes the returned object to ``st.session_state.key1``.

    When ``key0`` is absent nothing is loaded or written.
    """
    with st.container():
        if 'key0' not in st.session_state:
            return

        paragraphs = st.session_state.key0

        model = load_vulnerabilityClassifier(classifier_name=params['model_name'])
        # NOTE(review): the model is not passed to the classification call
        # below; presumably it is picked up from session state - confirm.
        st.session_state[f'{classifier_identifier}_classifier'] = model

        st.session_state.key1 = vulnerability_classification(
            haystack_doc=paragraphs,
            threshold=params['threshold'],
        )
def vulnerability_display():
    """Render the summary text and bar chart of vulnerable-group references.

    Reads the classified frame from ``st.session_state['key1']``, keeps the
    rows whose ``'Vulnerability Label'`` entry is non-empty and does not
    contain ``'Other'``, counts how often each label occurs in those rows and
    draws a horizontal bar chart of the counts.

    Raises:
        KeyError: if ``key1`` is not present in the session state.
    """
    # Local import: these names are not imported at module level.
    from collections import Counter
    from itertools import chain

    df = st.session_state['key1']

    # ``astype(bool)`` keeps this a boolean mask even for an empty frame,
    # where ``apply`` yields an object-dtype Series that pandas would treat
    # as a column selector instead.
    has_group = df['Vulnerability Label'].apply(
        lambda labels: len(labels) > 0 and 'Other' not in labels
    ).astype(bool)
    # Non-inplace rename: renaming a filtered slice in place triggers
    # SettingWithCopyWarning.
    df_filtered = df[has_group].rename(columns={'Vulnerability Label': 'Group(s)'})

    st.subheader("Explore references to vulnerable groups:")

    num_paragraphs = len(df['Vulnerability Label'])
    num_references = len(df_filtered['Group(s)'])

    st.markdown(f"""<div style="text-align: justify;">The document contains a
total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
We identified <span style="color: red;">{num_references}</span>
references to groups in vulnerable situations.</div>
<br>
<div style="text-align: justify;">We are searching for references related
to the following groups: (1) Agricultural communities, (2) Children, (3)
Ethnic, racial and other minorities, (4) Fishery communities, (5) Informal sector
workers, (6) Members of indigenous and local communities, (7) Migrants and
displaced persons, (8) Older persons, (9) Persons living in poverty, (10)
Persons living with disabilities, (11) Persons with pre-existing health conditions,
(12) Residents of drought-prone regions, (13) Rural populations, (14) Sexual
minorities (LGBTQI+), (15) Urban populations, (16) Women and other genders.</div>
<br>
<div style="text-align: justify;">In the chart on the right you can see how often
each group has been referenced. For a more detailed view in the text, see the paragraphs and
their respective labels in the table below.</div>""", unsafe_allow_html=True)

    df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

    # Each row holds an iterable of labels; flatten them and count occurrences.
    group_counts = Counter(chain.from_iterable(df_filtered['Group(s)']))
    df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])

    # The left merge keeps the label order of ``label_dict``; labels never
    # seen get a NaN count and are dropped before plotting.
    df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
    df_bar_chart = df_label_count[df_label_count['Label'] != 'Other']
    df_bar_chart = df_bar_chart.dropna(subset=['Count'])

    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=df_bar_chart.Label,
        x=df_bar_chart.Count,
        orientation='h',
        marker=dict(color='purple'),
    ))
    fig.update_layout(
        title='Number of references to each group',
        xaxis_title='Number of references',
        yaxis_title='Group',
    )

    st.plotly_chart(fig, use_container_width=True)