Spaces:
Sleeping
Sleeping
import os | |
import streamlit as st | |
import yaml | |
import logging | |
import pandas as pd | |
from cryptography.fernet import Fernet, InvalidToken | |
from dotenv import load_dotenv | |
from io import StringIO | |
import modeling | |
def df_to_csv(df: pd.DataFrame) -> str:
    """Return *df* serialized as CSV text, with the index written as the first column.

    ``DataFrame.to_csv`` returns the CSV as a string when it is given no
    target, so no intermediate buffer is needed.
    """
    return df.to_csv(index=True)
def dict_to_yaml(data) -> str:
    """Serialize *data* to YAML text via ``yaml.dump``.

    ``default_flow_style=False`` is passed through unchanged, which asks
    PyYAML for block-style output rather than inline flow-style collections.
    """
    yaml_text = yaml.dump(data, default_flow_style=False)
    return yaml_text
def yaml_to_dict(yaml_str):
    """Parse *yaml_str* with ``yaml.safe_load`` and return the resulting object.

    Despite the function's name, the result is whatever the YAML document
    encodes; nothing here checks that it is a dict.
    """
    parsed = yaml.safe_load(yaml_str)
    return parsed
def initialize():
    """Configure logging, load ``.env`` variables, and seed session-state defaults.

    Every key is written with ``setdefault``, so values already present in
    ``st.session_state`` are left untouched. ``config.yaml`` is parsed only
    while ``st.session_state['config']`` is still ``None``.
    """
    logging.basicConfig(level=logging.INFO)
    load_dotenv()

    session_defaults = {
        'config': None,
        'encryption_key': None,
        'is_authenticated': False,
        'db': None,
        'search_query': None,
        'search_results': pd.DataFrame(),
    }
    for key, default in session_defaults.items():
        st.session_state.setdefault(key, default)

    if st.session_state['config'] is None:
        # Read the config as UTF-8 explicitly (as is done for tos.md) instead of
        # relying on the platform's locale-dependent default encoding.
        with open('config.yaml', 'r', encoding='utf-8') as stream:
            st.session_state['config'] = yaml.safe_load(stream)
# NOTE(review): SOURCE passed the single character "π" as the alert icon, which looks
# like a mis-decoded emoji. It is neither an emoji nor a Material icon shortcode, so a
# valid emoji is used here instead; swap in the originally intended one if it is known.
_AUTH_ERROR_ICON = "🚨"


def _report_authentication_failure(message):
    """Show *message* as an error, log it, and mark the session as unauthenticated."""
    st.error(body=message, icon=_AUTH_ERROR_ICON)
    logging.error(message)
    st.session_state['is_authenticated'] = False


def show_authentication():
    """Render the terms-of-service gate and the encryption-key form.

    The contents of ``tos.md`` are shown in a fixed-height container. Once both
    consent checkboxes are ticked, a form asks for the encryption key; the text
    input is bound to ``st.session_state['encryption_key']`` through its widget
    key. On submission ``modeling.load_db()`` is called. If it returns normally
    the script is rerun; if it raises ``InvalidToken`` or ``ValueError`` the
    failure is displayed, logged, and ``is_authenticated`` is set to ``False``.
    """
    with st.container(height=400, border=None, key=None):
        with open('tos.md', 'r', encoding='utf-8') as f:
            tos_content = f.read()
        st.write(tos_content)

    checkbox1 = "I agree to use this application **solely for non-commercial research purposes**. Any other usage is **strictly prohibited**!"
    checkbox2 = "I have **read**, **understood**, and **agree** to be bound by the Terms of Service and Privacy Policy."

    # Create both checkboxes before combining their values so that both are
    # always rendered; short-circuiting over the calls themselves would hide
    # the second box until the first one is ticked.
    agreed_research_only = st.checkbox(label=checkbox1)
    agreed_terms = st.checkbox(label=checkbox2)
    if not (agreed_research_only and agreed_terms):
        return

    with st.form("authentication_form", border=False):
        st.markdown(
            "\n"
            "## Authentication\n"
            "This app is a research preview and requires authentication.\n"
            "All data is encrypted. Please use your 32-byte encryption key to proceed!\n"
        )
        # NOTE(review): the leading "π" in the label is kept as found in SOURCE;
        # it is probably a mis-decoded emoji as well.
        st.text_input(
            label="π Encryption key",
            value="",
            max_chars=None,
            key='encryption_key',
            placeholder="A URL-safe base64-encoded 32-byte key"
        )
        submitted = st.form_submit_button(
            label="Authenticate",
            type="primary",
            use_container_width=True
        )

        if submitted:
            # Only the database load is guarded; the rerun is requested from the
            # `else` branch so it is never mistaken for a load failure.
            try:
                modeling.load_db()
            except InvalidToken:
                _report_authentication_failure(
                    "Error: The encryption key you have entered is invalid!"
                )
            except ValueError as error:
                _report_authentication_failure(str(error))
            else:
                st.rerun()
# with placeholder: | |
# with st.container(): | |
# with st.container(height=200, border=None, key=None): | |
# with open('tos.md', 'r', encoding='utf-8') as f: | |
# tos_content = f.read() | |
# st.write(tos_content) | |
# checkbox1 = "I agree to use this application **solely for non-commercial research purposes**. Any other usage is **strictly prohibited**!" | |
# checkbox2 = "I have **read**, **understood**, and **agree** to be bound by the Terms of Service and Privacy Policy." | |
# if st.checkbox(label=checkbox1) & st.checkbox(label=checkbox2): | |
# with st.form("authentication_form"): | |
# st.markdown(""" | |
# ## Authentication | |
# This app is a research preview and requires authentication. | |
# All data is encrypted. Please use your 32-byte encryption key to proceed! | |
# """) | |
def main():
    """Render the search page: introduction, YAML query form, and results table.

    On form submission the text-area content is parsed with ``yaml_to_dict``
    and stored in ``st.session_state['search_query']``. A YAML parse error is
    reported to the user and ends this run of the function early. Otherwise
    ``modeling.load_model()`` is called when session state holds no truthy
    ``'model'`` entry, followed by ``modeling.search()``. Whenever
    ``st.session_state['search_results']`` is non-empty it is displayed as a
    formatted table.
    """
    state = st.session_state

    intro_markdown = (
        "\n"
        "## Try it yourself!\n"
        "Define a scale by entering individual items in YAML format.\n"
        "After form submission, a vector representation for the scale is calculated using the selected encoder model.\n"
        "Cosine similarities between this vector and the representations of existing scales are then computed.\n"
        "The resulting table outputs measures with high semantic overlap.\n"
    )
    with st.container():
        st.divider()
        st.markdown(intro_markdown)

    with st.container():
        # Seed the text area from the configured example items only when the
        # key is absent from session state.
        if 'input_items' not in state:
            example_items = state['config']['input_items']
            state['input_items'] = dict_to_yaml(example_items)

        with st.form("submission_form"):
            st.text_area(
                label="Search for similar measures by entering items that constitute the scale (YAML-Formatted):",
                height=175,
                key='input_items',
            )
            submitted = st.form_submit_button(
                label="Search Synth-Net",
                type="primary",
                use_container_width=True,
            )

            if submitted:
                raw_items = state['input_items']
                try:
                    state['search_query'] = yaml_to_dict(raw_items)
                except yaml.YAMLError as e:
                    st.error(f"Yikes, you better get your YAML straight! Check https://yaml.org/ for help! \n {e}")
                    return

                model_missing = not state.get('model')
                if model_missing:
                    modeling.load_model()
                modeling.search()

    with st.container():
        results = state['search_results']
        if results.empty:
            return

        with st.spinner('Rendering search results...'):
            column_formatters = {
                'Match': '{:.2f}'.format,
                'Scale': str.capitalize,
                'Instrument': str.capitalize,
            }
            styled_results = results.style.format(column_formatters)
            st.dataframe(styled_results, use_container_width=True, hide_index=True)
if __name__ == '__main__':
    # Page setup and the introductory text come first, before session state
    # is initialized.
    st.set_page_config(page_title='Synth-Net')
    st.markdown("# The Synthetic Nomological Net")

    about_text = (
        "\n"
        "Psychological science is experiencing rapid growth in constructs and measures, partly due to refinement and new research areas,\n"
        "but also due to excessive proliferation. This proliferation, driven by academic incentives for novelty, may lead to redundant\n"
        "constructs with different names (jangle fallacy) and seemingly similar constructs with little content overlap (jingle fallacy).\n"
        "This web application uses state-of-the-art models and methods in natural language processing to search for semantic overlap in measures.\n"
        "It analyzes textual data from over 21,000 scales (containing more than 330,000 items) in an effort to reduce redundancies in measures used in the behavioral sciences.\n"
    )
    st.markdown(about_text, unsafe_allow_html=True)

    initialize()

    # Authenticated sessions get the search page; everyone else gets the
    # terms-of-service and encryption-key gate.
    render_page = main if st.session_state['is_authenticated'] else show_authentication
    render_page()