|
import os |
|
import streamlit as st |
|
import yaml |
|
import logging |
|
import pandas as pd |
|
from cryptography.fernet import Fernet, InvalidToken |
|
from dotenv import load_dotenv |
|
from io import StringIO |
|
|
|
import modeling |
|
|
|
def df_to_csv(df):
    """Serialize a DataFrame to CSV text, including its index.

    Args:
        df: The pandas DataFrame to serialize.

    Returns:
        The CSV representation of ``df`` as a single string.
    """
    with StringIO() as buffer:
        df.to_csv(buffer, index=True)
        # getvalue() returns the whole buffer regardless of stream position.
        return buffer.getvalue()
|
|
|
def dict_to_yaml(data):
    """Dump ``data`` to a YAML string in block (non-flow) style.

    Args:
        data: The object to serialize with ``yaml.dump``.

    Returns:
        The YAML text produced by ``yaml.dump``.
    """
    rendered = yaml.dump(data, default_flow_style=False)
    return rendered
|
|
|
def yaml_to_dict(yaml_str):
    """Parse YAML text with ``yaml.safe_load``.

    Args:
        yaml_str: The YAML document to parse.

    Returns:
        Whatever Python object ``yaml.safe_load`` builds from ``yaml_str``.
    """
    parsed = yaml.safe_load(yaml_str)
    return parsed
|
|
|
def initialize():
    """Prepare logging, environment, session-state defaults and the app config.

    Configures the root logger at INFO level, calls ``load_dotenv()``, seeds
    ``st.session_state`` with defaults for every key that is not set yet, and
    reads ``config.yaml`` into ``st.session_state['config']`` when no config
    has been loaded so far.
    """
    logging.basicConfig(level=logging.INFO)
    load_dotenv()

    # setdefault only fills in missing keys, so values that are already
    # present in the session state are left untouched.
    session_defaults = {
        'config': None,
        'encryption_key': None,
        'is_authenticated': False,
        'db': None,
        'search_query': None,
        'search_results': pd.DataFrame(),
    }
    for key, default in session_defaults.items():
        st.session_state.setdefault(key, default)

    if st.session_state['config'] is None:
        # Explicit UTF-8 keeps decoding independent of the platform's default
        # encoding and matches how tos.md is opened elsewhere in this file.
        with open('config.yaml', 'r', encoding='utf-8') as stream:
            st.session_state['config'] = yaml.safe_load(stream)
|
|
|
def show_authentication():
    """Render the terms-of-service gate and the encryption-key form.

    Displays the contents of ``tos.md`` in a fixed-height container followed
    by two consent checkboxes. Once both are ticked, a form asks for the
    encryption key; the text input stores it under the ``'encryption_key'``
    session-state key. On submission ``modeling.load_db()`` is called:

    * on success the script is rerun via ``st.rerun()``;
    * on ``InvalidToken`` or ``ValueError`` the error is shown and logged and
      ``st.session_state['is_authenticated']`` is set to ``False``.
    """
    with st.container(height=400, border=None, key=None):
        with open('tos.md', 'r', encoding='utf-8') as f:
            tos_content = f.read()
        st.write(tos_content)

    checkbox1 = "I agree to use this application **solely for non-commercial research purposes**. Any other usage is **strictly prohibited**!"
    checkbox2 = "I have **read**, **understood**, and **agree** to be bound by the Terms of Service and Privacy Policy."

    # Both checkboxes are created before the values are combined so that the
    # second one is always rendered, even while the first is still unticked.
    agreed_to_usage = st.checkbox(label=checkbox1)
    agreed_to_terms = st.checkbox(label=checkbox2)
    if not (agreed_to_usage and agreed_to_terms):
        return

    with st.form("authentication_form", border=False):
        st.markdown("""
            ## Authentication
            This app is a research preview and requires authentication.
            All data is encrypted. Please use your 32-byte encryption key to proceed!
        """)

        # NOTE(review): the leading glyph of this label was mis-encoded as
        # "π"; a key emoji is used as replacement - confirm the intended one.
        st.text_input(
            label="🔑 Encryption key",
            value="",
            max_chars=None,
            key='encryption_key',
            placeholder="A URL-safe base64-encoded 32-byte key"
        )

        submitted = st.form_submit_button(
            label="Authenticate",
            type="primary",
            use_container_width=True
        )

        if not submitted:
            return

        try:
            modeling.load_db()
        except InvalidToken:
            message = "Error: The encryption key you have entered is invalid!"
        except ValueError as error:
            message = str(error)
        else:
            st.rerun()
            return

        # Shared failure path for both exception types.
        # NOTE(review): the icon was mis-encoded as "π", which is not a valid
        # emoji for st.error; "🚨" is a replacement - confirm the intended one.
        st.error(body=message, icon="🚨")
        logging.error(message)
        st.session_state['is_authenticated'] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Render the search page: intro text, the YAML query form and the results.

    The query text area is bound to ``st.session_state['input_items']`` and is
    pre-filled from ``config['input_items']`` on first use. On submission the
    YAML is parsed into ``st.session_state['search_query']``, the model is
    loaded through ``modeling.load_model()`` if the session has none, and
    ``modeling.search()`` is called. A non-empty
    ``st.session_state['search_results']`` frame is rendered as a table.
    """
    state = st.session_state

    # Introductory section.
    with st.container():
        st.divider()
        st.markdown("""
            ## Try it yourself!
            Define a scale by entering individual items in YAML format.
            After form submission, a vector representation for the scale is calculated using the selected encoder model.
            Cosine similarities between this vector and the representations of existing scales are then computed.
            The resulting table outputs measures with high semantic overlap.
        """)

    # Query form.
    with st.container():
        if 'input_items' not in state:
            default_items = state['config']['input_items']
            state['input_items'] = dict_to_yaml(default_items)

        with st.form("submission_form"):
            st.text_area(
                label="Search for similar measures by entering items that constitute the scale (YAML-Formatted):",
                height=175,
                key='input_items',
            )
            submitted = st.form_submit_button(
                label="Search Synth-Net",
                type="primary",
                use_container_width=True,
            )

            if submitted:
                try:
                    state['search_query'] = yaml_to_dict(state['input_items'])
                except yaml.YAMLError as e:
                    st.error(f"Yikes, you better get your YAML straight! Check https://yaml.org/ for help! \n {e}")
                    # Skip the search and the results table for invalid YAML.
                    return

                if not state.get('model'):
                    modeling.load_model()

                modeling.search()

    # Results table.
    with st.container():
        results = state['search_results']
        if results.empty:
            return

        with st.spinner('Rendering search results...'):
            column_formatters = {
                'Match': '{:.2f}'.format,
                'Scale': str.capitalize,
                'Instrument': str.capitalize,
            }
            styled_results = results.style.format(column_formatters)
            st.dataframe(styled_results, use_container_width=True, hide_index=True)
|
|
|
if __name__ == '__main__':
    # Page title and headline.
    st.set_page_config(page_title='Synth-Net')
    st.markdown("# The Synthetic Nomological Net")

    intro_text = """
        Psychological science is experiencing rapid growth in constructs and measures, partly due to refinement and new research areas,
        but also due to excessive proliferation. This proliferation, driven by academic incentives for novelty, may lead to redundant
        constructs with different names (jangle fallacy) and seemingly similar constructs with little content overlap (jingle fallacy).

        This web application uses state-of-the-art models and methods in natural language processing to search for semantic overlap in measures.
        It analyzes textual data from over 21,000 scales (containing more than 330,000 items) in an effort to reduce redundancies in measures used in the behavioral sciences.
    """
    st.markdown(intro_text, unsafe_allow_html=True)

    initialize()

    # Authenticated sessions get the search page; everyone else sees the
    # terms-of-service and authentication page.
    page = main if st.session_state['is_authenticated'] else show_authentication
    page()