|
import streamlit as st |
|
import yaml |
|
import pandas as pd |
|
from cryptography.fernet import Fernet |
|
from dotenv import load_dotenv |
|
from io import StringIO |
|
|
|
import modeling |
|
|
|
def df_to_csv(df):
    """Serialize a DataFrame to CSV text, keeping the index column.

    Args:
        df: Object exposing a pandas-style ``to_csv(buffer, index=...)``
            method (a ``pandas.DataFrame`` in this app).

    Returns:
        str: The complete CSV document.
    """
    buffer = StringIO()
    df.to_csv(buffer, index=True)
    # getvalue() returns the whole buffer regardless of the cursor position.
    return buffer.getvalue()
|
|
|
def dict_to_yaml(data):
    """Render ``data`` as YAML text in block style.

    Args:
        data: The object to serialize.

    Returns:
        The string produced by ``yaml.dump`` with flow style disabled.
    """
    yaml_text = yaml.dump(data, default_flow_style=False)
    return yaml_text
|
|
|
def yaml_to_dict(yaml_str):
    """Parse YAML text with PyYAML's safe loader.

    Args:
        yaml_str: The YAML document to parse.

    Returns:
        Whatever ``yaml.safe_load`` yields for the document.
    """
    parsed = yaml.safe_load(yaml_str)
    return parsed
|
|
|
def initialize():
    """Prepare environment variables, session defaults and configuration.

    Calls ``load_dotenv()``, then seeds each session-state key listed below
    with its default (``setdefault`` leaves keys that already hold a value
    untouched), and finally stores the parsed contents of ``config.yaml``
    under ``st.session_state['config']``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened. The path is relative,
            so it is resolved against the current working directory.
        yaml.YAMLError: If ``config.yaml`` is not valid YAML.
    """
    load_dotenv()

    session_defaults = {
        'model_names': ['SurveyBot3000', 'PsiSent', 'all_mpnet_base_v2'],
        'loaded_model_name': None,
        'search_query': None,
        'db': None,
        'results': pd.DataFrame(),
        'decrypt_key': None,
        'valid_decrypt_key': False,
    }
    for key, default in session_defaults.items():
        st.session_state.setdefault(key, default)

    # Decode explicitly as UTF-8: without ``encoding`` Python falls back to
    # the platform locale, which can mis-read non-ASCII text in the config.
    with open('config.yaml', 'r', encoding='utf-8') as stream:
        st.session_state['config'] = yaml.safe_load(stream)
|
|
|
def main():
    """Render the landing page and then the interactive demo.

    Sets the page title, writes the heading and the introductory text, and
    passes an empty placeholder to ``show_demo`` so the demo is drawn below
    the introduction.
    """
    st.set_page_config(page_title='Synth-Net')

    st.markdown("# The Synthetic Nomological Net")

    # The bullet emoji are written as escape sequences so this file stays
    # ASCII-only and the glyphs cannot be corrupted by a re-encoding:
    # page (U+1F4C4), pen (U+1F58A), globe (U+1F310), floppy disk (U+1F4BE)
    # and the "#" keycap.
    # NOTE(review): the page, pen and globe glyphs replace characters that
    # were unreadable in the previous revision - confirm the intended emoji.
    st.markdown("""
        Psychological science is experiencing rapid growth in constructs and measures, partly due to refinement and new research areas,
        but also due to excessive proliferation. This proliferation, driven by academic incentives for novelty, may lead to redundant
        constructs with different names (jangle fallacy) and seemingly similar constructs with little content overlap (jingle fallacy).

        This web application uses state-of-the-art models and methods in natural language processing to search for semantic overlap in measures.
        It analyzes textual data from over 21,000 scales (containing more than 330,000 items) in an effort to reduce redundancies in measures used in the behavioral sciences.

        - \U0001F4C4 **Preprint (Open Access)**: NA
        - \U0001F58A\uFE0F **Cite**: NA
        - \U0001F310 **Project website**: NA
        - \U0001F4BE **Data**: NA
        - #\uFE0F\u20E3 **Social Media**: NA

        The web application is maintained by [magnolia psychometrics](https://www.magnolia-psychometrics.com/).
    """, unsafe_allow_html=True)

    placeholder_demo = st.empty()

    show_demo(placeholder_demo)
|
|
|
def _default_model_index():
    """Return the position the model select box should start on.

    Falls back to the first entry when no model has been loaded yet, when
    the select box has no stored value, or when the stored name is not one
    of the offered models, instead of raising ``KeyError``/``ValueError``.
    """
    model_names = st.session_state['model_names']
    loaded_name = st.session_state['loaded_model_name']
    if loaded_name is None:
        return 0

    # Prefer the current widget selection; use the loaded model's name when
    # the widget has no value in session state.
    preferred = st.session_state.get('input_model_name', loaded_name)
    return model_names.index(preferred) if preferred in model_names else 0


def _run_search():
    """Parse the submitted items, load the model and run the search.

    Each stage reports its own failure through ``st.error`` so the message
    names the step that actually went wrong.

    Returns:
        bool: ``True`` when all three stages completed, ``False`` otherwise.
    """
    try:
        st.session_state['search_query'] = yaml_to_dict(st.session_state['input_items'])
    except yaml.YAMLError as e:
        st.error(f"Yikes, you better get your YAML straight! Check https://yaml.org/ for help! \n {e}")
        return False

    # Broad catches are deliberate here: this is the UI boundary, and any
    # failure should surface as a message rather than a traceback.
    try:
        modeling.load_model()
    except Exception as error:
        st.error(f"Error while loading model: {error}")
        return False

    try:
        modeling.search()
    except Exception as error:
        st.error(f"Error while searching: {error}")
        return False

    return True


def _render_results():
    """Show the stored results table and a CSV download button, if any."""
    results = st.session_state['results']
    if results.empty:
        return

    styled = results.style.format({
        'Match': '{:.2f}'.format,
        'Scale': str.capitalize,
        'Instrument': str.capitalize,
    })
    st.dataframe(styled, use_container_width=True)

    st.download_button(
        label="Download References",
        data=df_to_csv(results),
        file_name='scored_survey_responses.csv',
        mime='text/csv',
        use_container_width=True
    )


def show_demo(placeholder):
    """Draw the search form and, when available, the search results.

    Args:
        placeholder: Streamlit container (used as a context manager) that
            the demo section is rendered into.

    On submission the YAML query is parsed, the selected model is loaded
    and the search is run; if any of these steps fails an error is shown
    and the results section is skipped for this run.
    """
    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
                ## Try it yourself!
                Define a scale by entering individual items in YAML format.
                After form submission, a vector representation for the scale is calculated using the selected encoder model.
                Cosine similarities between this vector and the representations of existing scales are then computed.
                The resulting table outputs measures with high semantic overlap.
            """)

            # Expander icons must be valid emoji or Streamlit raises; they
            # are written as escapes so the file stays ASCII-only.
            # NOTE(review): the key and magnifying-glass glyphs replace
            # characters that were unreadable in the previous revision -
            # confirm the intended emoji.
            with st.form("submission_form"):

                if not st.session_state['valid_decrypt_key']:
                    with st.expander(label="Authentication", expanded=True, icon="\U0001F511"):  # key
                        st.text_input(
                            label="Encryption key",
                            value="",
                            max_chars=None,
                            key='decrypt_key',
                            placeholder="A URL-safe base64-encoded 32-byte key"
                        )

                with st.expander(label="Model", expanded=False, icon="\U0001F9E0"):  # brain
                    st.selectbox(
                        label="Select model",
                        options=st.session_state['model_names'],
                        index=_default_model_index(),
                        key='input_model_name'
                    )

                with st.expander(label="Search Query", expanded=True, icon="\U0001F50D"):  # magnifying glass
                    # Pre-fill the text area once with the configured example items.
                    if 'input_items' not in st.session_state:
                        st.session_state['input_items'] = dict_to_yaml(st.session_state['config']['input_items'])

                    st.text_area(
                        label="Search for similar measures by entering items that constitute the scale (YAML-Formatted):",
                        height=175,
                        key='input_items'
                    )

                submitted = st.form_submit_button(
                    label="Search Synth-Net",
                    type="primary",
                    use_container_width=True
                )

            if submitted and not _run_search():
                return

            with st.container():
                _render_results()
|
|
|
# Script entry point: seed the session state first, then draw the page.
if __name__ == "__main__":
    initialize()
    main()