"""Streamlit front end for the Synthetic Nomological Net (Synth-Net) demo."""
import streamlit as st | |
import yaml | |
import pandas as pd | |
from cryptography.fernet import Fernet | |
from dotenv import load_dotenv | |
from io import StringIO | |
import modeling | |
def df_to_csv(df):
    """Serialize a DataFrame to CSV text, including its index column.

    Args:
        df: The pandas DataFrame to serialize.

    Returns:
        The CSV document as a single string.
    """
    # Called without a target buffer, ``to_csv`` returns the CSV text itself,
    # so no intermediate StringIO (or seek/getvalue round trip) is required.
    return df.to_csv(index=True)
def dict_to_yaml(data):
    """Render ``data`` as YAML text.

    Args:
        data: The Python object to serialize.

    Returns:
        The text produced by ``yaml.dump`` with ``default_flow_style=False``.
    """
    yaml_text = yaml.dump(data, default_flow_style=False)
    return yaml_text
def yaml_to_dict(yaml_str):
    """Parse YAML text with PyYAML's safe loader.

    Args:
        yaml_str: The YAML document to parse.

    Returns:
        Whatever ``yaml.safe_load`` yields for the document.

    Raises:
        yaml.YAMLError: If the text is not valid YAML.
    """
    parsed = yaml.safe_load(yaml_str)
    return parsed
def initialize():
    """Prepare environment variables, session defaults, and app configuration.

    Calls ``load_dotenv()``, seeds ``st.session_state`` with a default for each
    key in the table below (keys that already hold a value are left untouched),
    and stores the parsed contents of ``config.yaml`` under
    ``st.session_state['config']``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
        yaml.YAMLError: If ``config.yaml`` is not valid YAML.
    """
    load_dotenv()

    session_defaults = {
        'model_names': ['SurveyBot3000', 'PsiSent', 'all_mpnet_base_v2'],
        'loaded_model_name': None,
        'search_query': None,
        'db': None,
        'results': pd.DataFrame(),
        'decrypt_key': None,
        'valid_decrypt_key': False,
    }
    for key, default in session_defaults.items():
        st.session_state.setdefault(key, default)

    # Decode explicitly as UTF-8 so the configuration is read the same way
    # regardless of the platform's locale-dependent default encoding.
    with open('config.yaml', 'r', encoding='utf-8') as stream:
        st.session_state['config'] = yaml.safe_load(stream)
def main():
    """Render the landing page, then hand a placeholder to the search demo.

    Sets the page title, writes the heading and the introductory text, creates
    an ``st.empty()`` slot, and passes that slot to ``show_demo``.
    """
    st.set_page_config(page_title='Synth-Net')
    st.markdown("# The Synthetic Nomological Net")

    # NOTE(review): the emoji in the link list arrived mis-encoded. The glyphs
    # for "Data" and "Social Media" were recovered exactly from their bytes;
    # those for "Preprint", "Cite" and "Project website" had lost bytes and
    # are best guesses -- confirm against the intended design.
    # Blank lines around the list keep the closing sentence from being
    # absorbed into the last bullet as a continuation line.
    st.markdown("""
Psychological science is experiencing rapid growth in constructs and measures, partly due to refinement and new research areas,
but also due to excessive proliferation. This proliferation, driven by academic incentives for novelty, may lead to redundant
constructs with different names (jangle fallacy) and seemingly similar constructs with little content overlap (jingle fallacy).
This web application uses state-of-the-art models and methods in natural language processing to search for semantic overlap in measures.
It analyzes textual data from over 21,000 scales (containing more than 330,000 items) in an effort to reduce redundancies in measures used in the behavioral sciences.

- 📄 **Preprint (Open Access)**: NA
- 🖋️ **Cite**: NA
- 🌐 **Project website**: NA
- 💾 **Data**: NA
- #️⃣ **Social Media**: NA

The web application is maintained by [magnolia psychometrics](https://www.magnolia-psychometrics.com/).
""", unsafe_allow_html=True)

    placeholder_demo = st.empty()
    show_demo(placeholder_demo)
def show_demo(placeholder):
    """Render the search form and, when results exist, the table of matches.

    On submission the YAML in the text area is parsed into
    ``st.session_state['search_query']``, then ``modeling.load_model()`` and
    ``modeling.search()`` are called. Any failure is reported with
    ``st.error`` and rendering stops. Afterwards, a non-empty
    ``st.session_state['results']`` frame is shown with a CSV download button.

    Args:
        placeholder: Streamlit element used as the parent context for
            everything rendered here (``main`` passes an ``st.empty()`` slot).
    """
    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
## Try it yourself!
Define a scale by entering individual items in YAML format.
After form submission, a vector representation for the scale is calculated using the selected encoder model.
Cosine similarities between this vector and the representations of existing scales are then computed.
The resulting table outputs measures with high semantic overlap.
""")

            with st.form("submission_form"):
                if not st.session_state['valid_decrypt_key']:
                    # NOTE(review): the original icon glyph was lost to
                    # mis-encoding; any valid emoji works here -- confirm.
                    with st.expander(label="Authentication", expanded=True, icon="🔑"):
                        st.text_input(
                            label="Encryption key",
                            value="",
                            max_chars=None,
                            key='decrypt_key',
                            # Mask the secret instead of echoing it in clear text.
                            type="password",
                            placeholder="A URL-safe base64-encoded 32-byte key"
                        )

                with st.expander(label="Model", expanded=False, icon="🧠"):
                    model_names = st.session_state['model_names']
                    previous_choice = st.session_state.get('input_model_name')
                    # Re-select the earlier choice only when a model has been
                    # loaded AND that choice is still a known option; otherwise
                    # fall back to the first entry rather than raising
                    # KeyError/ValueError.
                    if (st.session_state['loaded_model_name'] is not None
                            and previous_choice in model_names):
                        input_model_index = model_names.index(previous_choice)
                    else:
                        input_model_index = 0
                    st.selectbox(
                        label="Select model",
                        options=model_names,
                        index=input_model_index,
                        key='input_model_name'
                    )

                # NOTE(review): original icon glyph lost to mis-encoding -- confirm.
                with st.expander(label="Search Query", expanded=True, icon="🔍"):
                    # Seed the text area from the configured example items
                    # the first time the widget is shown.
                    if 'input_items' not in st.session_state:
                        st.session_state['input_items'] = dict_to_yaml(
                            st.session_state['config']['input_items']
                        )
                    st.text_area(
                        label="Search for similar measures by entering items that constitute the scale (YAML-Formatted):",
                        height=175,
                        key='input_items'
                    )

                submitted = st.form_submit_button(
                    label="Search Synth-Net",
                    type="primary",
                    use_container_width=True
                )

                if submitted:
                    try:
                        st.session_state['search_query'] = yaml_to_dict(
                            st.session_state['input_items']
                        )
                    except yaml.YAMLError as e:
                        st.error(f"Yikes, you better get your YAML straight! Check https://yaml.org/ for help! \n {e}")
                        return

                    # Report the two steps separately so a failed search is
                    # not mislabelled as a model-loading problem.
                    try:
                        modeling.load_model()
                    except Exception as error:
                        st.error(f"Error while loading model: {error}")
                        return

                    try:
                        modeling.search()
                    except Exception as error:
                        st.error(f"Error while searching: {error}")
                        return

            # Kept outside the form: st.download_button is not permitted
            # inside st.form.
            with st.container():
                results = st.session_state['results']
                if not results.empty:
                    df = results.style.format({
                        'Match': '{:.2f}'.format,
                        'Scale': str.capitalize,
                        'Instrument': str.capitalize,
                    })
                    st.dataframe(df, use_container_width=True)
                    st.download_button(
                        label="Download References",
                        data=df_to_csv(results),
                        file_name='scored_survey_responses.csv',
                        mime='text/csv',
                        use_container_width=True
                    )
# Script entry point: set up session state and configuration first, then
# render the page.
if __name__ == "__main__":
    initialize()
    main()