Spaces:
Sleeping
Sleeping
import os | |
import streamlit as st | |
import yaml | |
import logging | |
import pandas as pd | |
from cryptography.fernet import Fernet, InvalidToken | |
from dotenv import load_dotenv | |
from io import StringIO | |
import modeling | |
def df_to_csv(df: pd.DataFrame) -> str:
    """Serialize a DataFrame to CSV text, index column included.

    Args:
        df: The DataFrame to serialize.

    Returns:
        The complete CSV document as one string.
    """
    # With no target path/buffer, ``to_csv`` returns the CSV text itself, so
    # no intermediate StringIO buffer (or seek/getvalue dance) is required.
    return df.to_csv(index=True)
def dict_to_yaml(data):
    """Render ``data`` as a block-style YAML document.

    Args:
        data: The object to serialize; in this file it is the
            ``input_items`` entry of the loaded config.

    Returns:
        The YAML text produced by ``yaml.dump``.
    """
    # allow_unicode keeps non-ASCII characters (e.g. accented letters in item
    # wording) readable instead of emitting them as escape sequences; the
    # output still parses back to the same data via ``yaml.safe_load``.
    return yaml.dump(data, default_flow_style=False, allow_unicode=True)
def yaml_to_dict(yaml_str: str):
    """Parse YAML text with PyYAML's safe loader.

    Args:
        yaml_str: The YAML document as text.

    Returns:
        Whatever ``yaml.safe_load`` yields for the document. Despite the
        function name this is not guaranteed to be a dict.

    Raises:
        yaml.YAMLError: Propagated from ``yaml.safe_load``; the search form
            in ``show_demo`` catches it to report malformed input.
    """
    parsed = yaml.safe_load(yaml_str)
    return parsed
def initialize():
    """Set up logging, environment variables and Streamlit session defaults.

    Every ``setdefault`` call only fills a key that is still missing, so
    values already present in ``st.session_state`` are left alone. The
    ``config`` entry is the exception: it is re-read from ``config.yaml``
    and overwritten on each call.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
        yaml.YAMLError: If ``config.yaml`` is not valid YAML.
    """
    logging.basicConfig(level=logging.INFO)
    load_dotenv()

    state = st.session_state
    state.setdefault('model_names', ['SurveyBot3000', 'PsiSent', 'all_mpnet_base_v2'])
    state.setdefault('input_model_name', state['model_names'][0])
    state.setdefault('loaded_model_name', None)
    state.setdefault('search_query', None)
    state.setdefault('db', None)
    state.setdefault('search_results', pd.DataFrame())
    state.setdefault('explore_plot', None)
    state.setdefault('is_authenticated', False)

    # Explicit encoding: without it ``open`` falls back to the platform
    # locale, which can mis-decode non-ASCII config text on some systems.
    with open('config.yaml', 'r', encoding='utf-8') as stream:
        state['config'] = yaml.safe_load(stream)

    # A missing or empty ``encryption_key`` environment variable both map to
    # None, exactly like the former if/else pair.
    state.setdefault('encryption_key', os.environ.get('encryption_key') or None)
def main():
    """Render the page: title, intro text, auth form or demo, and footer.

    Two ``st.empty()`` placeholders are created on every run; depending on
    ``st.session_state['is_authenticated']`` either the demo or the
    authentication form is drawn into its placeholder.
    """
    st.set_page_config(page_title='Synth-Net')
    st.markdown("# The Synthetic Nomological Net")
    st.markdown("""
Psychological science is experiencing rapid growth in constructs and measures, partly due to refinement and new research areas,
but also due to excessive proliferation. This proliferation, driven by academic incentives for novelty, may lead to redundant
constructs with different names (jangle fallacy) and seemingly similar constructs with little content overlap (jingle fallacy).
This web application uses state-of-the-art models and methods in natural language processing to search for semantic overlap in measures.
It analyzes textual data from over 21,000 scales (containing more than 330,000 items) in an effort to reduce redundancies in measures used in the behavioral sciences.
""", unsafe_allow_html=True)

    placeholder_authentication = st.empty()
    placeholder_demo = st.empty()

    if st.session_state['is_authenticated']:
        show_demo(placeholder_demo)
    else:
        show_authentication(placeholder_authentication)

    # The bullet emoji had been mis-encoded into stray Greek letters.
    # NOTE(review): the Data and Social Media glyphs decode unambiguously;
    # the Preprint, Cite and Project website glyphs could not be recovered
    # and are best-fit replacements - confirm against the intended design.
    st.markdown("""
- 📄 **Preprint (Open Access)**: NA
- 🖋️ **Cite**: NA
- 🌐 **Project website**: NA
- 💾 **Data**: NA
- #️⃣ **Social Media**: NA
The web application is maintained by [magnolia psychometrics](https://www.magnolia-psychometrics.com/).
""", unsafe_allow_html=True)
def show_authentication(placeholder):
    """Render the encryption-key form and try to unlock the database.

    When the form is submitted, ``modeling.load_db()`` is called. On success
    the script is rerun; on ``InvalidToken`` or ``ValueError`` an error box
    is shown, the failure is logged and ``is_authenticated`` is set to False.
    This function never sets ``is_authenticated`` to True itself; presumably
    ``modeling.load_db()`` does so (confirm in ``modeling``).

    Args:
        placeholder: Streamlit container the form is drawn into (``main``
            passes an ``st.empty()`` placeholder).
    """
    with placeholder:
        with st.container():
            with st.form("authentication_form"):
                st.markdown("""
## Authentication
This app is a research preview and requires authentication.
All data is encrypted. Please use your 32-byte encryption key to proceed!
""")
                # NOTE(review): the label emoji was mis-encoded and could not
                # be recovered exactly; a key glyph is a best-fit replacement.
                st.text_input(
                    label="🔑 Encryption key",
                    value="",
                    max_chars=None,
                    key='encryption_key',
                    placeholder="A URL-safe base64-encoded 32-byte key"
                )
                submitted = st.form_submit_button(
                    label="Authenticate",
                    type="primary",
                    use_container_width=True
                )
                if not submitted:
                    return

                # Keep the try body down to the one call whose failures are
                # handled here; the rerun happens only after it succeeded.
                try:
                    modeling.load_db()
                except InvalidToken:
                    error = f"Error: The encryption key you have entered is invalid (**{st.session_state['encryption_key']}**)!"
                    # ``icon`` must be a valid emoji; the previous mis-encoded
                    # value would make st.error raise and hide the real error.
                    st.error(body=error, icon="🔒")
                    # The submitted key may be a near-miss of the real secret,
                    # so it is shown to the user only and never logged.
                    logging.error("Authentication failed: the submitted encryption key is invalid.")
                    st.session_state['is_authenticated'] = False
                except ValueError as error:
                    st.error(body=error, icon="🔒")
                    logging.error(error)
                    st.session_state['is_authenticated'] = False
                else:
                    st.rerun()
def _render_search_tab():
    """Draw the YAML search form and, if present, the formatted results table."""
    if 'input_items' not in st.session_state:
        st.session_state['input_items'] = dict_to_yaml(st.session_state['config']['input_items'])

    query_is_valid = True
    with st.form("submission_form"):
        st.text_area(
            label="Search for similar measures by entering items that constitute the scale (YAML-Formatted):",
            height=175,
            key='input_items'
        )
        submitted = st.form_submit_button(
            label="Search Synth-Net",
            type="primary",
            use_container_width=True
        )
        if submitted:
            try:
                st.session_state['search_query'] = yaml_to_dict(st.session_state['input_items'])
            except yaml.YAMLError as e:
                st.error(f"Yikes, you better get your YAML straight! Check https://yaml.org/ for help! \n {e}")
                # Remember the failure instead of returning: the results table
                # stays hidden for this run, as before, but the rest of the
                # page still gets rendered.
                query_is_valid = False
            else:
                no_model = st.session_state.get('model') is None
                swap_model = st.session_state.get('input_model_name') != st.session_state['loaded_model_name']
                # (Re)load database and model only when none is loaded yet or
                # the selected model differs from the loaded one.
                if swap_model or no_model:
                    modeling.load_db()
                    modeling.load_model()
                modeling.search()

    with st.container():
        if query_is_valid and not st.session_state['search_results'].empty:
            with st.spinner('Rendering search results...'):
                df = st.session_state['search_results'].style.format({
                    'Match': '{:.2f}'.format,
                    'Scale': str.capitalize,
                    'Instrument': str.capitalize,
                })
                st.dataframe(df, use_container_width=True, hide_index=True)


def _render_explore_tab():
    """Run ``modeling.explore()`` and show the plot stored in session state."""
    with st.container():
        modeling.explore()
        if st.session_state['explore_plot']:
            st.plotly_chart(
                figure_or_data=st.session_state['explore_plot'],
                use_container_width=True
            )


def show_demo(placeholder):
    """Render the interactive demo: model selector, search tab and explore tab.

    A malformed YAML query no longer aborts the whole render: the error is
    shown in the search tab and the explore tab is still drawn.

    Args:
        placeholder: Streamlit container the demo is drawn into (``main``
            passes an ``st.empty()`` placeholder).
    """
    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
## Try it yourself!
Define a scale by entering individual items in YAML format.
After form submission, a vector representation for the scale is calculated using the selected encoder model.
Cosine similarities between this vector and the representations of existing scales are then computed.
The resulting table outputs measures with high semantic overlap.
""")
            # Preselect the current model name once a model has been loaded;
            # before that, fall back to the first entry.
            if st.session_state['loaded_model_name'] is not None:
                input_model_index = st.session_state['model_names'].index(st.session_state['input_model_name'])
            else:
                input_model_index = 0
            st.selectbox(
                label="Select model",
                options=st.session_state['model_names'],
                index=input_model_index,
                placeholder="Choose a model",
                key='input_model_name'
            )
            # The tab-label emoji had been mis-encoded.
            # NOTE(review): the spider-web glyph decodes unambiguously; the
            # search glyph is a best-fit replacement - confirm.
            tab1, tab2 = st.tabs(["🔍 Search for scales", "🕸️ Explore the synthetic nomological net"])
            with tab1:
                _render_search_tab()
            with tab2:
                _render_explore_tab()
# if not st.session_state['search_results'].empty: | |
# st.download_button( | |
# label="Download References", | |
# data=df_to_csv(st.session_state['search_results']), | |
# file_name='scored_survey_responses.csv', | |
# mime='text/csv', | |
# use_container_width=True | |
# ) | |
# Script entry point: seed the session state first, then draw the page.
if __name__ == "__main__":
    initialize()
    main()