Spaces:

yuki-816
/

science-communication

Sleeping

File size: 23,747 Bytes

dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
dd1a8bd
 
 
 
 
 
 
0ee6881
dd1a8bd
 
 
0ee6881
dd1a8bd
 
 
 
0ee6881
3c66cf8
01fb2fc
3c66cf8
dd1a8bd
 
 
01fb2fc
3c66cf8
 
 
dd1a8bd
 
0ee6881
dd1a8bd
 
 
0ee6881
 
 
dd1a8bd
01fb2fc
 
 
 
 
dd1a8bd
 
 
 
 
 
e8470a3
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8239f6
3c66cf8
 
 
 
 
dd1a8bd
d8239f6
dd1a8bd
 
 
3c66cf8
dd1a8bd
3c66cf8
d8239f6
 
 
dd1a8bd
 
 
 
 
 
 
 
d8239f6
dd1a8bd
d8239f6
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
dd1a8bd
0ee6881
 
 
 
 
dd1a8bd
3c66cf8
01fb2fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
 
 
 
 
 
dd1a8bd
 
3c66cf8
dd1a8bd
3c66cf8
 
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
 
 
 
 
 
dd1a8bd
 
3c66cf8
dd1a8bd
3c66cf8
 
 
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
 
dd1a8bd
 
 
 
 
 
3c66cf8
dd1a8bd
0ee6881
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
 
 
 
 
 
 
 
 
 
 
 
dd1a8bd
0ee6881
 
dd1a8bd
0ee6881
 
dd1a8bd
 
0ee6881
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd1a8bd
d8239f6
 
 
 
3c66cf8
d8239f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
d8239f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
3c66cf8
dd1a8bd
 
 
 
0ee6881
dd1a8bd
 
 
d8239f6
dd1a8bd

import streamlit as st
import streamlit_authenticator as stauth
from deta import Deta
import yaml
from yaml.loader import SafeLoader
import os
from langchain.llms import OpenAI
import re
from PyPDF2 import PdfReader
from streamlit_option_menu import option_menu
import streamlit_survey as ss
from cryptography.fernet import Fernet
import warnings
import openai
import json
from collections import defaultdict

warnings.filterwarnings("ignore", category=UserWarning, module='langchain')


class Validator:
    """Input validators installed on the authenticator for sign-up forms.

    Each method takes the raw string typed by the user and returns ``True``
    when it is acceptable, ``False`` otherwise.
    """

    # Compiled once at class creation. The patterns are applied with
    # ``fullmatch`` so that a trailing newline cannot slip past a ``$`` anchor.
    _USERNAME_RE = re.compile(r"[a-zA-Z0-9_-]{1,20}")
    # Local part may contain dots/plus signs; the domain may have sub-domains
    # and hyphens; the top-level domain is letters only.
    _EMAIL_RE = re.compile(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{1,63}"
    )

    def validate_username(self, username):
        """Return True for 1-20 characters drawn from letters, digits, ``_`` and ``-``."""
        return bool(self._USERNAME_RE.fullmatch(username))

    def validate_name(self, name):
        """Return True when the display name is between 2 and 99 characters long."""
        return 1 < len(name) < 100

    def validate_email(self, email):
        """Return True when ``email`` looks like ``local@domain.tld``."""
        return bool(self._EMAIL_RE.fullmatch(email))


def get_user_data(user):
    """Return the stored record whose key is ``user``, or ``None`` if absent.

    Looks the record up directly by key instead of scanning the result of
    ``db.fetch()``: a single ``fetch()`` call only returns one page of items,
    so a scan can miss users once the table grows, and it downloads every
    user's record on each page render.

    Args:
        user: The username used as the record key in the ``user_data`` Base.

    Returns:
        The record dict, or ``None`` when ``user`` is empty or has no record.
    """
    if not user:
        # An empty key can never match a record; avoid a pointless request.
        return None
    return db.get(user)


def user_history(time, text):
    """Placeholder for recording a user's prompt history; currently does nothing."""
    return None


def update_questionnaire_response(user_response, username):
    """Persist the questionnaire answers for ``username`` and confirm in the UI.

    Args:
        user_response: Dict of questionnaire answers to store.
        username: Record key of the user in the ``user_data`` Base.
    """
    if db.get(username) is None:
        # ``db.update`` only patches existing items, so a user who fills in
        # the questionnaire before ever saving an API key has no record to
        # patch. Create one with the same fields the Setup page writes; the
        # encrypted empty string decrypts back to "" (no API key configured).
        db.put({
            "key": username,
            "api": key.encrypt(b"").decode(),
            "questionnaire_response": user_response,
        })
    else:
        db.update({"questionnaire_response": user_response}, key=username)
    st.success("Your responses have been recorded. Thank you!")


def generate_responses(text, chat_model="gpt-3.5-turbo", paper_title="", level_education="",
                       english_proficiency="", language_spoken="", tech_usage="",
                       news_read="", books_read="", additional_requirements="None"):
    """Ask the OpenAI chat model for a plain language summary of ``text``.

    Args:
        text: Abstract (or full paper text); only the first 2048 characters
            are sent.
        chat_model: Name of the chat model to call.
        paper_title: Title of the paper, included in the prompt.
        level_education, english_proficiency, language_spoken, tech_usage,
        news_read, books_read: Questionnaire answers interpolated into the
            prompt to describe the reader.
        additional_requirements: Free-text extra instructions from the user.

    Returns:
        The generated summary string, or ``None`` when the request failed
        (an error is shown in the Streamlit UI in that case).
    """
    text = (text or "")[:2048]

    # Questionnaire answers may be lists (multi-select); render them as
    # readable text rather than a Python list repr.
    if isinstance(language_spoken, (list, tuple)):
        language_spoken = ", ".join(str(item) for item in language_spoken)

    # Incorporating the parameters into the context
    user_context = f"""
    The user has achieved an education level up to {level_education}. In daily routine, the user describes the frequency of using
    technology such as computers, cell phones, and tablets as {tech_usage}. The user's primary language spoken at home is
    {language_spoken}, and has {english_proficiency} level of English proficiency. The user {news_read} reads or watches
    the news and reads approximately {books_read} books in a month.
    """

    # Prompt template
    prompt_template = f"""
    Here's the abstract of a paper (titled) {paper_title}: {text}.
    Considering the user's information: {user_context}.
    And user's additional requirements: {additional_requirements}.
    Generate a plain language summary that summarizes the abstract. While creating this Plain Language Summary, please keep the following must-have elements in mind:
    - The plain language summary should achieve readability at the 8th Grade level as measured by the Flesch Kincaid scale.
    - The plain language summary should achieve Flesch Reading Ease Scores from 60 to 70.
    - Ensure each sentence is shorter than 25 words.
    - The summary should average 6 sentences per paragraph.
    - Less than 10% of the sentences should be in passive voice.
    - Ensure fidelity to the original source.
    - Use clear and simple language, avoiding jargon.
    - Maintain ethical considerations, including objectivity and inclusivity.
    - Aim for universal readability, targeting a reading age of 14-17 years.
    - Consider multi-language accessibility.
    - Take into account any operational context or guidelines that may apply.
    - The plain language summary should be a single paragraph, without subtitles or bullet points.
    """

    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt_template}
    ]

    try:
        response = openai.ChatCompletion.create(
            model=chat_model,
            messages=conversation
        )
        return response['choices'][0]['message']['content']
    except openai.error.AuthenticationError:
        st.error('Invalid api key.', icon="⚠️")
    except Exception as exc:
        # Rate limits, network failures, unknown model names, malformed
        # responses, etc. must not be misreported as a bad API key.
        st.error(f'Failed to generate summary: {exc}', icon="⚠️")
    return None


@st.cache_resource
def survey(user_name):
    """Build the StreamlitSurvey for ``user_name``, labelled ``<user_name>_survey``.

    Decorated with ``st.cache_resource``.
    """
    return ss.StreamlitSurvey(user_name + '_survey')


# --- Storage and secrets ---------------------------------------------------
# Deta Base "user_data" holds the per-user records; Deta Drive "config" holds
# the authenticator's config.yaml. Both secrets come from Streamlit secrets.
db_key = st.secrets["deta_key"]
deta = Deta(db_key)
db = deta.Base("user_data")
key = Fernet(st.secrets['fernet_key'])  # encrypts/decrypts the stored API keys
config_drive = deta.Drive("config")
config = yaml.load(config_drive.get("config.yaml").read(), Loader=SafeLoader)

# --- Authentication --------------------------------------------------------
# Positional arguments for stauth.Authenticate, read from the loaded config.
auth_args = (
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized'],
)
authenticator = stauth.Authenticate(*auth_args)
authenticator.validator = Validator()

# --- Pre-login sidebar: choose between logging in and signing up -----------
init_sidebar = st.sidebar.empty()
with init_sidebar:
    init_page = option_menu(
        None,
        ["Login", 'Sign Up'],
        icons=['lightbulb-fill', 'lightbulb'],
        menu_icon="cast",
        default_index=0,
        styles={},
    )

# Main UI flow: either the login form followed by the three application pages
# (summary generation, questionnaire, setup) or the sign-up form.
if init_page == 'Login':
    name, authentication_status, username = authenticator.login('Login', 'main')
    if authentication_status:
        # Replace the Login / Sign Up menu with the in-app navigation.
        init_sidebar.empty()
        st.sidebar.write(f'**Welcome** {name}')
        app_sidebar = st.sidebar.empty()

        if 'current_page_name' not in st.session_state:
            # Set the default page.
            st.session_state.current_page_name = "Generate Plain Language Summary"

        with app_sidebar:
            page = option_menu(None, ["Generate Plain Language Summary", 'Questionnaire', 'Setup'],
                               icons=['house', 'question-circle', 'gear'],
                               menu_icon="None",
                               default_index=0,
                               styles={})
            authenticator.logout('Logout', 'sidebar', key='unique_key')

        # Fetch user data from the database
        user_data = get_user_data(username)

        if page == "Generate Plain Language Summary":

            st.title("Generate Plain Language Summary")
            st.markdown(
                '''
                ### What is a Plain Language Summary?
                A Plain Language Summary is a clear and concise summary of a scientific paper. It's designed to make complex research findings more accessible and understandable to a general audience.
                #### Detailed Instructions for Generating a Plain Language Summary
                1. **Set Up**: Navigate to the 'Setup' page to input your API key and specify your writing style. This will help tailor the summary to your preferences.
                2. **Complete the Questionnaire**: On the 'Questionnaire' page, you'll also find a questionnaire designed to further tailor the summary to your needs. Please complete it.
                3. **Choose Content Source**: 
                    - **Option A**: If you have access to the full paper, you can upload the whole document.
                    - **Option B**: Alternatively, you can input the abstract of the paper.
                4. **Input Paper Title**: Paste the exact title of the paper you wish to summarize in the text input field below. An accurate title ensures a more relevant summary.
                5. **Generate Summary**: After completing the above steps, click on the 'Submit' button to receive your Plain Language Summary.
                '''
            )

            # Title input box
            title_text = st.text_area("Paste Your Paper Title Here", height=25)

            # Abstract input box
            abstract_text = st.text_area("Paste Abstract Here", height=200)

            # PDF input box and text extraction
            uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

            entire_text = ""
            if uploaded_file is not None:
                reader = PdfReader(uploaded_file)
                # The loop variable is deliberately not called ``page`` so the
                # selected menu entry is not overwritten; ``or ""`` guards
                # pages that yield no extractable text.
                entire_text = "".join(pdf_page.extract_text() or "" for pdf_page in reader.pages)

            # Decrypt the stored API key, if this user has saved one.
            stored_api = user_data.get('api') if user_data else None
            if stored_api:
                st.session_state.api_key = key.decrypt(stored_api.encode()).decode()
            else:
                st.session_state.api_key = ''

            if 'draft_response_content' not in st.session_state:
                st.session_state.draft_response_content = ""

            # Questions the user has not answered yet fall back to '' instead
            # of raising KeyError (e.g. API key saved, questionnaire not done).
            saved_answers = (user_data.get('questionnaire_response') if user_data else None) or {}
            user_response = defaultdict(str, saved_answers)
            profile_fields = ('level_education', 'english_proficiency', 'language_spoken',
                              'tech_usage', 'news_read', 'books_read')
            profile_kwargs = {field: user_response[field] for field in profile_fields}

            # Prefer text extracted from the uploaded PDF; otherwise use the abstract.
            submit_text = entire_text if entire_text else abstract_text

            # TODO: a paper-familiarity slider was prototyped here and is currently disabled.

            if st.button("Submit"):
                if abstract_text == '' and uploaded_file is None:
                    st.warning('Please paste Abstract or upload a file.', icon="⚠️")

                chat_mdl = None
                if st.session_state.api_key:
                    os.environ["OPENAI_API_KEY"] = st.session_state.api_key
                    # Configure the openai client explicitly for this request.
                    openai.api_key = st.session_state.api_key
                    # NOTE(review): chat_mdl is only used as a "key is configured"
                    # flag below; generate_responses calls openai directly —
                    # confirm whether the langchain client is still needed.
                    chat_mdl = OpenAI(model_name='gpt-4', temperature=0.1)
                else:
                    st.warning('Please fill in api-key in Setup.', icon="⚠️")

                if chat_mdl is not None and submit_text:
                    summary = generate_responses(text=submit_text, paper_title=title_text,
                                                 **profile_kwargs)
                    # Keep the previous draft when generation failed.
                    if summary:
                        st.session_state.draft_response_content = summary

            container = st.empty()
            # Output from function
            container.text_area(label="Plain Language Summary", value=st.session_state.draft_response_content,
                                height=350)
            regenerate_prompt = st.text_area("Additional prompting for regenerating summary", height=100)

            if st.button('Regenerate'):
                if not st.session_state.draft_response_content:
                    st.warning('Please Generate a PLS first', icon="⚠️")
                elif regenerate_prompt == "":
                    st.warning('Your new prompt is empty', icon="⚠️")
                elif not st.session_state.api_key:
                    st.warning('Please fill in api-key in Setup.', icon="⚠️")
                else:
                    # This rerun did not go through "Submit", so configure the
                    # openai client here as well.
                    openai.api_key = st.session_state.api_key
                    summary = generate_responses(text=submit_text, paper_title=title_text,
                                                 additional_requirements=regenerate_prompt,
                                                 **profile_kwargs)
                    if summary:
                        st.session_state.draft_response_content = summary
                        # Redraw the output box with the new summary. Only done
                        # when the value changed, so the same widget is not
                        # created twice in one run.
                        container.empty()
                        container.text_area(label="Plain Language Summary",
                                            value=st.session_state.draft_response_content,
                                            height=350)

        # elif page == "History":
        #     st.write('User prompt History TODO')
        elif page == "Setup":
            st.title("Setup")
            # Input boxes with existing data

            if 'api_key' not in st.session_state:
                st.session_state.api_key = ""
            api_input = st.text_input("OpenAI API Token", value=st.session_state.api_key, type='password')
            st.session_state.api_key = api_input

            # Carry the existing answers over so saving the key does not wipe them.
            questionnaire_response = (user_data.get('questionnaire_response') if user_data else None) or {}

            # Update button
            if st.button("Update"):
                db.put(
                    {"key": username, "api": key.encrypt(bytes(api_input, 'utf-8')).decode(),
                     "questionnaire_response": questionnaire_response})
                st.success('Updating successfully!')
        elif page == "Questionnaire":
            # Bound to a separate name so the ``survey`` factory is not rebound.
            user_survey = survey(username)
            if 'questionnaire_response' not in st.session_state:
                st.session_state['questionnaire_response'] = {}
            answers = st.session_state['questionnaire_response']
            page_number = 11
            survey_pages = user_survey.pages(page_number,
                                             on_submit=lambda: update_questionnaire_response(
                                                 st.session_state['questionnaire_response'], username))
            st.progress((survey_pages.current + 1) / page_number)
            with survey_pages:
                if survey_pages.current == 0:
                    st.write("#### What is your level of education? (for research purposes)")
                    level_education = user_survey.radio(
                        label="level_education",
                        options=["Primary School", "Middle School", "Secondary School",
                                 "College", "Masters", "PhD"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=False,
                    )
                    answers['level_education'] = level_education
                elif survey_pages.current == 1:
                    st.write("#### What domains are you most interested in?")
                    domains = ['Global Studies', 'Arts', 'Business & Economics', 'History', 'Humanities',
                               'Law', 'Medicine and Health', 'Science - Biology', 'Science - Chemistry',
                               'Science - Environmental Science', 'Science - Physics', 'Mathematics',
                               'Engineering', 'Social Sciences']
                    # One checkbox per domain; keep the ticked ones.
                    answers['interested_domain'] = [
                        domain for domain in domains if user_survey.checkbox(domain)
                    ]
                elif survey_pages.current == 2:
                    paper_discovery_method = user_survey.text_area("#### How did you come across this paper?")
                    answers['paper_discovery_method'] = paper_discovery_method
                elif survey_pages.current == 3:
                    reading_purpose = user_survey.text_area("#### For what purpose are you reading this paper?")
                    answers['reading_purpose'] = reading_purpose
                elif survey_pages.current == 4:
                    st.write("#### What information do you want to get out of this paper?")
                    information_options = ["Main findings and conclusions",
                                           'Methodology and experimental design',
                                           'Data and statistical analysis',
                                           'Limitations or gaps in the research']
                    desired_information = [
                        option for option in information_options if user_survey.checkbox(option)
                    ]
                    other_info = user_survey.text_input('Other aspects:')
                    if other_info:
                        desired_information.append(other_info)
                    answers['desired_information'] = desired_information
                elif survey_pages.current == 5:
                    st.write("#### what is your level of english proficiency?")
                    english_proficiency = st.slider("English Proficiency (1-5):", min_value=1, max_value=5, value=1)
                    answers['english_proficiency'] = english_proficiency

                elif survey_pages.current == 6:
                    st.write("#### What is the primary language spoken in your home? (click from the list and others)")
                    languages = ['English', 'Spanish', ]
                    language_spoken = [
                        language for language in languages if user_survey.checkbox(language)
                    ]
                    other_language = user_survey.text_input('Other')
                    if other_language:
                        language_spoken.append(other_language)
                    answers['language_spoken'] = language_spoken

                elif survey_pages.current == 7:
                    st.write("#### Do you speak other languages? How fluent are you in each language?")
                    language_fluency = {}
                    col1, col2 = st.columns([3, 2])
                    language_index = 1
                    # Render one language/fluency row at a time; a new empty row
                    # appears as soon as the previous language is filled in.
                    while True:
                        with col1:
                            other_language = user_survey.text_input(f'Language #{language_index}')
                        with col2:
                            fluency = user_survey.selectbox(
                                f'Fluency #{language_index}',
                                options=["", "Beginner", "Intermediate", "Advanced", "Native"],
                            )
                        if other_language and fluency:
                            language_fluency[other_language] = fluency
                        if not other_language:
                            break
                        language_index += 1
                    answers['other_language'] = language_fluency

                elif survey_pages.current == 8:
                    st.write(
                        "#### How much do you use technology (computers, cell phones, tablets, GPS, internet, etc.)?")
                    st.markdown('''
                        * Always: relies heavily on daily tasks
                        * Often in a day: not necessarily every task, but plays a significant role in life
                        * Occasionally: use constantly but not essential for most daily activities
                        * Rarely: use only for specific tasks
                        * Never: avoid using technology
                    ''')
                    tech_usage = user_survey.select_slider(
                        label="tech_usage",
                        options=['Never', 'Rarely', 'Occasionally',
                                 'Often', 'Always'],
                        label_visibility="collapsed",
                    )
                    answers['tech_usage'] = tech_usage
                elif survey_pages.current == 9:
                    st.write("#### How often do you read or watch/listen to the news?")
                    news_read = user_survey.radio(
                        label="news_read",
                        options=["Never", "Once or Twice a Month", "Once a Week",
                                 "Once in 2-3 Days", "Every Day"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=False,
                    )
                    answers['news_read'] = news_read
                elif survey_pages.current == 10:
                    st.write("#### How many books do you read or listen to a month?")
                    books_read = user_survey.radio(
                        label="books_read",
                        options=["0", "1-3", "4-6", "7+"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=True,
                    )
                    answers['books_read'] = books_read

    elif authentication_status is False:
        st.error('Username or Password is incorrect', icon="⚠️")
elif init_page == 'Sign Up':
    try:
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
            st.balloons()
    except Exception as e:
        # Show registration errors to the user instead of crashing the page.
        st.error(e)

# Write the (possibly updated) authenticator config to a local file, then
# upload that file to the Deta Drive.
with open('config.yaml', 'w') as config_file:
    config_file.write(yaml.dump(config, default_flow_style=False))
config_drive.put("config.yaml", path="config.yaml")