"""Streamlit app that generates Plain Language Summaries (PLS) of papers.

The script authenticates a user, stores an encrypted OpenAI API key and a
questionnaire profile per user in a Deta Base, and asks an OpenAI chat model
to summarise an abstract (or an uploaded PDF) tailored to that profile.
"""
import json
import os
import re
import warnings
from collections import defaultdict

import openai
import streamlit as st
import streamlit_authenticator as stauth
import streamlit_survey as ss
import yaml
from cryptography.fernet import Fernet
from deta import Deta
from langchain.llms import OpenAI
from PyPDF2 import PdfReader
from streamlit_option_menu import option_menu
from yaml.loader import SafeLoader

warnings.filterwarnings("ignore", category=UserWarning, module='langchain')

# Keys of the questionnaire answers that are forwarded to the prompt.
_PROFILE_FIELDS = (
    'level_education',
    'english_proficiency',
    'language_spoken',
    'tech_usage',
    'news_read',
    'books_read',
)

_FLUENCY_LEVELS = ["", "Beginner", "Intermediate", "Advanced", "Native"]

_INSTRUCTIONS_MD = '''
### What is a Plain Language Summary?
A Plain Language Summary is a clear and concise summary of a scientific paper. It's designed to make complex research findings more accessible and understandable to a general audience.

#### Detailed Instructions for Generating a Plain Language Summary
1. **Set Up**: Navigate to the 'Set Up' page to input your API key and specify your writing style. This will help tailor the summary to your preferences.
2. **Complete the Questionnaire**: On the 'Questionnaire' page, you'll also find a questionnaire designed to further tailor the summary to your needs. Please complete it.
3. **Choose Content Source**:
    - **Option A**: If you have access to the full paper, you can upload the whole document.
    - **Option B**: Alternatively, you can input the abstract of the paper.
4. **Input Paper Title**: Paste the exact title of the paper you wish to summarize in the text input field below. An accurate title ensures a more relevant summary.
5. **Generate Summary**: After completing the above steps, click on the 'Generate' button to receive your Plain Language Summary.
'''

_TECH_USAGE_MD = '''
* Always: relies heavily on daily tasks
* Often in a day: not necessarily every task, but plays a significant role in life
* Occasionally: use constantly but not essential for most daily activities
* Rarely: use only for specific tasks
* Never: avoid using technology
'''


class Validator:
    """Form-field validators installed on the authenticator (see below)."""

    def validate_username(self, username):
        """Return True for 1-20 characters out of letters, digits, '_' and '-'."""
        pattern = r"^[a-zA-Z0-9_-]{1,20}$"
        return bool(re.match(pattern, username))

    def validate_name(self, name):
        """Return True when the name is between 2 and 99 characters long."""
        return 1 < len(name) < 100

    def validate_email(self, email):
        """Return True when the e-mail matches the app's simple address pattern."""
        # Raw string: the original non-raw literal relied on the invalid
        # escape "\." being passed through unchanged.
        pattern = r"^[a-zA-Z0-9-_]+@[a-zA-Z0-9]+\.[a-z]{1,3}$"
        return bool(re.match(pattern, email))


def get_user_data(user):
    """Return the stored record whose key is ``user``, or None if absent.

    Uses a keyed lookup instead of scanning ``db.fetch().items``: the scan
    only inspected the first page of results and grew with the user count.
    """
    if not user:
        return None
    return db.get(user)


def user_history(time, text, ):
    """Placeholder for a per-user prompt history; not implemented yet."""
    pass


def update_questionnaire_response(user_response, username):
    """Persist the questionnaire answers for ``username`` and confirm in the UI.

    ``db.update`` requires an existing record, so a user who has not saved
    anything on the Setup page yet gets a fresh record via ``db.put``.
    """
    if db.get(username) is None:
        db.put({"questionnaire_response": user_response}, key=username)
    else:
        db.update({"questionnaire_response": user_response}, key=username)
    st.success("Your responses have been recorded. Thank you!")


def generate_responses(text, chat_model="gpt-3.5-turbo", paper_title="",
                       level_education="", english_proficiency="",
                       language_spoken="", tech_usage="", news_read="",
                       books_read="", additional_requirements="None",
                       api_key=None):
    """Ask the chat model for a plain language summary of ``text``.

    Args:
        text: Abstract or full paper text; only the first 2048 characters
            are sent.
        chat_model: OpenAI chat model name.
        paper_title: Title of the paper, quoted in the prompt.
        level_education, english_proficiency, language_spoken, tech_usage,
            news_read, books_read: Questionnaire answers describing the reader.
        additional_requirements: Extra free-text instructions from the user.
        api_key: Optional OpenAI key sent with this request only. When
            omitted, the ``openai`` module's globally configured key is used.

    Returns:
        The generated summary, or None when the request failed (the error is
        reported through ``st.error``).
    """
    # Incorporating the parameters into the context
    text = text[:2048]
    user_context = (
        f"The user has achieved an education level up to {level_education}.\n"
        "In daily routine, the user describes the frequency of using technology "
        f"such as computers, cell phones, and tablets as {tech_usage}.\n"
        f"The user's primary language spoken at home is {language_spoken}, "
        f"and has {english_proficiency} level of English proficiency.\n"
        f"The user {news_read} reads or watches the news and reads "
        f"approximately {books_read} books in a month.\n"
    )

    # Prompt template
    prompt_template = (
        f"Here's the abstract of a paper (titled) {paper_title}: {text}.\n"
        f"Considering the user's information: {user_context}.\n"
        f"And user's additional requirements: {additional_requirements}.\n"
        "Generate a plain language summary that summarizes the abstract.\n"
        "While creating this Plain Language Summary, please keep the following "
        "must-have elements in mind:\n"
        "- The plain language summary should achieve readability at the 8th "
        "Grade level as measured by the Flesch Kincaid scale.\n"
        "- The plain language summary should achieve Flesch Reading Ease Scores "
        "from 60 to 70.\n"
        "- Ensure each sentence is shorter than 25 words.\n"
        "- The summary should average 6 sentences per paragraph.\n"
        "- Less than 10% of the sentences should be in passive voice.\n"
        "- Ensure fidelity to the original source.\n"
        "- Use clear and simple language, avoiding jargon.\n"
        "- Maintain ethical considerations, including objectivity and "
        "inclusivity.\n"
        "- Aim for universal readability, targeting a reading age of 14-17 "
        "years.\n"
        "- Consider multi-language accessibility.\n"
        "- Take into account any operational context or guidelines that may "
        "apply.\n"
        "- The plain language summary should be a single paragraph, without "
        "subtitles or bullet points.\n"
    )
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt_template},
    ]
    # Send the key with the request instead of relying on process-wide state,
    # which is shared by every session served from this process.
    request_auth = {'api_key': api_key} if api_key else {}
    try:
        response = openai.ChatCompletion.create(
            model=chat_model,
            messages=conversation,
            **request_auth
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        st.error(e)
        st.error('Invalid api key.', icon="⚠️")
        return None


@st.cache_resource
def survey(user_name):
    """Return the cached StreamlitSurvey instance belonging to ``user_name``."""
    title = user_name + '_survey'
    return ss.StreamlitSurvey(title)


def _checked_options(form, options):
    """Render one checkbox per option and return the options that are ticked."""
    return [option for option in options if form.checkbox(option)]


def _profile_kwargs(user_response):
    """Pick the prompt-relevant answers, defaulting missing ones to ''."""
    return {field: user_response.get(field, '') for field in _PROFILE_FIELDS}


def _render_summary_page(user_data):
    """Render the page that collects the paper and generates the summary."""
    st.title("Generate Plain Language Summary")
    st.markdown(_INSTRUCTIONS_MD)

    # Title input box
    title_text = st.text_area("Paste Your Paper Title Here", height=25)
    # Abstract input box
    abstract_text = st.text_area("Paste Abstract Here", height=200)
    # background_info = st.text_area("Background information on original post (references, relevant information, best practices for responding)", height=200)

    # PDF input box and text extraction
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    entire_text = ""
    if uploaded_file is not None:
        reader = PdfReader(uploaded_file)
        entire_text = "".join(
            pdf_page.extract_text() or "" for pdf_page in reader.pages
        )

    # The key is stored encrypted; a record created by the questionnaire
    # alone has no 'api' entry yet.
    encrypted_key = user_data.get('api') if user_data else None
    if encrypted_key:
        st.session_state.api_key = key.decrypt(encrypted_key.encode()).decode()
    else:
        st.session_state.api_key = ''

    if 'draft_response_content' not in st.session_state:
        st.session_state.draft_response_content = ""

    # Saving Setup before the questionnaire stores an empty dict, so the
    # answers are read with defaults rather than by direct indexing.
    user_response = (user_data or {}).get('questionnaire_response') or {}
    profile = _profile_kwargs(user_response)

    # An uploaded PDF takes precedence over the pasted abstract.
    submit_text = entire_text if entire_text else abstract_text

    # Check if the "Submit" button is clicked
    # st.write("#### What is your familiarity with the concepts of the paper?")
    # st.markdown('''
    #     * No Familiarity: entirely unfamiliar, no prior knowledge
    #     * Limited Familiarity: basic awareness of the concepts in the paper, but not in-depth knowledge
    #     * Moderate Familiarity: reasonable understanding of the concepts in the paper, encountered before, or some background knowledge
    #     * Good Familiarity: a solid understanding due to prior exposure or study
    #     * Expert: highly knowledgeable and experienced in the field and has worked extensively with these concepts
    # ''')
    # paper_familiarity = st.select_slider(
    #     label="paper_familiarity",
    #     options=['No Familiarity', 'Limited Familiarity', 'Moderate Familiarity',
    #              'Good Familiarity', 'Expert Familiarity'],
    #     label_visibility="collapsed",
    # )
    #
    # if 'paper_familiarity' not in st.session_state:
    #     st.session_state['paper_familiarity'] = ''
    # st.session_state['paper_familiarity'] = paper_familiarity
    if st.button("Submit"):
        if abstract_text == '' and uploaded_file is None:
            st.warning('Please paste Abstract or upload a file.', icon="⚠️")
        if not st.session_state.api_key:
            st.warning('Please fill in api-key in Setup.', icon="⚠️")
        elif submit_text:
            summary = generate_responses(
                text=submit_text,
                paper_title=title_text,
                api_key=st.session_state.api_key,
                **profile
            )
            # Keep the previous draft when the request failed.
            if summary:
                st.session_state.draft_response_content = summary

    container = st.empty()
    # Output from function
    container.text_area(label="Plain Language Summary",
                        value=st.session_state.draft_response_content,
                        height=350)

    regenerate_prompt = st.text_area(
        "Additional prompting for regenerating summary", height=100)
    if st.button('Regenerate'):
        if st.session_state.draft_response_content == "":
            st.warning('Please Generate a PLS first', icon="⚠️")
        elif regenerate_prompt == "":
            st.warning('Your new prompt is empty', icon="⚠️")
        else:
            summary = generate_responses(
                text=submit_text,
                paper_title=title_text,
                additional_requirements=regenerate_prompt,
                api_key=st.session_state.api_key,
                **profile
            )
            if summary:
                st.session_state.draft_response_content = summary
                # Redraw the summary box with the regenerated text.
                container.empty()
                container.text_area(
                    label="Plain Language Summary",
                    value=st.session_state.draft_response_content,
                    height=350)


def _render_setup_page(user_data, username):
    """Render the page where the user stores the OpenAI API token."""
    st.title("Setup")
    # Input boxes with existing data
    if 'api_key' not in st.session_state:
        st.session_state.api_key = ""
    api_input = st.text_input("OpenAI API Token",
                              value=st.session_state.api_key,
                              type='password')
    st.session_state.api_key = api_input
    # Carry the saved questionnaire over, because put() replaces the record.
    questionnaire_response = (
        (user_data or {}).get('questionnaire_response') or {}
    )

    # Update button
    if st.button("Update"):
        db.put({
            "key": username,
            "api": key.encrypt(bytes(api_input, 'utf-8')).decode(),
            "questionnaire_response": questionnaire_response,
        })
        st.success('Updating successfully!')


def _render_language_fluency(form):
    """Render language/fluency rows until an empty language is left; return them.

    A new row appears as soon as the previous row's language is filled in.
    Only rows with both a language and a fluency level are recorded.
    """
    language_fluency = {}
    col1, col2 = st.columns([3, 2])
    language_index = 1
    while True:
        with col1:
            other_language = form.text_input(f'Language #{language_index}')
        with col2:
            fluency = form.selectbox(f'Fluency #{language_index}',
                                     options=_FLUENCY_LEVELS)
        if not other_language:
            break
        if fluency:
            language_fluency[other_language] = fluency
        language_index += 1
    return language_fluency


def _render_questionnaire_page(username):
    """Render the multi-page questionnaire and stash answers in session state."""
    user_survey = survey(username)
    if 'questionnaire_response' not in st.session_state:
        st.session_state['questionnaire_response'] = {}
    answers = st.session_state['questionnaire_response']

    page_number = 11
    survey_pages = user_survey.pages(
        page_number,
        on_submit=lambda: update_questionnaire_response(
            st.session_state['questionnaire_response'], username))
    # st.session_state["__streamlit-survey-data__Pages_"] = survey_pages.current
    st.progress((survey_pages.current + 1) / page_number)

    with survey_pages:
        current = survey_pages.current
        if current == 0:
            st.write("#### What is your level of education? (for research purposes)")
            answers['level_education'] = user_survey.radio(
                label="level_education",
                options=["Primary School", "Middle School", "Secondary School",
                         "College", "Masters", "PhD"],
                index=0,
                label_visibility="collapsed",
                horizontal=False,
            )
        elif current == 1:
            st.write("#### What domains are you most interested in?")
            domains = ['Global Studies', 'Arts', 'Business & Economics',
                       'History', 'Humanities', 'Law', 'Medicine and Health',
                       'Science - Biology', 'Science - Chemistry',
                       'Science - Environmental Science', 'Science - Physics',
                       'Mathematics', 'Engineering', 'Social Sciences']
            answers['interested_domain'] = _checked_options(user_survey, domains)
        elif current == 2:
            answers['paper_discovery_method'] = user_survey.text_area(
                "#### How did you come across this paper?")
        elif current == 3:
            answers['reading_purpose'] = user_survey.text_area(
                "#### For what purpose are you reading this paper?")
        elif current == 4:
            st.write("#### What information do you want to get out of this paper?")
            information_options = ["Main findings and conclusions",
                                   'Methodology and experimental design',
                                   'Data and statistical analysis',
                                   'Limitations or gaps in the research']
            desired_information = _checked_options(user_survey,
                                                   information_options)
            other_info = user_survey.text_input('Other aspects:')
            if other_info:
                desired_information.append(other_info)
            answers['desired_information'] = desired_information
        elif current == 5:
            st.write("#### what is your level of english proficiency?")
            answers['english_proficiency'] = st.slider(
                "English Proficiency (1-5):", min_value=1, max_value=5, value=1)
        elif current == 6:
            st.write("#### What is the primary language spoken in your home? (click from the list and others)")
            languages = ['English', 'Spanish', ]
            language_spoken = _checked_options(user_survey, languages)
            other_language = user_survey.text_input('Other')
            if other_language:
                language_spoken.append(other_language)
            answers['language_spoken'] = language_spoken
        elif current == 7:
            st.write("#### Do you speak other languages? How fluent are you in each language?")
            answers['other_language'] = _render_language_fluency(user_survey)
        elif current == 8:
            st.write(
                "#### How much do you use technology (computers, cell phones, tablets, GPS, internet, etc.)?")
            st.markdown(_TECH_USAGE_MD)
            answers['tech_usage'] = user_survey.select_slider(
                label="tech_usage",
                options=['Never', 'Rarely', 'Occasionally', 'Often', 'Always'],
                label_visibility="collapsed",
            )
        elif current == 9:
            st.write("#### How often do you read or watch/listen to the news?")
            answers['news_read'] = user_survey.radio(
                label="news_read",
                options=["Never", "Once or Twice a Month", "Once a Week",
                         "Once in 2-3 Days", "Every Day"],
                index=0,
                label_visibility="collapsed",
                horizontal=False,
            )
        elif current == 10:
            st.write("#### How many books do you read or listen to a month?")
            answers['books_read'] = user_survey.radio(
                label="books_read",
                options=["0", "1-3", "4-6", "7+"],
                index=0,
                label_visibility="collapsed",
                horizontal=True,
            )


# connect to/create Deta user database
db_key = st.secrets["deta_key"]
deta = Deta(db_key)
db = deta.Base("user_data")
# Symmetric cipher used to store each user's API key encrypted.
key = Fernet(st.secrets['fernet_key'])
config_drive = deta.Drive("config")
config = config_drive.get("config.yaml").read()
config = yaml.load(config, Loader=SafeLoader)

# Create an authenticator
authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized']
)
authenticator.validator = Validator()

init_sidebar = st.sidebar.empty()
with init_sidebar:
    init_page = option_menu(None, ["Login", 'Sign Up'],
                            icons=['lightbulb-fill', 'lightbulb'],
                            menu_icon="cast", default_index=0, styles={})

if init_page == 'Login':
    name, authentication_status, username = authenticator.login('Login', 'main')
    if authentication_status:
        # Replace the login/sign-up menu with the application menu.
        init_sidebar.empty()
        st.sidebar.write(f'**Welcome** {name}')
        app_sidebar = st.sidebar.empty()
        if 'current_page_name' not in st.session_state:
            # Set the default page
            st.session_state.current_page_name = "Generate Plain Language Summary"
        with app_sidebar:
            page = option_menu(None,
                               ["Generate Plain Language Summary",
                                'Questionnaire', 'Setup'],
                               icons=['house', 'question-circle', 'gear'],
                               menu_icon="None", default_index=0, styles={})
        authenticator.logout('Logout', 'sidebar', key='unique_key')

        # Fetch user data from the database
        user_data = get_user_data(username)

        if page == "Generate Plain Language Summary":
            _render_summary_page(user_data)
        # some function to re-submit prompt and generate new PLS
        # elif page == "History":
        #     st.write('User prompt History TODO')
        elif page == "Setup":
            _render_setup_page(user_data, username)
        elif page == "Questionnaire":
            _render_questionnaire_page(username)
    elif authentication_status is False:
        st.error('Username or Password is incorrect', icon="⚠️")
elif init_page == 'Sign Up':
    registered = False
    try:
        registered = authenticator.register_user('Register user',
                                                 preauthorization=False)
    except Exception as e:
        st.error(e)
    if registered:
        st.success('User registered successfully')
        st.balloons()
        # Persist the new credentials: write the YAML locally, then upload it
        # to the Deta Drive the app loads its configuration from.
        with open('config.yaml', 'w') as file:
            yaml.dump(config, file, default_flow_style=False)
        config_drive.put("config.yaml", path="config.yaml")