Spaces:
Sleeping
Sleeping
import streamlit as st | |
import streamlit_authenticator as stauth | |
from deta import Deta | |
import yaml | |
from yaml.loader import SafeLoader | |
import os | |
from langchain.llms import OpenAI | |
import re | |
from PyPDF2 import PdfReader | |
from streamlit_option_menu import option_menu | |
import streamlit_survey as ss | |
from cryptography.fernet import Fernet | |
import warnings | |
import openai | |
import json | |
from collections import defaultdict | |
warnings.filterwarnings("ignore", category=UserWarning, module='langchain') | |
class Validator: | |
def validate_username(self, username): | |
pattern = r"^[a-zA-Z0-9_-]{1,20}$" | |
return bool(re.match(pattern, username)) | |
def validate_name(self, name): | |
return 1 < len(name) < 100 | |
def validate_email(self, email): | |
pattern = "^[a-zA-Z0-9-_]+@[a-zA-Z0-9]+\.[a-z]{1,3}$" | |
if re.match(pattern, email): | |
return True | |
return False | |
def get_user_data(user): | |
data = db.fetch().items | |
for person in data: | |
if person['key'] == user: | |
return person | |
return None | |
def user_history(time, text, ): | |
pass | |
def update_questionnaire_response(user_response, username): | |
db.update({"questionnaire_response": user_response}, key=username) | |
st.success("Your responses have been recorded. Thank you!") | |
def generate_responses(text, chat_model="gpt-3.5-turbo", paper_title="", level_education="", | |
english_proficiency="", language_spoken="", tech_usage="", | |
news_read="", books_read="", additional_requirements="None"): | |
# Incorporating the parameters into the context | |
text = text[:2048] | |
user_context = f""" | |
The user has achieved an education level up tp {level_education}. In daily routine, the user describes the frequency of using | |
technology such as computers, cell phones, and tablets as {tech_usage}. The user's primary language spoken at home is | |
{language_spoken}, and has {english_proficiency} level of English proficiency. The user {news_read} reads or watches | |
the news and reads approximately {books_read} books in a month. | |
""" | |
# print(user_context) | |
# Prompt template | |
prompt_template = f""" | |
Here's the abstract of a paper (titled) {paper_title}: {text}. | |
Considering the user's information: {user_context}. | |
And user's additional requirements: {additional_requirements}. | |
Generate a plain language summary that summarizes the abstract. While creating this Plain Language Summary, please keep the following must-have elements in mind: | |
- The plain language summary should achieve readability at the 8th Grade level as measured by the Flesch Kincaid scale. | |
- The plain language summary should achieve Flesh Reading Ease Scores from 60 to 70. | |
- Ensure each sentence is shorter than 25 words. | |
- The summary should average 6 sentences per paragraph. | |
- Less than 10% of the sentences should be in passive voice. | |
- Ensure fidelity to the original source. | |
- Use clear and simple language, avoiding jargon. | |
- Maintain ethical considerations, including objectivity and inclusivity. | |
- Aim for universal readability, targeting a reading age of 14-17 years. | |
- Consider multi-language accessibility. | |
- Take into account any operational context or guidelines that may apply. | |
- The plain language summary should be a single paragraph, without subtitles or bullet points. | |
""" | |
conversation = [ | |
{'role': 'system', 'content': 'You are a helpful assistant.'}, | |
{'role': 'user', 'content': prompt_template} | |
] | |
try: | |
response = openai.ChatCompletion.create( | |
model=chat_model, | |
messages=conversation | |
) | |
return response['choices'][0]['message']['content'] | |
except: | |
st.error('Invalid api key.', icon="⚠️") | |
def survey(user_name): | |
title = user_name + '_survey' | |
return ss.StreamlitSurvey(title) | |
# connect to/create Deta user database | |
db_key = st.secrets["deta_key"] | |
deta = Deta(db_key) | |
db = deta.Base("user_data") | |
key = Fernet(st.secrets['fernet_key']) | |
config_drive = deta.Drive("config") | |
config = config_drive.get("config.yaml").read() | |
config = yaml.load(config, Loader=SafeLoader) | |
# Create an authenticator | |
authenticator = stauth.Authenticate( | |
config['credentials'], | |
config['cookie']['name'], | |
config['cookie']['key'], | |
config['cookie']['expiry_days'], | |
config['preauthorized'] | |
) | |
authenticator.validator = Validator() | |
init_sidebar = st.sidebar.empty() | |
with init_sidebar: | |
init_page = option_menu(None, | |
["Login", 'Sign Up'], | |
icons=['lightbulb-fill', 'lightbulb'], | |
menu_icon="cast", | |
default_index=0, | |
styles={}) | |
if init_page == 'Login': | |
name, authentication_status, username = authenticator.login('Login', 'main') | |
if authentication_status: | |
init_sidebar.empty() | |
st.sidebar.write(f'**Welcome** {name}') | |
app_sidebar = st.sidebar.empty() | |
if 'current_page_name' not in st.session_state: | |
st.session_state.current_page_name = "Generate Plain Language Summary" # 设置默认页面 | |
with app_sidebar: | |
page = option_menu(None, ["Generate Plain Language Summary", 'Questionnaire', 'Setup'], | |
icons=['house', 'question-circle', 'gear'], | |
menu_icon="None", | |
default_index=0, | |
styles={}) | |
authenticator.logout('Logout', 'sidebar', key='unique_key') | |
# Fetch user data from the database | |
user_data = get_user_data(username) | |
# print('current page: ', page) | |
if page == "Generate Plain Language Summary": | |
st.title("Generate Plain Language Summary") | |
st.markdown( | |
''' | |
### What is a Plain Language Summary? | |
A Plain Language Summary is a clear and concise summary of a scientific paper. It's designed to make complex research findings more accessible and understandable to a general audience. | |
#### Detailed Instructions for Generating a Plain Language Summary | |
1. **Set Up**: Navigate to the 'Set Up' page to input your API key and specify your writing style. This will help tailor the summary to your preferences. | |
2. **Complete the Questionnaire**: On the 'Questionnaire' page, you'll also find a questionnaire designed to further tailor the summary to your needs. Please complete it. | |
3. **Choose Content Source**: | |
- **Option A**: If you have access to the full paper, you can upload the whole document. | |
- **Option B**: Alternatively, you can input the abstract of the paper. | |
4. **Input Paper Title**: Paste the exact title of the paper you wish to summarize in the text input field below. An accurate title ensures a more relevant summary. | |
5. **Generate Summary**: After completing the above steps, click on the 'Generate' button to receive your Plain Language Summary. | |
''' | |
) | |
# Title input box | |
title_text = st.text_area("Paste Your Paper Title Here", height=25) | |
# Abstract input box | |
abstract_text = st.text_area("Paste Abstract Here", height=200) | |
# background_info = st.text_area("Background information on original post (references, relevant information, best practices for responding)", height=200) | |
# PDF input box and text extraction | |
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") | |
chat_mdl = None | |
draft_response = '' | |
entire_text = "" | |
if uploaded_file is not None: | |
reader = PdfReader(uploaded_file) | |
num_pages = len(reader.pages) | |
for page_num in range(num_pages): | |
page = reader.pages[page_num] | |
page_text = page.extract_text() | |
entire_text += page_text | |
if user_data: | |
st.session_state.api_key = key.decrypt(user_data['api'].encode()).decode() | |
else: | |
st.session_state.api_key = '' | |
if 'draft_response_content' not in st.session_state: | |
st.session_state.draft_response_content = "" | |
draft_response = '' | |
user_response = user_data['questionnaire_response'] if user_data else defaultdict(lambda: '') | |
submit_text = '' | |
if entire_text: | |
submit_text = entire_text | |
else: | |
submit_text = abstract_text | |
# Check if the "Submit" button is clicked | |
# st.write("#### What is your familiarity with the concepts of the paper?") | |
# st.markdown(''' | |
# * No Familiarity: entirely unfamiliar, no prior knowledge | |
# * Limited Familiarity: basic awareness of the concepts in the paper, but not in-depth knowledge | |
# * Moderate Familiarity: reasonable understanding of the concepts in the paper, encountered before, or some background knowledge | |
# * Good Familiarity: a solid understanding due to prior exposure or study | |
# * Expert: highly knowledgeable and experienced in the field and has worked extensively with these concepts | |
# ''') | |
# paper_familiarity = st.select_slider( | |
# label="paper_familiarity", | |
# options=['No Familiarity', 'Limited Familiarity', 'Moderate Familiarity', | |
# 'Good Familiarity', 'Expert Familiarity'], | |
# label_visibility="collapsed", | |
# ) | |
# | |
# if 'paper_familiarity' not in st.session_state: | |
# st.session_state['paper_familiarity'] = '' | |
# st.session_state['paper_familiarity'] = paper_familiarity | |
if st.button("Submit"): | |
if abstract_text == '' and uploaded_file == None: | |
st.warning('Please paste Abstract or upload a file.', icon="⚠️") | |
if st.session_state.api_key: | |
os.environ["OPENAI_API_KEY"] = st.session_state.api_key | |
chat_mdl = OpenAI(model_name='gpt-4', temperature=0.1) | |
else: | |
st.warning('Please fill in api-key in Setup.', icon="⚠️") | |
if chat_mdl is not None and submit_text: | |
st.session_state.draft_response_content = generate_responses( | |
text=submit_text, | |
paper_title=title_text, | |
level_education=user_response[ | |
'level_education'], | |
english_proficiency=user_response[ | |
'english_proficiency'], | |
language_spoken=user_response[ | |
'language_spoken'], | |
tech_usage=user_response[ | |
'tech_usage'], | |
news_read=user_response[ | |
'news_read'], | |
books_read=user_response[ | |
'books_read'], | |
) | |
container = st.empty() | |
# Output from function | |
container.text_area(label="Plain Language Summary", value=st.session_state.draft_response_content, | |
height=350) | |
regenerate_prompt = st.text_area("Additional prompting for regenerating summary", height=100) | |
if st.button('Regenerate'): | |
if st.session_state.draft_response_content == "": | |
st.warning('Please Generate a PLS first', icon="⚠️") | |
elif regenerate_prompt == "": | |
st.warning('Your new prompt is empty', icon="⚠️") | |
else: | |
additional_prompt = regenerate_prompt | |
st.session_state.draft_response_content = generate_responses( | |
text=submit_text, | |
paper_title=title_text, | |
level_education=user_response[ | |
'level_education'], | |
english_proficiency=user_response[ | |
'english_proficiency'], | |
language_spoken=user_response[ | |
'language_spoken'], | |
tech_usage=user_response[ | |
'tech_usage'], | |
news_read=user_response[ | |
'news_read'], | |
books_read=user_response[ | |
'books_read'], | |
additional_requirements=additional_prompt, | |
) | |
container.empty() | |
container.text_area(label="Plain Language Summary", value=st.session_state.draft_response_content, | |
height=350) | |
# some function to re-submit prompt and generate new PLS | |
# elif page == "History": | |
# st.write('User prompt History TODO') | |
elif page == "Setup": | |
st.title("Setup") | |
# Input boxes with existing data | |
if 'api_key' not in st.session_state: | |
st.session_state.api_key = "" | |
api_input = st.text_input("OpenAI API Token", value=st.session_state.api_key, type='password') | |
st.session_state.api_key = api_input | |
questionnaire_response = user_data['questionnaire_response'] if user_data else {} | |
# Update button | |
if st.button("Update"): | |
db.put( | |
{"key": username, "api": key.encrypt(bytes(api_input, 'utf-8')).decode(), | |
"questionnaire_response": questionnaire_response}) | |
st.success('Updating successfully!') | |
elif page == "Questionnaire": | |
survey = survey(username) | |
if 'questionnaire_response' not in st.session_state: | |
st.session_state['questionnaire_response'] = {} | |
# print(st.session_state['questionnaire_response']) | |
page_number = 11 | |
survey_pages = survey.pages(page_number, | |
on_submit=lambda: update_questionnaire_response( | |
st.session_state['questionnaire_response'], username)) | |
# st.session_state["__streamlit-survey-data__Pages_"] = survey_pages.current | |
st.progress((survey_pages.current + 1) / page_number) | |
with survey_pages: | |
if survey_pages.current == 0: | |
st.write("#### What is your level of education? (for research purposes)") | |
level_education = survey.radio( | |
label="level_education", | |
options=["Primary School", "Middle School", "Secondary School", | |
"College", "Masters", "PhD"], | |
index=0, | |
label_visibility="collapsed", | |
horizontal=False, | |
) | |
st.session_state['questionnaire_response']['level_education'] = level_education | |
elif survey_pages.current == 1: | |
st.write("#### What domains are you most interested in?") | |
domains = ['Global Studies', 'Arts', 'Business & Economics', 'History', 'Humanities', | |
'Law', 'Medicine and Health', 'Science - Biology', 'Science - Chemistry', | |
'Science - Environmental Science', 'Science - Physics', 'Mathematics', | |
'Engineering', 'Social Sciences'] | |
domains_interested = {} | |
for i in range(len(domains)): | |
domains_interested[domains[i]] = survey.checkbox(domains[i]) | |
interested_domain = [] | |
for domain in domains_interested: | |
if domains_interested[domain]: | |
interested_domain.append(domain) | |
st.session_state['questionnaire_response']['interested_domain'] = interested_domain | |
elif survey_pages.current == 2: | |
paper_discovery_method = survey.text_area("#### How did you come across this paper?") | |
st.session_state['questionnaire_response']['paper_discovery_method'] = paper_discovery_method | |
elif survey_pages.current == 3: | |
reading_purpose = survey.text_area("#### For what purpose are you reading this paper?") | |
st.session_state['questionnaire_response']['reading_purpose'] = reading_purpose | |
elif survey_pages.current == 4: | |
st.write("#### What information do you want to get out of this paper?") | |
information_options = ["Main findings and conclusions", | |
'Methodology and experimental design', | |
'Data and statistical analysis', | |
'Limitations or gaps in the research'] | |
info_interested = {} | |
for i in range(len(information_options)): | |
info_interested[information_options[i]] = survey.checkbox(information_options[i]) | |
desired_information = [] | |
for info in info_interested: | |
if info_interested[info]: | |
desired_information.append(info) | |
other_info = survey.text_input('Other aspects:') | |
if other_info: | |
desired_information.append(other_info) | |
st.session_state['questionnaire_response']['desired_information'] = desired_information | |
elif survey_pages.current == 5: | |
st.write("#### what is your level of english proficiency?") | |
english_proficiency = st.slider("English Proficiency (1-5):", min_value=1, max_value=5, value=1) | |
st.session_state['questionnaire_response']['english_proficiency'] = english_proficiency | |
elif survey_pages.current == 6: | |
st.write("#### What is the primary language spoken in your home? (click from the list and others)") | |
languages = ['English', 'Spanish', ] | |
language_options = {} | |
for i in range(len(languages)): | |
language_options[languages[i]] = survey.checkbox(languages[i]) | |
language_spoken = [] | |
for language in language_options: | |
if language_options[language]: | |
language_spoken.append(language) | |
other_language = survey.text_input('Other') | |
if other_language: | |
language_spoken.append(other_language) | |
st.session_state['questionnaire_response']['language_spoken'] = language_spoken | |
elif survey_pages.current == 7: | |
st.write("#### Do you speak other languages? How fluent are you in each language?") | |
language_fluency = {} | |
language_index = 1 | |
col1, col2 = st.columns([3, 2]) | |
with col1: | |
other_language = survey.text_input(f'Language {"#" + str(language_index)}') | |
with col2: | |
fluency = survey.selectbox(f'Fluency {"#" + str(language_index)}', | |
options=["", "Beginner", "Intermediate", "Advanced", "Native"], | |
) | |
if other_language and fluency: | |
language_fluency.update({other_language: fluency}) | |
while other_language: | |
language_index += 1 | |
with col1: | |
other_language = survey.text_input(f'Language {"#" + str(language_index)}') | |
with col2: | |
fluency = survey.selectbox(f'Fluency {"#" + str(language_index)}', | |
options=["", "Beginner", "Intermediate", "Advanced", "Native"], | |
) | |
if other_language and fluency: | |
language_fluency.update({other_language: fluency}) | |
st.session_state['questionnaire_response']['other_language'] = language_fluency | |
elif survey_pages.current == 8: | |
st.write( | |
"#### How much do you use technology (computers, cell phones, tablets, GPS, internet, etc.)?") | |
st.markdown(''' | |
* Always: relies heavily on daily tasks | |
* Often in a day: not necessarily every task, but plays a significant role in life | |
* Occasionally: use constantly but not essential for most daily activities | |
* Rarely: use only for specific tasks | |
* Never: avoid using technology | |
''') | |
tech_usage = survey.select_slider( | |
label="tech_usage", | |
options=['Never', 'Rarely', 'Occasionally', | |
'Often', 'Always'], | |
# min_value=1, | |
# max_value=5, | |
label_visibility="collapsed", | |
) | |
st.session_state['questionnaire_response']['tech_usage'] = tech_usage | |
elif survey_pages.current == 9: | |
st.write("#### How often do you read or watch/listen to the news?") | |
news_read = survey.radio( | |
label="news_read", | |
options=["Never", "Once or Twice a Month", "Once a Week", | |
"Once in 2-3 Days", "Every Day"], | |
index=0, | |
label_visibility="collapsed", | |
horizontal=False, | |
) | |
st.session_state['questionnaire_response']['news_read'] = news_read | |
elif survey_pages.current == 10: | |
st.write("#### How many books do you read or listen to a month?") | |
books_read = survey.radio( | |
label="books_read", | |
options=["0", "1-3", "4-6", "7+"], | |
index=0, | |
label_visibility="collapsed", | |
horizontal=True, | |
) | |
st.session_state['questionnaire_response']['books_read'] = books_read | |
elif authentication_status is False: | |
st.error('Username or Password is incorrect', icon="⚠️") | |
elif init_page == 'Sign Up': | |
try: | |
if authenticator.register_user('Register user', preauthorization=False): | |
st.success('User registered successfully') | |
st.balloons() | |
except Exception as e: | |
st.error(e) | |
with open('config.yaml', 'w') as file: | |
yaml.dump(config, file, default_flow_style=False) | |
config_drive.put("config.yaml", path="config.yaml") | |