Spaces:

yuki-816
/

science-communication

Sleeping

App Files Files Community

science-communication / streamlit_app.py

yuki-816

Upload streamlit_app.py

01fb2fc over 1 year ago

raw

history blame

23.7 kB

	import streamlit as st
	import streamlit_authenticator as stauth
	from deta import Deta
	import yaml
	from yaml.loader import SafeLoader
	import os
	from langchain.llms import OpenAI
	import re
	from PyPDF2 import PdfReader
	from streamlit_option_menu import option_menu
	import streamlit_survey as ss
	from cryptography.fernet import Fernet
	import warnings
	import openai
	import json
	from collections import defaultdict

	warnings.filterwarnings("ignore", category=UserWarning, module='langchain')


	class Validator:
	    """Validation rules for the username, name and e-mail fields.

	    An instance is assigned to the authenticator as its ``validator``.
	    Every method returns ``True`` when the value is acceptable and ``False``
	    otherwise.
	    """

	    # Patterns are compiled once and applied with ``fullmatch``: with
	    # ``re.match`` a ``$`` anchor also matches just before a trailing newline,
	    # so "name\n" used to be accepted.
	    _USERNAME_RE = re.compile(r"[a-zA-Z0-9_-]{1,20}")
	    # Superset of the previous rule: additionally allows dots/plus signs in the
	    # local part, sub-domains, hyphenated domains and top-level domains longer
	    # than three letters (e.g. ".info").
	    _EMAIL_RE = re.compile(
	        r"[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]{1,63}"
	    )

	    def validate_username(self, username):
	        """Return True if ``username`` is 1-20 letters, digits, '_' or '-'."""
	        return bool(self._USERNAME_RE.fullmatch(username))

	    def validate_name(self, name):
	        """Return True if ``name`` is between 2 and 99 characters long."""
	        return 1 < len(name) < 100

	    def validate_email(self, email):
	        """Return True if ``email`` looks like ``local@domain.tld``."""
	        return bool(self._EMAIL_RE.fullmatch(email))


	def get_user_data(user):
	    """Return the stored record for ``user`` from the Deta base, or ``None``.

	    The record is looked up directly by its key instead of scanning
	    ``db.fetch().items``: a single ``fetch()`` call returns only one page of
	    results, so the scan could miss users once the base outgrows a page, and
	    it downloaded every user's record on each page render.

	    Args:
	        user: Username; records in this base are written with
	            ``"key": username``.

	    Returns:
	        The record dict, or ``None`` when no record exists for ``user``.
	    """
	    if not user:
	        # Deta rejects an empty key; no record can exist for it anyway.
	        return None
	    return db.get(user)


	def user_history(time, text, ):
	    """Not implemented yet: accepts ``time`` and ``text`` and does nothing."""
	    return None


	def update_questionnaire_response(user_response, username):
	    """Save a user's questionnaire answers to the Deta base and confirm in the UI.

	    Args:
	        user_response: Mapping of questionnaire field names to the answers.
	        username: Record key of the user whose answers are stored.
	    """
	    if db.get(username) is None:
	        # ``db.update`` raises when the key does not exist, which is the case
	        # for a user who has not saved anything on the Setup page yet. Create
	        # the record instead. The empty token is stored encrypted, in the same
	        # form the Setup page writes, so the summary page can decrypt it.
	        db.put({"key": username,
	                "api": key.encrypt(b"").decode(),
	                "questionnaire_response": user_response})
	    else:
	        db.update({"questionnaire_response": user_response}, key=username)
	    st.success("Your responses have been recorded. Thank you!")


	def generate_responses(text, chat_model="gpt-3.5-turbo", paper_title="", level_education="",
	                       english_proficiency="", language_spoken="", tech_usage="",
	                       news_read="", books_read="", additional_requirements="None"):
	    """Ask the OpenAI chat API for a plain language summary of ``text``.

	    Args:
	        text: Abstract or full paper text; only the first 2048 characters are sent.
	        chat_model: Name of the chat model passed to ``ChatCompletion.create``.
	        paper_title: Title of the paper, inserted into the prompt.
	        level_education, english_proficiency, language_spoken, tech_usage,
	        news_read, books_read: Questionnaire answers describing the reader,
	            inserted into the prompt as-is.
	        additional_requirements: Extra instructions from the user.

	    Returns:
	        The generated summary, or an empty string when the request fails
	        (the failure is reported to the user with ``st.error``).
	    """
	    # Incorporating the parameters into the context
	    text = text[:2048]
	    user_context = f"""
	The user has achieved an education level up to {level_education}. In daily routine, the user describes the frequency of using
	technology such as computers, cell phones, and tablets as {tech_usage}. The user's primary language spoken at home is
	{language_spoken}, and has {english_proficiency} level of English proficiency. The user {news_read} reads or watches
	the news and reads approximately {books_read} books in a month.
	"""

	    # Prompt template
	    prompt_template = f"""
	Here's the abstract of a paper (titled) {paper_title}: {text}.
	Considering the user's information: {user_context}.
	And user's additional requirements: {additional_requirements}.
	Generate a plain language summary that summarizes the abstract. While creating this Plain Language Summary, please keep the following must-have elements in mind:
	- The plain language summary should achieve readability at the 8th Grade level as measured by the Flesch Kincaid scale.
	- The plain language summary should achieve Flesch Reading Ease Scores from 60 to 70.
	- Ensure each sentence is shorter than 25 words.
	- The summary should average 6 sentences per paragraph.
	- Less than 10% of the sentences should be in passive voice.
	- Ensure fidelity to the original source.
	- Use clear and simple language, avoiding jargon.
	- Maintain ethical considerations, including objectivity and inclusivity.
	- Aim for universal readability, targeting a reading age of 14-17 years.
	- Consider multi-language accessibility.
	- Take into account any operational context or guidelines that may apply.
	- The plain language summary should be a single paragraph, without subtitles or bullet points.
	"""

	    conversation = [
	        {'role': 'system', 'content': 'You are a helpful assistant.'},
	        {'role': 'user', 'content': prompt_template}
	    ]

	    try:
	        response = openai.ChatCompletion.create(
	            model=chat_model,
	            messages=conversation
	        )
	        return response['choices'][0]['message']['content']
	    except openai.error.AuthenticationError:
	        st.error('Invalid api key.', icon="⚠️")
	    except Exception as exc:
	        # Rate limits, network failures, unknown models and malformed replies
	        # are not key problems; show the real cause instead of blaming the key.
	        st.error(f'Could not generate the summary: {exc}', icon="⚠️")
	    # Callers store the result and show it in a text area, so hand back an
	    # empty string rather than None on failure.
	    return ""


	@st.cache_resource
	def survey(user_name):
	    """Return the StreamlitSurvey titled ``<user_name>_survey`` (cached by Streamlit)."""
	    return ss.StreamlitSurvey(user_name + '_survey')


	# Connect to Deta: the "user_data" base holds per-user records and the
	# "config" drive holds the authenticator's config.yaml.
	db_key = st.secrets["deta_key"]
	deta = Deta(db_key)
	db = deta.Base("user_data")
	config_drive = deta.Drive("config")

	# Fernet cipher used to encrypt and decrypt the API tokens stored in the base.
	key = Fernet(st.secrets['fernet_key'])

	# Download config.yaml from the drive and parse it with the safe YAML loader.
	config = yaml.safe_load(config_drive.get("config.yaml").read())

	# Build the authenticator from the loaded config and plug in the custom validator.
	cookie_settings = config['cookie']
	authenticator = stauth.Authenticate(
	    config['credentials'],
	    cookie_settings['name'],
	    cookie_settings['key'],
	    cookie_settings['expiry_days'],
	    config['preauthorized'],
	)
	authenticator.validator = Validator()

	# Placeholder in the sidebar holding the Login / Sign Up menu.
	init_sidebar = st.sidebar.empty()
	with init_sidebar:
	    init_page = option_menu(
	        None,
	        ["Login", 'Sign Up'],
	        default_index=0,
	        menu_icon="cast",
	        icons=['lightbulb-fill', 'lightbulb'],
	        styles={},
	    )

	# Page dispatch: Login shows the app pages once authenticated; Sign Up shows
	# the registration form.
	if init_page == 'Login':
	    name, authentication_status, username = authenticator.login('Login', 'main')
	    if authentication_status:
	        # Replace the Login / Sign Up menu with the application menu.
	        init_sidebar.empty()
	        st.sidebar.write(f'Welcome {name}')
	        app_sidebar = st.sidebar.empty()

	        if 'current_page_name' not in st.session_state:
	            st.session_state.current_page_name = "Generate Plain Language Summary"  # set the default page

	        with app_sidebar:
	            page = option_menu(None, ["Generate Plain Language Summary", 'Questionnaire', 'Setup'],
	                               icons=['house', 'question-circle', 'gear'],
	                               menu_icon="None",
	                               default_index=0,
	                               styles={})
	        authenticator.logout('Logout', 'sidebar', key='unique_key')

	        # Fetch user data from the database
	        user_data = get_user_data(username)

	        if page == "Generate Plain Language Summary":

	            st.title("Generate Plain Language Summary")
	            st.markdown(
	                '''
	### What is a Plain Language Summary?
	A Plain Language Summary is a clear and concise summary of a scientific paper. It's designed to make complex research findings more accessible and understandable to a general audience.
	#### Detailed Instructions for Generating a Plain Language Summary
	1. Set Up: Navigate to the 'Set Up' page to input your API key and specify your writing style. This will help tailor the summary to your preferences.
	2. Complete the Questionnaire: On the 'Questionnaire' page, you'll also find a questionnaire designed to further tailor the summary to your needs. Please complete it.
	3. Choose Content Source:
	- Option A: If you have access to the full paper, you can upload the whole document.
	- Option B: Alternatively, you can input the abstract of the paper.
	4. Input Paper Title: Paste the exact title of the paper you wish to summarize in the text input field below. An accurate title ensures a more relevant summary.
	5. Generate Summary: After completing the above steps, click on the 'Generate' button to receive your Plain Language Summary.
	'''
	            )

	            # Title input box
	            title_text = st.text_area("Paste Your Paper Title Here", height=25)

	            # Abstract input box
	            abstract_text = st.text_area("Paste Abstract Here", height=200)

	            # PDF input box and text extraction
	            uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

	            chat_mdl = None

	            entire_text = ""
	            if uploaded_file is not None:
	                reader = PdfReader(uploaded_file)
	                # A separate name is used for PDF pages so the menu selection
	                # held in ``page`` is not overwritten.
	                entire_text = "".join(pdf_page.extract_text() or "" for pdf_page in reader.pages)

	            # Decrypt the stored API token; tolerate a missing record or field.
	            stored_token = user_data.get('api') if user_data else None
	            if stored_token:
	                st.session_state.api_key = key.decrypt(stored_token.encode()).decode()
	            else:
	                st.session_state.api_key = ''

	            if 'draft_response_content' not in st.session_state:
	                st.session_state.draft_response_content = ""

	            # The stored answers may be empty or partial (for example Setup was
	            # saved before the questionnaire), so unanswered fields fall back
	            # to '' instead of raising KeyError.
	            stored_answers = (user_data or {}).get('questionnaire_response') or {}
	            user_response = defaultdict(str, stored_answers)
	            profile_fields = ('level_education', 'english_proficiency', 'language_spoken',
	                              'tech_usage', 'news_read', 'books_read')
	            profile_kwargs = {field: user_response[field] for field in profile_fields}

	            # Extracted PDF text takes precedence over the pasted abstract.
	            submit_text = entire_text if entire_text else abstract_text

	            if st.button("Submit"):
	                if not submit_text:
	                    # Also covers an uploaded PDF with no extractable text.
	                    st.warning('Please paste Abstract or upload a file.', icon="⚠️")

	                if st.session_state.api_key:
	                    os.environ["OPENAI_API_KEY"] = st.session_state.api_key
	                    # Set the key on the openai module explicitly so the request
	                    # does not rely on a value left over from an earlier run.
	                    openai.api_key = st.session_state.api_key
	                    # NOTE(review): generate_responses() is called without
	                    # chat_model, so it uses its own default model, not this
	                    # gpt-4 client; the object only gates the call below.
	                    chat_mdl = OpenAI(model_name='gpt-4', temperature=0.1)
	                else:
	                    st.warning('Please fill in api-key in Setup.', icon="⚠️")

	                if chat_mdl is not None and submit_text:
	                    st.session_state.draft_response_content = generate_responses(
	                        text=submit_text,
	                        paper_title=title_text,
	                        **profile_kwargs,
	                    )

	            container = st.empty()
	            # Output from function
	            container.text_area(label="Plain Language Summary", value=st.session_state.draft_response_content,
	                                height=350)
	            regenerate_prompt = st.text_area("Additional prompting for regenerating summary", height=100)

	            if st.button('Regenerate'):
	                if st.session_state.draft_response_content == "":
	                    st.warning('Please Generate a PLS first', icon="⚠️")
	                elif regenerate_prompt == "":
	                    st.warning('Your new prompt is empty', icon="⚠️")
	                else:
	                    # Use this user's key for the request rather than whatever
	                    # key the openai module currently holds.
	                    openai.api_key = st.session_state.api_key
	                    st.session_state.draft_response_content = generate_responses(
	                        text=submit_text,
	                        paper_title=title_text,
	                        additional_requirements=regenerate_prompt,
	                        **profile_kwargs,
	                    )
	                    container.empty()
	                    container.text_area(label="Plain Language Summary",
	                                        value=st.session_state.draft_response_content,
	                                        height=350)

	        # TODO: a "History" page was sketched here but is not implemented.
	        elif page == "Setup":
	            st.title("Setup")
	            # Input boxes with existing data

	            if 'api_key' not in st.session_state:
	                st.session_state.api_key = ""
	            api_input = st.text_input("OpenAI API Token", value=st.session_state.api_key, type='password')
	            st.session_state.api_key = api_input

	            # Carry the stored questionnaire answers over, because ``put``
	            # replaces the whole record.
	            questionnaire_response = user_data.get('questionnaire_response', {}) if user_data else {}

	            # Update button
	            if st.button("Update"):
	                db.put(
	                    {"key": username, "api": key.encrypt(api_input.encode('utf-8')).decode(),
	                     "questionnaire_response": questionnaire_response})
	                st.success('Updating successfully!')
	        elif page == "Questionnaire":
	            # Bound to a new name so the cached ``survey`` factory is not shadowed.
	            user_survey = survey(username)
	            if 'questionnaire_response' not in st.session_state:
	                st.session_state['questionnaire_response'] = {}
	            page_number = 11
	            survey_pages = user_survey.pages(page_number,
	                                             on_submit=lambda: update_questionnaire_response(
	                                                 st.session_state['questionnaire_response'], username))
	            st.progress((survey_pages.current + 1) / page_number)
	            with survey_pages:
	                if survey_pages.current == 0:
	                    st.write("#### What is your level of education? (for research purposes)")
	                    level_education = user_survey.radio(
	                        label="level_education",
	                        options=["Primary School", "Middle School", "Secondary School",
	                                 "College", "Masters", "PhD"],
	                        index=0,
	                        label_visibility="collapsed",
	                        horizontal=False,
	                    )
	                    st.session_state['questionnaire_response']['level_education'] = level_education
	                elif survey_pages.current == 1:
	                    st.write("#### What domains are you most interested in?")
	                    domains = ['Global Studies', 'Arts', 'Business & Economics', 'History', 'Humanities',
	                               'Law', 'Medicine and Health', 'Science - Biology', 'Science - Chemistry',
	                               'Science - Environmental Science', 'Science - Physics', 'Mathematics',
	                               'Engineering', 'Social Sciences']
	                    # One checkbox per domain; keep the ticked ones in list order.
	                    interested_domain = [domain for domain in domains if user_survey.checkbox(domain)]
	                    st.session_state['questionnaire_response']['interested_domain'] = interested_domain
	                elif survey_pages.current == 2:
	                    paper_discovery_method = user_survey.text_area("#### How did you come across this paper?")
	                    st.session_state['questionnaire_response']['paper_discovery_method'] = paper_discovery_method
	                elif survey_pages.current == 3:
	                    reading_purpose = user_survey.text_area("#### For what purpose are you reading this paper?")
	                    st.session_state['questionnaire_response']['reading_purpose'] = reading_purpose
	                elif survey_pages.current == 4:
	                    st.write("#### What information do you want to get out of this paper?")
	                    information_options = ["Main findings and conclusions",
	                                           'Methodology and experimental design',
	                                           'Data and statistical analysis',
	                                           'Limitations or gaps in the research']
	                    desired_information = [info for info in information_options if user_survey.checkbox(info)]
	                    other_info = user_survey.text_input('Other aspects:')
	                    if other_info:
	                        desired_information.append(other_info)
	                    st.session_state['questionnaire_response']['desired_information'] = desired_information
	                elif survey_pages.current == 5:
	                    st.write("#### what is your level of english proficiency?")
	                    # NOTE(review): uses st.slider rather than the survey's own
	                    # widget, unlike the other pages — confirm this is intended.
	                    english_proficiency = st.slider("English Proficiency (1-5):", min_value=1, max_value=5, value=1)
	                    st.session_state['questionnaire_response']['english_proficiency'] = english_proficiency

	                elif survey_pages.current == 6:
	                    st.write("#### What is the primary language spoken in your home? (click from the list and others)")
	                    languages = ['English', 'Spanish', ]
	                    language_spoken = [language for language in languages if user_survey.checkbox(language)]
	                    other_language = user_survey.text_input('Other')

	                    if other_language:
	                        language_spoken.append(other_language)
	                    st.session_state['questionnaire_response']['language_spoken'] = language_spoken

	                elif survey_pages.current == 7:
	                    st.write("#### Do you speak other languages? How fluent are you in each language?")
	                    language_fluency = {}
	                    language_index = 1
	                    col1, col2 = st.columns([3, 2])

	                    # Render one language/fluency row, then keep adding rows
	                    # for as long as the latest language field is filled in.
	                    while True:
	                        with col1:
	                            other_language = user_survey.text_input(f'Language #{language_index}')
	                        with col2:
	                            fluency = user_survey.selectbox(
	                                f'Fluency #{language_index}',
	                                options=["", "Beginner", "Intermediate", "Advanced", "Native"],
	                            )
	                        if other_language and fluency:
	                            language_fluency[other_language] = fluency
	                        if not other_language:
	                            break
	                        language_index += 1
	                    st.session_state['questionnaire_response']['other_language'] = language_fluency

	                elif survey_pages.current == 8:
	                    st.write(
	                        "#### How much do you use technology (computers, cell phones, tablets, GPS, internet, etc.)?")
	                    st.markdown('''
	* Always: relies heavily on daily tasks
	* Often in a day: not necessarily every task, but plays a significant role in life
	* Occasionally: use constantly but not essential for most daily activities
	* Rarely: use only for specific tasks
	* Never: avoid using technology
	''')
	                    tech_usage = user_survey.select_slider(
	                        label="tech_usage",
	                        options=['Never', 'Rarely', 'Occasionally',
	                                 'Often', 'Always'],
	                        label_visibility="collapsed",
	                    )
	                    st.session_state['questionnaire_response']['tech_usage'] = tech_usage
	                elif survey_pages.current == 9:
	                    st.write("#### How often do you read or watch/listen to the news?")
	                    news_read = user_survey.radio(
	                        label="news_read",
	                        options=["Never", "Once or Twice a Month", "Once a Week",
	                                 "Once in 2-3 Days", "Every Day"],
	                        index=0,
	                        label_visibility="collapsed",
	                        horizontal=False,
	                    )
	                    st.session_state['questionnaire_response']['news_read'] = news_read
	                elif survey_pages.current == 10:
	                    st.write("#### How many books do you read or listen to a month?")
	                    books_read = user_survey.radio(
	                        label="books_read",
	                        options=["0", "1-3", "4-6", "7+"],
	                        index=0,
	                        label_visibility="collapsed",
	                        horizontal=True,
	                    )
	                    st.session_state['questionnaire_response']['books_read'] = books_read

	    elif authentication_status is False:
	        st.error('Username or Password is incorrect', icon="⚠️")
	elif init_page == 'Sign Up':
	    try:
	        if authenticator.register_user('Register user', preauthorization=False):
	            st.success('User registered successfully')
	            st.balloons()
	    except Exception as e:
	        # Show the registration failure to the user instead of crashing the page.
	        st.error(e)

	# Write the in-memory config to a local config.yaml, then upload that file to
	# the "config" drive. Presumably this is what persists newly registered
	# users — confirm.
	with open('config.yaml', 'w') as config_file:
	    config_file.write(yaml.dump(config, default_flow_style=False))
	config_drive.put("config.yaml", path="config.yaml")