File size: 23,747 Bytes
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
dd1a8bd
 
 
 
 
 
 
0ee6881
dd1a8bd
 
 
0ee6881
dd1a8bd
 
 
 
0ee6881
3c66cf8
01fb2fc
3c66cf8
dd1a8bd
 
 
01fb2fc
3c66cf8
 
 
dd1a8bd
 
0ee6881
dd1a8bd
 
 
0ee6881
 
 
dd1a8bd
01fb2fc
 
 
 
 
dd1a8bd
 
 
 
 
 
e8470a3
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8239f6
3c66cf8
 
 
 
 
dd1a8bd
d8239f6
dd1a8bd
 
 
3c66cf8
dd1a8bd
3c66cf8
d8239f6
 
 
dd1a8bd
 
 
 
 
 
 
 
d8239f6
dd1a8bd
d8239f6
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
dd1a8bd
0ee6881
 
 
 
 
dd1a8bd
3c66cf8
01fb2fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
 
 
 
 
 
dd1a8bd
 
3c66cf8
dd1a8bd
3c66cf8
 
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
 
 
 
 
 
dd1a8bd
 
3c66cf8
dd1a8bd
3c66cf8
 
 
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
 
dd1a8bd
 
 
 
 
 
3c66cf8
dd1a8bd
0ee6881
 
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ee6881
 
 
 
 
 
 
 
 
 
 
 
dd1a8bd
0ee6881
 
dd1a8bd
0ee6881
 
dd1a8bd
 
0ee6881
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd1a8bd
d8239f6
 
 
 
3c66cf8
d8239f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
d8239f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
 
 
 
 
 
 
 
3c66cf8
dd1a8bd
 
 
3c66cf8
dd1a8bd
 
 
 
0ee6881
dd1a8bd
 
 
d8239f6
dd1a8bd
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
import streamlit as st
import streamlit_authenticator as stauth
from deta import Deta
import yaml
from yaml.loader import SafeLoader
import os
from langchain.llms import OpenAI
import re
from PyPDF2 import PdfReader
from streamlit_option_menu import option_menu
import streamlit_survey as ss
from cryptography.fernet import Fernet
import warnings
import openai
import json
from collections import defaultdict

warnings.filterwarnings("ignore", category=UserWarning, module='langchain')


class Validator:
    """Input validators installed on the streamlit-authenticator instance.

    Every method takes the raw string typed by the user and returns a bool.
    """

    # Patterns are applied with fullmatch(), so no ^/$ anchors are needed and a
    # trailing newline (which `$` would tolerate) is rejected.
    _USERNAME_RE = re.compile(r"[a-zA-Z0-9_-]{1,20}")
    # Accepts everything the previous rule accepted, plus dotted local parts,
    # sub-domains and top-level domains longer than three letters.
    _EMAIL_RE = re.compile(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]+"
    )

    def validate_username(self, username):
        """Return True for 1-20 characters drawn from letters, digits, '_' and '-'."""
        return self._USERNAME_RE.fullmatch(username) is not None

    def validate_name(self, name):
        """Return True when the display name is 2 to 99 characters long."""
        return 1 < len(name) < 100

    def validate_email(self, email):
        """Return True when `email` looks like local-part@domain.tld."""
        return self._EMAIL_RE.fullmatch(email) is not None


def get_user_data(user):
    """Return the stored record whose key is `user`, or None if there is none.

    The record is looked up directly by key instead of scanning `db.fetch()`,
    whose `.items` only holds the first page of results and therefore misses
    users once the base grows beyond one page.

    Args:
        user: The username used as the record key in the `user_data` base.

    Returns:
        The record dict, or None when `user` is empty or no record exists.
    """
    # Deta's get() rejects an empty key, so treat "no username" as "no record".
    if not user:
        return None
    return db.get(user)


def user_history(time, text, ):
    """Unimplemented placeholder: the body is empty and the call returns None.

    NOTE(review): presumably meant to record a user's prompt history (a
    commented-out "History" page exists further down) - confirm before use.
    """
    pass


def update_questionnaire_response(user_response, username):
    """Save the questionnaire answers on the user's record and confirm in the UI.

    Args:
        user_response: Dict of questionnaire answers to store.
        username: Record key of the user in the `user_data` base.
    """
    if db.get(username) is None:
        # db.update() raises for a key that does not exist, which is the case
        # for users who answer the questionnaire before saving anything on the
        # Setup page. Create the record instead, with an encrypted empty API
        # token (the same value the Setup page stores for an empty input) so
        # the Generate page can still decrypt the 'api' field.
        db.put({
            "key": username,
            "api": key.encrypt(b"").decode(),
            "questionnaire_response": user_response,
        })
    else:
        db.update({"questionnaire_response": user_response}, key=username)
    st.success("Your responses have been recorded. Thank you!")


def generate_responses(text, chat_model="gpt-3.5-turbo", paper_title="", level_education="",
                       english_proficiency="", language_spoken="", tech_usage="",
                       news_read="", books_read="", additional_requirements="None"):
    """Ask the OpenAI chat API for a plain language summary of `text`.

    Args:
        text: Abstract or full paper text; only the first 2048 characters are sent.
        chat_model: Name of the chat model passed to the API.
        paper_title: Title of the paper, quoted in the prompt.
        level_education, english_proficiency, language_spoken, tech_usage,
        news_read, books_read: Questionnaire answers interpolated into the
            reader description.
        additional_requirements: Extra free-text instructions from the user.

    Returns:
        The generated summary, or "" when the request fails (an error message
        is shown in the Streamlit UI in that case).
    """
    # Incorporating the parameters into the context
    text = (text or "")[:2048]
    user_context = f"""
    The user has achieved an education level up to {level_education}. In daily routine, the user describes the frequency of using
    technology such as computers, cell phones, and tablets as {tech_usage}. The user's primary language spoken at home is
    {language_spoken}, and has {english_proficiency} level of English proficiency. The user {news_read} reads or watches
    the news and reads approximately {books_read} books in a month.
    """

    # Prompt template
    prompt_template = f"""
    Here's the abstract of a paper (titled) {paper_title}: {text}.
    Considering the user's information: {user_context}.
    And user's additional requirements: {additional_requirements}.
    Generate a plain language summary that summarizes the abstract. While creating this Plain Language Summary, please keep the following must-have elements in mind:
    - The plain language summary should achieve readability at the 8th Grade level as measured by the Flesch Kincaid scale.
    - The plain language summary should achieve Flesch Reading Ease Scores from 60 to 70.
    - Ensure each sentence is shorter than 25 words.
    - The summary should average 6 sentences per paragraph.
    - Less than 10% of the sentences should be in passive voice.
    - Ensure fidelity to the original source.
    - Use clear and simple language, avoiding jargon.
    - Maintain ethical considerations, including objectivity and inclusivity.
    - Aim for universal readability, targeting a reading age of 14-17 years.
    - Consider multi-language accessibility.
    - Take into account any operational context or guidelines that may apply.
    - The plain language summary should be a single paragraph, without subtitles or bullet points.
    """

    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt_template}
    ]

    try:
        response = openai.ChatCompletion.create(
            model=chat_model,
            messages=conversation
        )
        return response['choices'][0]['message']['content']
    except openai.error.AuthenticationError:
        st.error('Invalid api key.', icon="⚠️")
    except Exception as err:
        # UI boundary: report the real cause (rate limit, unknown model,
        # network failure, ...) instead of blaming the API key for everything.
        st.error(f'Could not generate the summary: {err}', icon="⚠️")
    # Keep the return type a string so callers can store and display it as-is.
    return ""


@st.cache_resource
def survey(user_name):
    """Build the StreamlitSurvey labelled '<user_name>_survey'.

    The function is decorated with st.cache_resource.
    """
    return ss.StreamlitSurvey("".join((user_name, '_survey')))


# Open the Deta project: the "user_data" Base holds one record per user and
# the "config" Drive holds the authenticator's config.yaml.
db_key = st.secrets["deta_key"]
deta = Deta(db_key)
db = deta.Base("user_data")
config_drive = deta.Drive("config")

# Cipher used further down to encrypt/decrypt each user's stored API token.
key = Fernet(st.secrets['fernet_key'])

# Read the authenticator configuration from the Drive and parse it safely.
config = yaml.load(config_drive.get("config.yaml").read(), Loader=SafeLoader)

# Build the authenticator from the credentials and cookie settings in config.
authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized'],
)
# Replace the library's validator with the rules defined in this file.
authenticator.validator = Validator()

# The Login / Sign Up menu lives in a placeholder so it can be cleared later.
init_sidebar = st.sidebar.empty()
with init_sidebar:
    init_page = option_menu(
        None,
        ["Login", 'Sign Up'],
        icons=['lightbulb-fill', 'lightbulb'],
        menu_icon="cast",
        default_index=0,
        styles={},
    )

# Login branch: render the login form and, once authenticated, the app itself.
if init_page == 'Login':
    name, authentication_status, username = authenticator.login('Login', 'main')
    if authentication_status:
        # Clear the Login / Sign Up menu and greet the user in the sidebar.
        init_sidebar.empty()
        st.sidebar.write(f'**Welcome** {name}')
        app_sidebar = st.sidebar.empty()

        # NOTE(review): 'current_page_name' is not read anywhere in the visible
        # part of this file - confirm whether it is still needed.
        if 'current_page_name' not in st.session_state:
            st.session_state.current_page_name = "Generate Plain Language Summary"  # set the default page

        # In-app navigation menu plus the logout button.
        with app_sidebar:
            page = option_menu(None, ["Generate Plain Language Summary", 'Questionnaire', 'Setup'],
                               icons=['house', 'question-circle', 'gear'],
                               menu_icon="None",
                               default_index=0,
                               styles={})
            authenticator.logout('Logout', 'sidebar', key='unique_key')
        # Fetch user data from the database

        # get_user_data() returns None when the user has no stored record.
        user_data = get_user_data(username)
        # print('current page: ', page)

        if page == "Generate Plain Language Summary":
            # Page flow: collect a title plus an abstract or PDF, build the reader
            # profile from the saved questionnaire, then call generate_responses().

            st.title("Generate Plain Language Summary")
            st.markdown(
                '''
                ### What is a Plain Language Summary?
                A Plain Language Summary is a clear and concise summary of a scientific paper. It's designed to make complex research findings more accessible and understandable to a general audience.
                #### Detailed Instructions for Generating a Plain Language Summary
                1. **Set Up**: Navigate to the 'Set Up' page to input your API key and specify your writing style. This will help tailor the summary to your preferences.
                2. **Complete the Questionnaire**: On the 'Questionnaire' page, you'll also find a questionnaire designed to further tailor the summary to your needs. Please complete it.
                3. **Choose Content Source**: 
                    - **Option A**: If you have access to the full paper, you can upload the whole document.
                    - **Option B**: Alternatively, you can input the abstract of the paper.
                4. **Input Paper Title**: Paste the exact title of the paper you wish to summarize in the text input field below. An accurate title ensures a more relevant summary.
                5. **Generate Summary**: After completing the above steps, click on the 'Generate' button to receive your Plain Language Summary.
                '''
            )

            # Title input box
            title_text = st.text_area("Paste Your Paper Title Here", height=25)

            # Abstract input box
            abstract_text = st.text_area("Paste Abstract Here", height=200)

            # PDF input box and text extraction
            uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

            entire_text = ""
            if uploaded_file is not None:
                reader = PdfReader(uploaded_file)
                # A dedicated loop name keeps the sidebar selection stored in
                # `page` intact; `or ""` guards pages that yield no text.
                entire_text = "".join(pdf_page.extract_text() or "" for pdf_page in reader.pages)

            # The API token is stored encrypted; decrypt it for this session.
            if user_data and user_data.get('api'):
                st.session_state.api_key = key.decrypt(user_data['api'].encode()).decode()
            else:
                st.session_state.api_key = ''

            if 'draft_response_content' not in st.session_state:
                st.session_state.draft_response_content = ""

            # A record saved from the Setup page can hold an empty questionnaire,
            # so every answer is read with a '' fallback instead of being indexed.
            user_response = (user_data or {}).get('questionnaire_response') or {}
            profile_kwargs = {
                field: user_response.get(field, '')
                for field in ('level_education', 'english_proficiency', 'language_spoken',
                              'tech_usage', 'news_read', 'books_read')
            }

            # Prefer the uploaded PDF's text; fall back to the pasted abstract.
            submit_text = entire_text or abstract_text

            if st.button("Submit"):
                has_text = bool(submit_text)
                has_key = bool(st.session_state.api_key)
                if not has_text:
                    st.warning('Please paste Abstract or upload a file.', icon="⚠️")
                if not has_key:
                    st.warning('Please fill in api-key in Setup.', icon="⚠️")

                if has_text and has_key:
                    # Hand the key to the openai client explicitly rather than
                    # relying on a side effect of building an unused model object.
                    os.environ["OPENAI_API_KEY"] = st.session_state.api_key
                    openai.api_key = st.session_state.api_key
                    summary = generate_responses(
                        text=submit_text,
                        paper_title=title_text,
                        **profile_kwargs,
                    )
                    # Keep the previous draft when generation failed.
                    if summary:
                        st.session_state.draft_response_content = summary

            container = st.empty()
            # Output from function
            container.text_area(label="Plain Language Summary", value=st.session_state.draft_response_content,
                                height=350)
            regenerate_prompt = st.text_area("Additional prompting for regenerating summary", height=100)

            if st.button('Regenerate'):
                if not st.session_state.draft_response_content:
                    st.warning('Please Generate a PLS first', icon="⚠️")
                elif regenerate_prompt == "":
                    st.warning('Your new prompt is empty', icon="⚠️")
                else:
                    if st.session_state.api_key:
                        os.environ["OPENAI_API_KEY"] = st.session_state.api_key
                        openai.api_key = st.session_state.api_key
                    regenerated = generate_responses(
                        text=submit_text,
                        paper_title=title_text,
                        additional_requirements=regenerate_prompt,
                        **profile_kwargs,
                    )
                    # Only replace the displayed summary when a new one arrived.
                    if regenerated:
                        st.session_state.draft_response_content = regenerated
                        container.empty()
                        container.text_area(label="Plain Language Summary",
                                            value=st.session_state.draft_response_content,
                                            height=350)
            # some function to re-submit prompt and generate new PLS
        # elif page == "History":
        #     st.write('User prompt History TODO')
        elif page == "Setup":
            st.title("Setup")
            # Input boxes with existing data

            if 'api_key' not in st.session_state:
                st.session_state.api_key = ""
            api_input = st.text_input("OpenAI API Token", value=st.session_state.api_key, type='password')
            st.session_state.api_key = api_input

            questionnaire_response = user_data['questionnaire_response'] if user_data else {}

            # Update button
            if st.button("Update"):
                db.put(
                    {"key": username, "api": key.encrypt(bytes(api_input, 'utf-8')).decode(),
                     "questionnaire_response": questionnaire_response})
                st.success('Updating successfully!')
        elif page == "Questionnaire":
            survey = survey(username)
            if 'questionnaire_response' not in st.session_state:
                st.session_state['questionnaire_response'] = {}
            # print(st.session_state['questionnaire_response'])
            page_number = 11
            survey_pages = survey.pages(page_number,
                                        on_submit=lambda: update_questionnaire_response(
                                            st.session_state['questionnaire_response'], username))
            # st.session_state["__streamlit-survey-data__Pages_"] = survey_pages.current
            st.progress((survey_pages.current + 1) / page_number)
            with survey_pages:
                if survey_pages.current == 0:
                    st.write("#### What is your level of education? (for research purposes)")
                    level_education = survey.radio(
                        label="level_education",
                        options=["Primary School", "Middle School", "Secondary School",
                                 "College", "Masters", "PhD"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=False,
                    )
                    st.session_state['questionnaire_response']['level_education'] = level_education
                elif survey_pages.current == 1:
                    st.write("#### What domains are you most interested in?")
                    domains = ['Global Studies', 'Arts', 'Business & Economics', 'History', 'Humanities',
                               'Law', 'Medicine and Health', 'Science - Biology', 'Science - Chemistry',
                               'Science - Environmental Science', 'Science - Physics', 'Mathematics',
                               'Engineering', 'Social Sciences']
                    domains_interested = {}
                    for i in range(len(domains)):
                        domains_interested[domains[i]] = survey.checkbox(domains[i])
                    interested_domain = []
                    for domain in domains_interested:
                        if domains_interested[domain]:
                            interested_domain.append(domain)
                    st.session_state['questionnaire_response']['interested_domain'] = interested_domain
                elif survey_pages.current == 2:
                    paper_discovery_method = survey.text_area("#### How did you come across this paper?")
                    st.session_state['questionnaire_response']['paper_discovery_method'] = paper_discovery_method
                elif survey_pages.current == 3:
                    reading_purpose = survey.text_area("#### For what purpose are you reading this paper?")
                    st.session_state['questionnaire_response']['reading_purpose'] = reading_purpose
                elif survey_pages.current == 4:
                    st.write("#### What information do you want to get out of this paper?")
                    information_options = ["Main findings and conclusions",
                                           'Methodology and experimental design',
                                           'Data and statistical analysis',
                                           'Limitations or gaps in the research']
                    info_interested = {}
                    for i in range(len(information_options)):
                        info_interested[information_options[i]] = survey.checkbox(information_options[i])
                    desired_information = []
                    for info in info_interested:
                        if info_interested[info]:
                            desired_information.append(info)
                    other_info = survey.text_input('Other aspects:')
                    if other_info:
                        desired_information.append(other_info)
                    st.session_state['questionnaire_response']['desired_information'] = desired_information
                elif survey_pages.current == 5:
                    st.write("#### what is your level of english proficiency?")
                    english_proficiency = st.slider("English Proficiency (1-5):", min_value=1, max_value=5, value=1)
                    st.session_state['questionnaire_response']['english_proficiency'] = english_proficiency

                elif survey_pages.current == 6:
                    st.write("#### What is the primary language spoken in your home? (click from the list and others)")
                    languages = ['English', 'Spanish', ]
                    language_options = {}
                    for i in range(len(languages)):
                        language_options[languages[i]] = survey.checkbox(languages[i])
                    language_spoken = []
                    for language in language_options:
                        if language_options[language]:
                            language_spoken.append(language)
                    other_language = survey.text_input('Other')

                    if other_language:
                        language_spoken.append(other_language)
                    st.session_state['questionnaire_response']['language_spoken'] = language_spoken

                elif survey_pages.current == 7:
                    st.write("#### Do you speak other languages? How fluent are you in each language?")
                    language_fluency = {}
                    language_index = 1
                    col1, col2 = st.columns([3, 2])

                    with col1:
                        other_language = survey.text_input(f'Language {"#" + str(language_index)}')
                    with col2:
                        fluency = survey.selectbox(f'Fluency {"#" + str(language_index)}',
                                                   options=["", "Beginner", "Intermediate", "Advanced", "Native"],
                                                   )
                    if other_language and fluency:
                        language_fluency.update({other_language: fluency})

                    while other_language:
                        language_index += 1
                        with col1:
                            other_language = survey.text_input(f'Language {"#" + str(language_index)}')
                        with col2:
                            fluency = survey.selectbox(f'Fluency {"#" + str(language_index)}',
                                                       options=["", "Beginner", "Intermediate", "Advanced", "Native"],
                                                       )
                        if other_language and fluency:
                            language_fluency.update({other_language: fluency})
                    st.session_state['questionnaire_response']['other_language'] = language_fluency

                elif survey_pages.current == 8:
                    st.write(
                        "#### How much do you use technology (computers, cell phones, tablets, GPS, internet, etc.)?")
                    st.markdown('''
                        * Always: relies heavily on daily tasks
                        * Often in a day: not necessarily every task, but plays a significant role in life
                        * Occasionally: use constantly but not essential for most daily activities
                        * Rarely: use only for specific tasks
                        * Never: avoid using technology
                    ''')
                    tech_usage = survey.select_slider(
                        label="tech_usage",
                        options=['Never', 'Rarely', 'Occasionally',
                                 'Often', 'Always'],
                        # min_value=1,
                        # max_value=5,
                        label_visibility="collapsed",
                    )
                    st.session_state['questionnaire_response']['tech_usage'] = tech_usage
                elif survey_pages.current == 9:
                    st.write("#### How often do you read or watch/listen to the news?")
                    news_read = survey.radio(
                        label="news_read",
                        options=["Never", "Once or Twice a Month", "Once a Week",
                                 "Once in 2-3 Days", "Every Day"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=False,
                    )
                    st.session_state['questionnaire_response']['news_read'] = news_read
                elif survey_pages.current == 10:
                    st.write("#### How many books do you read or listen to a month?")
                    books_read = survey.radio(
                        label="books_read",
                        options=["0", "1-3", "4-6", "7+"],
                        index=0,
                        label_visibility="collapsed",
                        horizontal=True,
                    )
                    st.session_state['questionnaire_response']['books_read'] = books_read

    elif authentication_status is False:
        st.error('Username or Password is incorrect', icon="⚠️")
elif init_page == 'Sign Up':
    try:
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
            st.balloons()
    except Exception as e:
        st.error(e)

# Write the current authenticator config to a local config.yaml, then upload
# that file to the Drive so the stored copy matches.
with open('config.yaml', 'w') as config_file:
    config_file.write(yaml.dump(config, default_flow_style=False))
config_drive.put("config.yaml", path="config.yaml")