# DevBM's picture
# Update app.py
# 395daa0 verified
import streamlit as st
import streamlit as st
from pprint import pprint
import subprocess
import importlib.util
import sys

# Command that installs the spaCy English model. sys.executable is used instead
# of a bare "python" so the model is installed for the interpreter that is
# actually running this app, whatever happens to be first on PATH.
cmd = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]

# This is top-level code, so it executes every time the script runs. Only
# launch the download when the model package is not already importable.
if importlib.util.find_spec("en_core_web_sm") is None:
    # Best effort, as before: a failed download does not abort start-up here.
    subprocess.run(cmd, check=False)
from spacy.cli import download
from Questgen import main, main2
from PyPDF2 import PdfReader
from transformers import pipeline
from PyPDF2 import PdfReader
import nltk
import pandas as pd
# Fetch NLTK's 'punkt' resource; as top-level code this runs on every execution of the script.
# NOTE(review): presumably needed for sentence tokenization inside Questgen — confirm.
nltk.download('punkt')
# st.title(body='7 - Question Generation')
def get_pdf_text(pdf_docs):
    """Return the text of every page of every given PDF as one string.

    Args:
        pdf_docs: Iterable of objects that ``PdfReader`` can open (in this
            script, the value returned by ``st.file_uploader``). ``None`` or
            an empty collection means "no documents".

    Returns:
        The page texts concatenated in document order, then page order, with
        no separator between them. An empty string when there is nothing to
        read.
    """
    if not pdf_docs:
        return ""

    # Gather the fragments and join once rather than growing a string with +=.
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: treat a page that yields no text (None) as empty so
            # the join below cannot fail on a non-string fragment.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)
########################################################
# st.header(body='Proposition 1',divider='red')
# if st.toggle(label='Show Proposition 1'):
# ---------------------------------------------------------------------------
# Page: upload PDFs, generate multiple-choice questions, show and export them.
# ---------------------------------------------------------------------------
st.title('Generate Questions from PDFs')
file = st.file_uploader(label='Upload', accept_multiple_files=True)
pr = st.button(label='Process')

# A Streamlit button is True only for the single rerun caused by its click.
# Interacting with the toggle or the download button below triggers another
# rerun in which `pr` is False, so the generated result is stored in
# st.session_state and rendered from there instead of from inside `if pr:`.
if pr:
    if not file:
        st.warning('Please upload at least one PDF before processing.')
    else:
        raw_text = get_pdf_text(file)
        ge = main2.QGen()
        payload = {
            'input_text': raw_text,
            # NOTE(review): the original carried a commented-out
            # 'max_questions': 2 entry here — presumably an optional cap on
            # the number of questions; confirm against main2.QGen.
        }
        st.session_state['mcq_output'] = ge.predict_mcq(payload=payload)

output = st.session_state.get('mcq_output')
if output is not None:
    # NOTE(review): assumes predict_mcq returns a dict whose 'questions' entry
    # is a list of per-question dicts, and that the key may be absent when
    # nothing could be generated — confirm against main2.QGen.
    questions = output.get('questions') or []

    if questions:
        st.header(body='*Generated Questions are:*', divider='orange')
        for question in questions:
            st.subheader(
                body=f":orange[Q{question['id']}:] {question['question_statement']}",
                divider='blue',
            )
            # The correct answer is always listed as choice A; the first three
            # distractors follow as B, C and D (any further ones are not shown).
            st.markdown(f"A: {question['answer']}")
            for letter, option in zip('BCD', question['options']):
                st.markdown(f"{letter}: {option}")

        # Export: one row per question, without the bookkeeping columns.
        df = pd.DataFrame(questions)
        df = df.drop(
            columns=['options_algorithm', 'extra_options', 'context', 'question_type'],
            errors='ignore',  # tolerate results that lack some of these columns
        )
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label='Download Data',
            data=csv,
            file_name='Generated MCQs.csv',
            mime='text/csv',
        )
    else:
        st.info('No questions could be generated from the uploaded documents.')

    if st.toggle(label='Show Raw Output'):
        st.write(output)