# Spaces: Sleeping
import importlib
import importlib.util
import subprocess
import sys
from pprint import pprint

import streamlit as st

# Name of the spaCy pipeline package this app needs at runtime.
_SPACY_MODEL = "en_core_web_sm"

# Streamlit re-executes this script on every widget interaction, so only
# install the spaCy model when it is not importable yet instead of spawning
# a download subprocess on each rerun. The install stays best-effort (the
# return code is not checked), matching the original behaviour.
if importlib.util.find_spec(_SPACY_MODEL) is None:
    # sys.executable guarantees the model lands in the interpreter that is
    # actually running the app, which a bare "python" on PATH does not.
    subprocess.run([sys.executable, "-m", "spacy", "download", _SPACY_MODEL])
    # Make the freshly installed package visible to this process.
    importlib.invalidate_caches()

# These imports intentionally come after the model install step above,
# preserving the original ordering (download first, then import).
from spacy.cli import download
from Questgen import main, main2
from PyPDF2 import PdfReader
from transformers import pipeline
import nltk
import pandas as pd

# Fetch the 'punkt' tokenizer data only when it is missing locally, so
# reruns do not contact the NLTK download server every time.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
# st.title(body='7 - Question Generation') | |
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to
            ``PdfReader`` (the caller hands in the uploaded files).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        str: The page texts joined in document order, then page order,
        with no separator inserted between pages.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: treat a missing (None) extraction result as empty
            # text so one unreadable page cannot break the concatenation.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loops.
    return "".join(page_texts)
########################################################
# Streamlit page: upload PDFs, generate multiple-choice questions from their
# text, display them, and offer the result as a CSV download.

# Session-state key under which the latest generation result is kept.
# Streamlit reruns the whole script on every interaction, and st.button is
# only True on the run triggered by the click; without this, using the
# download button or the raw-output toggle would make the results vanish.
_OUTPUT_STATE_KEY = 'generated_mcq_output'

# The correct answer is always shown as "A"; at most this many distractors
# are shown, labelled in order.
_DISTRACTOR_LABELS = 'BCD'

# Columns removed from the exported CSV.
_EXPORT_DROPPED_COLUMNS = ['options_algorithm', 'extra_options', 'context', 'question_type']

st.title('Generate Questions from PDFs')
file = st.file_uploader(label='Upload', accept_multiple_files=True)
pr = st.button(label='Process')

if pr:
    # Skip text extraction (and the model run) when nothing was uploaded.
    raw_text = get_pdf_text(file) if file else ''
    if not raw_text.strip():
        st.session_state[_OUTPUT_STATE_KEY] = None
        st.warning('Please upload at least one PDF that contains extractable text.')
    else:
        ge = main2.QGen()
        payload = {
            'input_text': raw_text,
        }
        st.session_state[_OUTPUT_STATE_KEY] = ge.predict_mcq(payload=payload)
        generated = st.session_state[_OUTPUT_STATE_KEY]
        # The generator may hand back a result without any questions;
        # report that instead of failing on a missing 'questions' key.
        if not generated or not generated.get('questions'):
            st.warning('No questions could be generated from the uploaded text.')

# Render from session state so the results survive reruns caused by the
# download button and the toggle below.
output = st.session_state.get(_OUTPUT_STATE_KEY)
questions = output.get('questions') if output else None

if questions:
    st.header(body='*Generated Questions are:*', divider='orange')
    for question in questions:
        st.subheader(body=f":orange[Q{question['id']}:] {question['question_statement']}", divider='blue')
        st.markdown(f"A: {question['answer']}")
        # zip stops after the last label, so only the first three options
        # are displayed, exactly as before.
        for label, option in zip(_DISTRACTOR_LABELS, question['options']):
            st.markdown(f"{label}: {option}")

    # Convert the question dictionaries to a DataFrame; errors='ignore'
    # keeps the export working if any of the dropped columns is absent.
    df = pd.DataFrame(questions)
    df = df.drop(columns=_EXPORT_DROPPED_COLUMNS, errors='ignore')
    # Convert the DataFrame to CSV bytes for the download widget.
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label='Download Data',
        data=csv,
        file_name='Generated MCQs.csv',
        mime='text/csv'
    )
    if st.toggle(label='Show Raw Output'):
        st.write(output)