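"""Exam Assistant -- a Streamlit app that turns pasted text into an auto-generated
multiple-choice quiz.

The app summarizes the input with a T5 model, extracts keywords as the correct
answers, generates one question per keyword with a second T5 model, builds
distractor options from related words, renders the quiz, and appends every run
to a tab-separated log that users can download.

It relies on a local `helper` module for the text-processing utilities imported
below, and on two environment variables, `summary_mod_name` and
`question_mod_name`, that name the model checkpoints to load.
"""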
import streamlit as st
from textwrap3 import wrap
from flashtext import KeywordProcessor
import torch, random, nltk, string, traceback, sys, os, requests, datetime
import numpy as np
import pandas as pd
from transformers import T5ForConditionalGeneration, T5Tokenizer
import pke
from helper import postprocesstext, summarizer, get_nouns_multipartite, get_keywords,\
    get_question, get_related_word, get_final_option_list, load_raw_text

def set_seed(seed: int):
    """Seed every random number generator used here for reproducible output."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

def load_model():
    """Download the required NLTK corpora and load the summarization and
    question-generation T5 models named by the environment variables."""
    nltk.download('punkt')
    nltk.download('brown')
    nltk.download('wordnet')
    nltk.download('stopwords')
    nltk.download('omw-1.4')
    summary_mod_name = os.environ["summary_mod_name"]
    question_mod_name = os.environ["question_mod_name"]
    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    summary_model = summary_model.to(device)
    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)
    return summary_model, summary_tokenizer, question_tokenizer, question_model
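
# A minimal sketch of how the app is assumed to be launched locally; the actual
# checkpoint names are configured outside this file, so the values below are
# placeholders (the file name is also assumed):
#   export summary_mod_name=<summarization T5 checkpoint>
#   export question_mod_name=<question-generation T5 checkpoint>
#   streamlit run app.py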

from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords

def csv_downloader(df):
    """Offer the quiz log dataframe as a tab-separated file via a download button."""
    res = df.to_csv(index=False, sep="\t").encode('utf-8')
    st.download_button(
        label="Download logs data as CSV separated by tab",
        data=res,
        file_name='df_quiz_log_file_v1.csv',
        mime='text/csv')

def load_file():
    """Load text from an uploaded plain-text file (an alternative input path to
    the text area below; its call is currently commented out)."""
    uploaded_file = st.file_uploader("Upload Files", type=['txt'])
    if uploaded_file is not None:
        if uploaded_file.type == "text/plain":
            raw_text = str(uploaded_file.read(), "utf-8")
            return raw_text

st.markdown('![Visitor count](https://shields-io-visitor-counter.herokuapp.com/badge?page=https://share.streamlit.io/https://huggingface.co/spaces/aakashgoel12/getmcq&label=VisitorsCount&labelColor=000000&logo=GitHub&logoColor=FFFFFF&color=1D70B8&style=for-the-badge)')

# Load the summarization and question-generation models
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("Exam Assistant")
st.write("Upload text, get ready to answer auto-generated questions")
st.text("Disclaimer: This app stores the user's input for model improvement purposes!")

# Load input text: an editable text area pre-filled with example text
default_text = load_raw_text()
raw_text = st.text_area("Enter text here", default_text, height=250, max_chars=1000000)
# raw_text = load_file()
start_time = str(datetime.datetime.now())
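
# Quiz-generation flow: summarize the input text, extract answer keywords from
# the text and its summary, generate one question per keyword, build distractor
# options from related words, render each question with its options (correct
# answer highlighted in green), and append the run to a tab-separated log.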
if raw_text is not None and raw_text != '':
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)
    # print("Ans list: {}".format(ans_list))
    # Parallel lists, one entry per unique generated question
    questions = []
    answers = []
    option1 = []
    option2 = []
    option3 = []
    option4 = []
    for idx, ans in enumerate(ans_list):
        # print("IDX: {}, ANS: {}".format(idx, ans))
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)
        # Record and render only unique questions so all logged columns stay aligned
        if ques not in questions:
            option1.append(final_options[0])
            option2.append(final_options[1])
            option3.append(final_options[2])
            option4.append(final_options[3])
            html_str = f"""
            <div>
            <p>
            {idx+1}: <b> {ques} </b>
            </p>
            </div>
            """
            # Highlight the correct option in green
            html_str += f' <p style="color:Green;"><b> {final_options[0]} </b></p> ' if ans_index == 0 else f' <p><b> {final_options[0]} </b></p> '
            html_str += f' <p style="color:Green;"><b> {final_options[1]} </b></p> ' if ans_index == 1 else f' <p><b> {final_options[1]} </b></p> '
            html_str += f' <p style="color:Green;"><b> {final_options[2]} </b></p> ' if ans_index == 2 else f' <p><b> {final_options[2]} </b></p> '
            html_str += f' <p style="color:Green;"><b> {final_options[3]} </b></p> ' if ans_index == 3 else f' <p><b> {final_options[3]} </b></p> '
            st.markdown(html_str, unsafe_allow_html=True)
            st.markdown("-----")
            questions.append(ques)
            answers.append(ans)
    # Append this run to the cumulative tab-separated quiz log and offer it for download
    output_path = "results/df_quiz_log_file_v1.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    res_df = pd.DataFrame({"TimeStamp": [start_time]*len(questions),
                           "Input": [str(raw_text)]*len(questions),
                           "Question": questions,
                           "Option1": option1,
                           "Option2": option2,
                           "Option3": option3,
                           "Option4": option4,
                           "Correct Answer": answers})
    res_df.to_csv(output_path, mode='a', index=False, sep="\t", header=not os.path.exists(output_path))
    # st.dataframe(pd.read_csv(output_path, sep="\t").tail(5))
    csv_downloader(pd.read_csv(output_path, sep="\t"))