# NOTE: the lines below are residue from the web page this file was copied from
# (Space status, file size, commit hashes and the line-number gutter); they are
# not part of the program.
# Spaces:
# Sleeping
# Sleeping
# File size: 5,181 Bytes
# fabb80a b30dcf3 fabb80a 2f043ed fabb80a f8665e5 fabb80a d8261b6 fabb80a f8665e5 fabb80a b7aba69 442e6c5 fabb80a 442e6c5 dc2ac9f 5202ba5 f8665e5 5202ba5 e0061e3 5202ba5 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
from textwrap3 import wrap
from flashtext import KeywordProcessor
import torch, random, nltk, string, traceback, sys, os, requests, datetime
import numpy as np
import pandas as pd
from transformers import T5ForConditionalGeneration,T5Tokenizer
import pke
from helper import postprocesstext, summarizer, get_nouns_multipartite, get_keywords,\
get_question, get_related_word, get_final_option_list, load_raw_text
def set_seed(seed: int) -> None:
    """Seed every random number generator this script uses.

    The same value is applied to Python's ``random`` module, NumPy's global
    generator, and PyTorch (the default generator and all CUDA devices).

    Args:
        seed: Seed value handed to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


# Seed once at import time, before any model is loaded.
set_seed(42)
# NOTE(review): `st.cache` is the legacy Streamlit caching API; confirm it is
# still available in the Streamlit version this app is pinned to.
@st.cache(allow_output_mutation=True)
def load_model():
    """Download the NLTK resources and load the summary and question models.

    The model names are read from the ``summary_mod_name`` and
    ``question_mod_name`` environment variables. Both T5 models are moved to
    CUDA when it is available, otherwise to the CPU.

    Returns:
        A 4-tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)``. Note that for the question pair the tokenizer comes
        *before* the model.

    Raises:
        KeyError: If either environment variable is not set.
    """
    # Each resource is requested once (the original asked for 'wordnet' twice).
    for resource in ('punkt', 'brown', 'wordnet', 'stopwords', 'omw-1.4'):
        nltk.download(resource)

    summary_mod_name = os.environ["summary_mod_name"]
    question_mod_name = os.environ["question_mod_name"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
def csv_downloader(df: pd.DataFrame) -> None:
    """Render a Streamlit download button for *df*.

    The frame is serialised without its index, tab-separated, and encoded as
    UTF-8 before being handed to ``st.download_button``.

    Args:
        df: Data to offer for download.
    """
    payload = df.to_csv(index=False, sep="\t").encode('utf-8')
    st.download_button(
        label="Download logs data as CSV separated by tab",
        data=payload,
        file_name='df_quiz_log_file_v1.csv',
        mime='text/csv',
    )
def load_file():
    """Show a ``.txt`` file uploader and return the uploaded text.

    Returns:
        The uploaded file's content decoded as UTF-8, or ``None`` when no file
        has been uploaded or the upload's reported type is not ``text/plain``.
    """
    uploaded_file = st.file_uploader("Upload Files", type=['txt'])
    # Return None explicitly: previously `raw_text` was only bound inside the
    # text/plain branch, so other paths had no defined result.
    if uploaded_file is None or uploaded_file.type != "text/plain":
        return None
    return str(uploaded_file.read(), "utf-8")
import html  # stdlib; generated text is escaped before being embedded in raw HTML

# Number of answer options shown and logged per question.
NUM_OPTIONS = 4
# Column order of the tab-separated log file.
LOG_COLUMNS = ["TimeStamp", "Input", "Question", "Option1", "Option2",
               "Option3", "Option4", "Correct Answer"]


def _question_html(number, question, options, answer_index):
    """Return the HTML for one question; the option at *answer_index* is green."""
    parts = [
        "\n<div>\n<p>\n"
        f"{number}: <b> {html.escape(str(question))} </b>\n"
        "</p>\n</div>\n"
    ]
    for position, option in enumerate(options):
        style = ' style="color:Green;"' if position == answer_index else ""
        parts.append(f" <p{style}><b> {html.escape(str(option))} </b></p> ")
    parts.append("\n")
    return "".join(parts)


st.markdown('')

# Loading Model
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("Exam Assistant")
st.write("Upload text, Get ready for answering autogenerated questions")
st.text("Disclaimer: This app stores user's input for model improvement purposes !!")

# Input text (pre-filled with the default text from the helper module)
default_text = load_raw_text()
raw_text = st.text_area("Enter text here", default_text, height=250, max_chars=1000000)
# Alternative input path, currently disabled: raw_text = load_file()
start_time = str(datetime.datetime.now())

if raw_text:
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)

    questions = []  # questions already rendered, used to skip repeats on screen
    log_rows = []   # exactly one row per answer, so all log columns stay aligned
    for idx, ans in enumerate(ans_list):
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)
        options = [final_options[i] for i in range(NUM_OPTIONS)]

        # Log every generated question, including repeats. Building the row
        # here (rather than appending to separate per-column lists) prevents
        # the length mismatch that occurred when a repeated question was
        # skipped for display but its options were still recorded.
        log_rows.append(dict(zip(
            LOG_COLUMNS,
            [start_time, str(raw_text), ques, *options, ans],
        )))

        if ques in questions:
            continue
        questions.append(ques)
        st.markdown(_question_html(idx + 1, ques, options, ans_index),
                    unsafe_allow_html=True)
        st.markdown("-----")

    output_path = "results/df_quiz_log_file_v1.csv"
    # Appending fails if the target directory is missing, so create it first.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit columns keep the header intact even when there are no rows.
    res_df = pd.DataFrame(log_rows, columns=LOG_COLUMNS)
    # Write the header only when the log file is being created.
    res_df.to_csv(output_path, mode='a', index=False, sep="\t",
                  header=not os.path.exists(output_path))
    # Debug aid: st.dataframe(pd.read_csv(output_path, sep="\t").tail(5))
    csv_downloader(pd.read_csv(output_path, sep="\t"))