from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re

nltk.download('punkt')
# model_name = "deepset/roberta-base-squad2"
model_name = "jaimin/Bullet_Point"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the QA pipeline once from the preloaded model and tokenizer instead
# of reconstructing it for every sentence inside the answer loop below.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)

def bullete(text, wikipedia_language="en"):
    try:
        # Drop stopwords and question punctuation so that only the topic
        # terms remain for the Wikipedia page lookup.
        question_words = STOPWORDS.union(
            set(['likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose', 'important', 'better']))
        lower_text = word_tokenize(text.lower())
        new_text = [i for i in lower_text if i not in question_words]
        new_txt = " ".join(new_text)
        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)
        et_page = wikipedia.page(new_txt)
        title = et_page.title
        content = et_page.content
        page_url = et_page.url
        linked_pages = et_page.links
        text1 = content
    except Exception:
        return "Please write a correct question"
    # Strip section headings such as "== History ==" and keep the article's
    # non-empty paragraph blocks.
    final_out = re.sub(r'\=.+\=', '', text1)
    result = list(filter(lambda x: x != '', final_out.split('\n\n')))
    answer = []
    try:
        # Run extractive QA over each sentence of the first paragraph and
        # collect the predicted answer spans.
        for sentence in result[0].split('.'):
            QA_input = {
                'question': text,
                'context': sentence
            }
            res = nlp(QA_input)
            answer.append(res['answer'])
    except Exception:
        # A sentence that fails QA simply contributes no bullet point.
        pass
    gen_output = []
    for ans in answer:
        gen_output.append("* " + ans + ".")
    paraphrase = "\n".join(gen_output)
    # Collapse any doubled spaces inside the extracted answer spans.
    final_answer = paraphrase.replace("  ", " ")
    return final_answer

interface = gr.Interface(fn=bullete,
                         inputs="text",
                         outputs="text",
                         title='Bullet Point')
interface.launch(inline=False)
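
# A minimal smoke test, assuming network access for both the model download
# and the Wikipedia lookup ("Who was Albert Einstein?" is only an example
# input); run it in place of interface.launch(), which blocks:
#
#     print(bullete("Who was Albert Einstein?"))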