# Spaces: Runtime error
# (Status text captured from the hosting page together with this listing; it is not part of the program.)
import gradio as gr | |
from transformers import pipeline | |
from PyPDF2 import PdfReader | |
# Both pipelines are built at import time, so importing this module loads the
# two models before the UI is constructed.

# Text-to-text pipeline used to produce question text from a passage.
generate_question_pipe = pipeline(
    task="text2text-generation",
    model="thangved/t5-generate-question",
)

# Question-answering pipeline used to answer each question against the PDF text.
# NOTE(review): the checkpoint name suggests a seq2seq model; confirm that it
# can be loaded by the "question-answering" task.
qa_pipe = pipeline(
    task="question-answering",
    model="SharKRippeR/QA_T5_small_seq2seq",
)

# Number of question slots; generate_questions() returns MAX_OUTPUT + 1 values
# (the PDF text followed by the questions).
MAX_OUTPUT = 3
# Maximum number of space-separated tokens of the input that are kept.
MAX_INPUT_TOKENS = 256


def split_texts(text: str) -> list[str]:
    """Truncate *text* to ``MAX_INPUT_TOKENS`` tokens and return it in chunks.

    Tokens are produced by splitting on single spaces. Only the first
    ``MAX_INPUT_TOKENS`` tokens are kept; they are then grouped into chunks of
    at most ``MAX_INPUT_TOKENS`` tokens each, so the result currently holds at
    most one chunk.

    The chunk width now equals the loop stride. Previously the stride was
    ``MAX_INPUT_TOKENS`` while each slice took only 64 tokens, which silently
    discarded tokens 64..255 of the truncated input.

    Args:
        text: Raw text to prepare.

    Returns:
        A list of space-joined chunks, or an empty list when *text* is empty
        or contains only whitespace.
    """
    # Nothing useful to hand on for blank input; avoid returning [''].
    if not text.strip():
        return []

    tokens = text.split(' ')[:MAX_INPUT_TOKENS]
    return [
        ' '.join(tokens[start:start + MAX_INPUT_TOKENS])
        for start in range(0, len(tokens), MAX_INPUT_TOKENS)
    ]
def generate_questions_request(text: str) -> list[str]:  # type: ignore
    """Run the question-generation pipeline on *text* and extract questions.

    Each pipeline result's ``'generated_text'`` is split on the literal marker
    ``'Question:'``. Whatever precedes the first marker is dropped, and every
    remaining piece is stripped of surrounding whitespace.

    Args:
        text: Passage passed to ``generate_question_pipe``.

    Returns:
        The extracted question strings in pipeline order, or an empty list
        when the pipeline returns ``None``.
    """
    outputs = generate_question_pipe(text)
    if outputs is None:
        return []

    collected = []
    for item in outputs:
        # [1:] skips the text in front of the first 'Question:' marker.
        pieces = item['generated_text'].split('Question:')[1:]  # type: ignore
        collected.extend(piece.strip() for piece in pieces)
    return collected
def generate_questions(file):
    """Extract the text of an uploaded PDF and generate questions from it.

    The return value always has exactly ``MAX_OUTPUT + 1`` items: the full
    extracted PDF text followed by ``MAX_OUTPUT`` question strings. Missing
    questions are filled with ``''`` and surplus questions are dropped, so the
    length stays fixed regardless of how many questions the model produces.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or
            ``None`` when nothing was uploaded.

    Returns:
        A list of ``MAX_OUTPUT + 1`` strings.
    """
    slots = MAX_OUTPUT + 1  # PDF text + MAX_OUTPUT questions
    if file is None:
        return [''] * slots

    reader = PdfReader(file.name)
    # `or ''` keeps the join safe if a page yields no extracted text.
    full_text = ''.join(page.extract_text() or '' for page in reader.pages)

    questions = [full_text]
    for chunk in split_texts(full_text):
        # Stop calling the model once every question slot is filled.
        if len(questions) >= slots:
            break
        questions += generate_questions_request(chunk)

    # Enforce the fixed length: drop surplus questions, pad missing ones.
    questions = questions[:slots]
    questions += [''] * (slots - len(questions))
    return questions
def generate_answers(context='', q1='', q2='', q3=''):
    """Answer up to three questions against *context* with the QA pipeline.

    A question is answered only when both it and *context* contain
    non-whitespace text; otherwise its answer is ``''`` and the pipeline is
    not called. This avoids handing the pipeline an empty question or an
    empty context.

    Args:
        context: Text the answers are extracted from.
        q1: First question.
        q2: Second question.
        q3: Third question.

    Returns:
        A list of three answer strings, in the order ``q1``, ``q2``, ``q3``.
    """
    # The context is the same for every question, so check it once.
    has_context = bool(context and context.strip())

    answers = []
    for q in (q1, q2, q3):
        if not has_context or not q or not q.strip():
            answers.append('')
            continue
        answer = qa_pipe(question=q, context=context)
        answers.append(answer['answer'])  # type: ignore
    return answers
# Gradio UI: a PDF upload and a text box for its content, three question boxes
# beside three answer boxes, and one button per processing step.
# NOTE(review): the nesting below is reconstructed from statement order because
# the listing's indentation was lost; confirm the layout against the app.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")

    with gr.Row():
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)

    with gr.Row():
        with gr.Column():
            q1, q2, q3 = [
                gr.Textbox(label=caption)
                for caption in ('Question 1', 'Question 2', 'Question 3')
            ]
        with gr.Column():
            a1, a2, a3 = [
                gr.Textbox(label=caption)
                for caption in ('Answer 1', 'Answer 2', 'Answer 3')
            ]

    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')

    question_boxes = [q1, q2, q3]
    answer_boxes = [a1, a2, a3]

    # Step 1: the uploaded PDF fills the content box and the question boxes.
    generate_question_btn.click(
        fn=generate_questions,
        inputs=inp,
        outputs=[context, *question_boxes],
    )
    # Step 2: the content and the questions fill the answer boxes.
    generate_answer_btn.click(
        fn=generate_answers,
        inputs=[context, *question_boxes],
        outputs=answer_boxes,
    )

if __name__ == '__main__':
    demo.launch()