# Page header captured from the Hugging Face file view (not program code):
#   thangved's picture
#   Upload folder using huggingface_hub
#   e0a45a8
#   raw
#   history blame contribute delete
#   2.92 kB
import gradio as gr
from transformers import pipeline
from PyPDF2 import PdfReader
# Both pipelines are created at import time, so importing this module loads
# the two models below.

# Text2text-generation pipeline; generate_questions_request parses its
# 'generated_text' output for 'Question:'-prefixed fragments.
generate_question_pipe = pipeline("text2text-generation", model="thangved/t5-generate-question")
# Question-answering pipeline; generate_answers calls it with a
# {'question', 'context'} dict and reads the 'answer' field of the result.
qa_pipe = pipeline("question-answering", model="SharKRippeR/QA_T5_small_seq2seq")
# Number of question slots; generate_questions returns MAX_OUTPUT + 1 values
# (the extracted text followed by the questions).
MAX_OUTPUT = 3
# Maximum number of whitespace-separated tokens handed to the question
# generator, and also the size of each chunk produced by split_texts.
MAX_INPUT_TOKENS = 256


def split_texts(text: str) -> list[str]:
    """Truncate *text* to MAX_INPUT_TOKENS tokens and return it as chunks.

    Tokens are whitespace-separated words. Splitting on any whitespace
    (rather than a single space) keeps newlines and runs of spaces from
    producing empty or glued-together tokens that would waste the budget.

    The chunk width equals the stride, so no token inside the truncated
    window is dropped between chunks.

    Args:
        text: Raw text to prepare for the model.

    Returns:
        A list of chunk strings, each holding at most MAX_INPUT_TOKENS
        tokens joined by single spaces. Empty or whitespace-only input
        yields an empty list.
    """
    tokens = text.split()[:MAX_INPUT_TOKENS]
    return [
        ' '.join(tokens[start:start + MAX_INPUT_TOKENS])
        for start in range(0, len(tokens), MAX_INPUT_TOKENS)
    ]
def generate_questions_request(text: str) -> list[str]:  # type: ignore
    """Run the question-generation pipeline on *text* and parse its output.

    Every item's 'generated_text' is split on the literal marker
    'Question:'. Whatever precedes the first marker is discarded, and each
    remaining fragment is stripped of surrounding whitespace.

    Args:
        text: Passage to generate questions from.

    Returns:
        The stripped question fragments in generation order, or an empty
        list when the pipeline returns None.
    """
    outputs = generate_question_pipe(text)
    if outputs is None:
        return []

    return [
        fragment.strip()
        for item in outputs
        for fragment in item['generated_text'].split('Question:')[1:]  # type: ignore
    ]
def generate_questions(file):
    """Extract a PDF's text and generate up to MAX_OUTPUT questions from it.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of
            the PDF, or None when nothing has been uploaded.

    Returns:
        A list of exactly ``MAX_OUTPUT + 1`` strings: the full extracted
        text followed by the question slots. Slots are padded with '' when
        fewer questions were generated, and surplus questions are dropped,
        so the result always matches the number of output components this
        handler is wired to.
    """
    if file is None:
        return [''] * (MAX_OUTPUT + 1)

    reader = PdfReader(file.name)
    content = ''.join(page.extract_text() for page in reader.pages)

    questions = []
    for chunk in split_texts(content):
        questions += generate_questions_request(chunk)
        if len(questions) >= MAX_OUTPUT:
            # Every slot can already be filled; skip further model calls.
            break

    # Cap first, then pad: the handler must never return more (or fewer)
    # values than there are output components.
    questions = questions[:MAX_OUTPUT]
    padding = [''] * (MAX_OUTPUT - len(questions))
    return [content, *questions, *padding]
def generate_answers(context='', q1='', q2='', q3=''):
    """Answer up to three questions against *context*.

    Args:
        context: Text in which the answers are looked up.
        q1: First question; blank means "no question".
        q2: Second question; blank means "no question".
        q3: Third question; blank means "no question".

    Returns:
        A list of three answer strings, positionally matching q1..q3. A
        slot is '' when its question is blank (empty, whitespace-only or
        None) or when *context* is blank.
    """
    # The question-answering pipeline rejects empty question/context values,
    # so blank input is answered with '' instead of being sent to the model.
    has_context = bool(context and context.strip())

    answers = []
    for q in (q1, q2, q3):
        if not has_context or not q or not q.strip():
            answers.append('')
            continue
        answer = qa_pipe({
            'question': q,
            'context': context,
        })
        answers.append(answer['answer'])  # type: ignore
    return answers
# Gradio UI: upload a PDF, show its text, generate questions, then answers.
# NOTE(review): the nesting below was reconstructed from an unindented
# listing; the Row/Column grouping follows the `with` statements, but the
# placement of the two buttons (directly under Blocks) should be confirmed.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")
    with gr.Row():
        # File picker restricted to PDFs, next to the extracted text.
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)
    with gr.Row():
        with gr.Column():
            # Question boxes; filled by generate_questions and also read
            # back as inputs to generate_answers.
            q1 = gr.Textbox(label='Question 1')
            q2 = gr.Textbox(label='Question 2')
            q3 = gr.Textbox(label='Question 3')
        with gr.Column():
            # Answer boxes; filled by generate_answers.
            a1 = gr.Textbox(label='Answer 1')
            a2 = gr.Textbox(label='Answer 2')
            a3 = gr.Textbox(label='Answer 3')
    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')
    # generate_questions receives the uploaded file and must return four
    # values: the PDF text followed by the three questions.
    generate_question_btn.click(fn=generate_questions, inputs=inp, outputs=[context, q1, q2, q3])
    # generate_answers receives the text and the three questions and must
    # return three values, one per answer box.
    generate_answer_btn.click(fn=generate_answers, inputs=[context, q1, q2, q3], outputs=[a1, a2, a3])
# Start the app only when this file is run as a script.
if __name__ == '__main__':
    demo.launch()