Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| import os | |
| from docx import Document as DocxDocument | |
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Either the PDF's filesystem path as a ``str``, or an object
            that exposes that path through a ``name`` attribute.

    Returns:
        The extracted text of all pages, concatenated in page order with no
        separator inserted between pages.
    """
    # Accept a bare path as well as an upload object carrying the path in `.name`.
    pdf_path = file if isinstance(file, str) else file.name
    pdf_reader = PdfReader(pdf_path)
    # `or ""` keeps the join safe should a page yield None instead of a string.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def process_file(file):
    """Return the plain text of an uploaded PDF or DOCX file.

    The file type is chosen from the text after the last ``.`` in the path,
    compared case-insensitively.

    Args:
        file: ``None`` when nothing was uploaded, the file's path as a
            ``str``, or an object exposing the path through a ``name``
            attribute.

    Returns:
        For ``pdf``: whatever ``process_pdf`` returns for the file.
        For ``docx``: every paragraph's text, each followed by a newline.
        For ``None``: an empty string.
        For any other extension: a short message naming the rejected file.
    """
    if file is None:
        # The handler can be triggered before any file has been selected.
        return ""

    # Accept a bare path as well as an upload object carrying the path in `.name`.
    file_path = file if isinstance(file, str) else file.name
    file_extension = file_path.rsplit(".", 1)[-1].lower()

    if file_extension == "pdf":
        return process_pdf(file)

    if file_extension == "docx":
        docx_document = DocxDocument(file_path)
        return "".join(
            paragraph.text + "\n" for paragraph in docx_document.paragraphs
        )

    # Report the problem instead of silently returning None.
    return (
        "Unsupported file (expected .pdf or .docx): "
        f"{os.path.basename(file_path)}"
    )
# UI definition: a text box for the extracted text, a file picker, and a
# button that runs `process_file` on the selected file.
with gr.Blocks() as demo:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        text_output = gr.Textbox(label="text")
    with gr.Row():
        # The label and `file_types` name both formats `process_file` handles
        # (the label previously mentioned PDF only).
        file_input = gr.File(
            label="Wähle eine PDF- oder DOCX-Datei aus",
            type="filepath",
            file_types=[".pdf", ".docx"],
        )
    with gr.Row():
        submit_button = gr.Button("upload")
    # Clicking the button sends the selected file to `process_file` and shows
    # its return value in the text box.
    submit_button.click(process_file, inputs=file_input, outputs=text_output)

demo.launch()