Spaces:
Paused
Paused
File size: 4,605 Bytes
73683aa 0cc1374 53cd054 0cc1374 73683aa 6b438f3 73683aa 53cd054 6b438f3 53cd054 b07522c 0dd8dfd 6b438f3 b07522c 53cd054 73683aa 6b438f3 2dbedf0 53cd054 73683aa 53cd054 a73ec05 73683aa 53cd054 73683aa 53cd054 73683aa 53cd054 73683aa 6b438f3 73683aa a73ec05 73683aa a73ec05 6b438f3 a73ec05 73683aa 0a681f9 0cc1374 0dd8dfd a232e1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# app.py
import os
import json
import gradio as gr
from gradio_pdf import PDF
import logging
from model import model_initialized
from pdf_processor import to_pdf, to_markdown, file_to_pdf
from config import config
from tts import text_to_speech, generate_audio # Import TTS module
from initializer import initialize_app
# Configure the root logger once at import time. The log_info/log_error
# helpers wrap each message in ANSI escape codes for colored output.
logging.basicConfig(
    format="%(levelname)s: %(message)s",
    level=logging.INFO,
)
def log_info(message: str):
    """Log ``message`` on the root logger at INFO level, colored green.

    The text is wrapped in the ANSI "bright green" escape sequence and
    followed by the reset sequence so later terminal output is unaffected.
    """
    green, reset = "\033[92m", "\033[0m"
    logging.info(f"{green}{message}{reset}")
def log_error(message: str):
    """Log ``message`` on the root logger at ERROR level, colored red.

    The text is wrapped in the ANSI "bright red" escape sequence and
    followed by the reset sequence so later terminal output is unaffected.
    """
    red, reset = "\033[91m", "\033[0m"
    logging.error(f"{red}{message}{reset}")
# Run the initialization once, at import time, before any of the UI below is built.
# NOTE(review): initialize_app is imported from the project-local `initializer`
# module; what it actually sets up is not visible from this file — confirm there.
initialize_app()
# Load the HTML shown at the top of the page. A missing or unreadable file
# must not stop the app from starting, so fall back to a placeholder heading.
try:
    # Read as UTF-8 explicitly: the platform default encoding is not
    # guaranteed to decode an HTML file containing non-ASCII characters.
    with open("header.html", "r", encoding="utf-8") as file:
        header = file.read()
except (OSError, UnicodeError) as e:
    # OSError: file missing / not readable; UnicodeError: not valid UTF-8.
    log_error(f"Failed to load header.html. Error: {e}")
    header = "<h1>Header not found</h1>"
else:
    log_info("Header loaded successfully.")
# Build the choices for the "Language" dropdown from language_options.json.
try:
    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open('language_options.json', 'r', encoding='utf-8') as file:
        data = json.load(file)
    # '' and 'auto' come first, followed by every script-specific group.
    all_lang = (
        ['', 'auto']
        + data["other_lang"]
        + data["latin_lang"]
        + data["arabic_lang"]
        + data["cyrillic_lang"]
        + data["devanagari_lang"]
    )
except (OSError, ValueError, KeyError, TypeError) as e:
    # OSError: file missing / unreadable; ValueError: malformed JSON or
    # undecodable bytes; KeyError / TypeError: unexpected JSON structure.
    log_error(f"Failed to load file language_options.json. Error: {e}")
    # Keep '' and 'auto' in the fallback so the dropdown's default value
    # ("auto" unless overridden in config) is still one of its choices.
    all_lang = ['', 'auto', 'es', 'en']
# Build the Gradio UI: upload, options and PDF preview in the first column,
# conversion results in the second, then wire the event handlers.
with gr.Blocks() as demo:
    gr.HTML(header)
    with gr.Row():
        # First column: file upload, conversion options, actions, preview, examples.
        with gr.Column(variant='panel', scale=5):
            file_input = gr.File(
                label="Please upload a PDF or image",
                # Every entry is a file extension and needs its leading dot
                # ("webp" without the dot is not an extension filter).
                file_types=[".pdf", ".png", ".jpeg", ".jpg", ".webp"],
            )
            # Positional arguments are minimum, maximum and initial value.
            max_pages = gr.Slider(
                1,
                20,
                config.get("max_pages_default", config.get("max_pages", 10)),
                step=1,
                label='Max convert pages',
            )
            with gr.Row():
                layout_mode = gr.Dropdown(
                    ["layoutlmv3", "doclayout_yolo"],
                    label="Layout model",
                    value=config.get("layout_model_default", "layoutlmv3"),
                )
                language = gr.Dropdown(
                    all_lang,
                    label="Language",
                    value=config.get("language_default", config.get("language", "auto")),
                )
            with gr.Row():
                formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
                is_ocr = gr.Checkbox(label="Force enable OCR", value=False)
                table_enable = gr.Checkbox(label="Enable table recognition", value=True)
            with gr.Row():
                convert_button = gr.Button("Convert")
                clear_button = gr.ClearButton(value="Clear")
            pdf_display = PDF(label='PDF preview', interactive=False, visible=True, height=800)
            with gr.Accordion("Examples:"):
                example_root = os.path.join(os.path.dirname(__file__), "examples")
                # Match the real ".pdf" extension (any letter case) instead of
                # any name that merely ends in "pdf", and sort the result
                # because os.listdir returns entries in arbitrary order.
                examples = sorted(
                    os.path.join(example_root, name)
                    for name in os.listdir(example_root)
                    if name.lower().endswith(".pdf")
                )
                gr.Examples(examples=examples, inputs=file_input)
        # Second column: downloadable result, rendered/raw Markdown, read-aloud.
        with gr.Column(variant='panel', scale=5):
            output_file = gr.File(label="Convert result", interactive=False)
            with gr.Tabs():
                with gr.Tab("Markdown rendering"):
                    md_render = gr.Markdown(
                        label="Markdown rendering",
                        height=1100,
                        show_copy_button=True,
                        line_breaks=True,
                    )
                with gr.Tab("Markdown text"):
                    md_text = gr.TextArea(lines=45, show_copy_button=True)
            # Audio component for TTS playback.
            # NOTE(review): placed directly under the tabs inside this column;
            # confirm this matches the intended layout.
            audio_output = gr.Audio(label="Read Aloud", type="filepath")
            read_button = gr.Button("Read Aloud")

    # Refresh the PDF preview whenever the uploaded file changes.
    file_input.change(fn=file_to_pdf, inputs=file_input, outputs=pdf_display)
    # Run the conversion with the selected options and fill every result widget.
    convert_button.click(
        fn=to_markdown,
        inputs=[file_input, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
        outputs=[md_render, md_text, output_file, pdf_display],
    )
    # When "Read Aloud" is clicked, generate audio from the markdown text.
    read_button.click(
        fn=generate_audio,
        inputs=md_text,
        outputs=audio_output,
    )
    # Components reset by the "Clear" button.
    clear_button.add([file_input, md_render, pdf_display, md_text, output_file, is_ocr])
# Script entry point: run the model downloader, then start the Gradio server.
if __name__ == "__main__":
    import subprocess
    import sys

    print("Checking and downloading models if necessary...")
    # Use the interpreter running this app rather than whatever "python"
    # resolves to on PATH (which may be another environment, or absent).
    result = subprocess.run([sys.executable or "python", "download_models.py"])
    if result.returncode == 0:
        print("Models are ready!")
    else:
        # Still launch, as before, but do not report success when the
        # downloader exited with an error.
        log_error(
            f"download_models.py exited with code {result.returncode}; models may be missing."
        )
    demo.launch(ssr_mode=True)
|