File size: 4,605 Bytes
73683aa
0cc1374
53cd054
0cc1374
73683aa
6b438f3
73683aa
53cd054
6b438f3
53cd054
 
 
b07522c
0dd8dfd
 
 
6b438f3
 
 
 
 
 
 
 
b07522c
53cd054
 
 
73683aa
6b438f3
 
 
 
 
 
 
2dbedf0
53cd054
 
 
 
73683aa
53cd054
 
 
 
 
a73ec05
73683aa
 
 
 
53cd054
 
 
 
73683aa
 
 
 
53cd054
73683aa
 
 
 
53cd054
73683aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b438f3
 
 
73683aa
 
a73ec05
73683aa
 
 
 
 
a73ec05
6b438f3
 
 
 
 
 
a73ec05
73683aa
0a681f9
0cc1374
0dd8dfd
 
 
 
a232e1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# app.py
import os
import json
import gradio as gr
from gradio_pdf import PDF
import logging
from model import model_initialized
from pdf_processor import to_pdf, to_markdown, file_to_pdf
from config import config
from tts import text_to_speech, generate_audio  # Import TTS module
from initializer import initialize_app





# Configure the root logger: INFO and above, rendered as "LEVEL: message".
# The ANSI color escape codes are not applied here; they are added per message
# by the log_info / log_error helpers below.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

def log_info(message: str):
    """Log *message* at INFO level, wrapped in ANSI codes that color it green."""
    green_on = "\033[92m"
    color_off = "\033[0m"
    colored = "{}{}{}".format(green_on, message, color_off)
    logging.info(colored)

def log_error(message: str):
    """Log *message* at ERROR level, wrapped in ANSI codes that color it red."""
    red_on = "\033[91m"
    color_off = "\033[0m"
    colored = "{}{}{}".format(red_on, message, color_off)
    logging.error(colored)

# Run the initialization once, at import time, before the UI is declared.
initialize_app()

# Load the HTML banner that is rendered at the top of the page. A missing or
# unreadable header is not fatal: the failure is logged and a placeholder
# heading is used instead.
try:
    # Decode explicitly as UTF-8 so non-ASCII markup does not depend on the
    # platform's default locale encoding.
    # NOTE(review): assumes header.html is saved as UTF-8 — confirm.
    with open("header.html", "r", encoding="utf-8") as file:
        header = file.read()
    log_info("Header loaded successfully.")
except Exception as e:
    log_error(f"Failed to load header.html. Error: {e}")
    header = "<h1>Header not found</h1>"

# Build `all_lang`, the list of choices offered by the "Language" dropdown.
try:
    # Load the language options from the JSON file (JSON text is UTF-8, so
    # decode it explicitly instead of relying on the locale default).
    with open('language_options.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Create the all_lang list by concatenating the different language lists,
    # prefixed with the empty and 'auto' choices.
    all_lang = (
        ['', 'auto']
        + data["other_lang"]
        + data["latin_lang"]
        + data["arabic_lang"]
        + data["cyrillic_lang"]
        + data["devanagari_lang"]
    )
except Exception as e:
    # Best-effort: any failure (missing file, invalid JSON, missing key) is
    # logged and a minimal list is used so the app can still start.
    log_error(f"Failed to load file language_options.json. Error: {e}")
    # Keep '' and 'auto' in the fallback as well: the dropdown's default value
    # falls back to "auto", which must be one of the offered choices.
    all_lang = ['', 'auto', 'es', 'en']

# Declare the Gradio UI: a left column with the upload/conversion controls and
# a PDF preview, and a right column with the conversion results and the
# text-to-speech controls. Event wiring follows the layout.
with gr.Blocks() as demo:
    gr.HTML(header)
    with gr.Row():
        # Left column: input file, conversion options, preview and examples.
        with gr.Column(variant='panel', scale=5):
            file_input = gr.File(
                label="Please upload a PDF or image",
                # Every entry is a dotted extension; the original "webp" entry
                # lacked the leading dot and so did not match the others.
                file_types=[".pdf", ".png", ".jpeg", ".jpg", ".webp"])
            # Default page limit comes from config, falling back to 10.
            max_pages = gr.Slider(
                1,
                20,
                config.get("max_pages_default", config.get("max_pages", 10)),
                step=1,
                label='Max convert pages')
            with gr.Row():
                layout_mode = gr.Dropdown(
                    ["layoutlmv3", "doclayout_yolo"],
                    label="Layout model",
                    value=config.get("layout_model_default", "layoutlmv3")
                )
                language = gr.Dropdown(
                    all_lang,
                    label="Language",
                    value=config.get("language_default", config.get("language", "auto"))
                )
            with gr.Row():
                formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
                is_ocr = gr.Checkbox(label="Force enable OCR", value=False)
                table_enable = gr.Checkbox(label="Enable table recognition", value=True)
            with gr.Row():
                convert_button = gr.Button("Convert")
                clear_button = gr.ClearButton(value="Clear")
            pdf_display = PDF(label='PDF preview', interactive=False, visible=True, height=800)
            with gr.Accordion("Examples:"):
                # Example PDFs live in the "examples" directory next to this file.
                example_root = os.path.join(os.path.dirname(__file__), "examples")
                # Match on the ".pdf" extension (not just a trailing "pdf") and
                # sort so the examples appear in a stable order.
                examples = sorted(
                    os.path.join(example_root, f)
                    for f in os.listdir(example_root)
                    if f.endswith(".pdf")
                )
                gr.Examples(examples=examples, inputs=file_input)
        # Right column: converted file, markdown views and audio playback.
        with gr.Column(variant='panel', scale=5):
            output_file = gr.File(label="Convert result", interactive=False)
            with gr.Tabs():
                with gr.Tab("Markdown rendering"):
                    md_render = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True, line_breaks=True)
                with gr.Tab("Markdown text"):
                    md_text = gr.TextArea(lines=45, show_copy_button=True)
            # Audio component for TTS playback
            audio_output = gr.Audio(label="Read Aloud", type="filepath")
            read_button = gr.Button("Read Aloud")

    # Whenever the uploaded file changes, pass it through file_to_pdf and show
    # the result in the preview.
    file_input.change(fn=file_to_pdf, inputs=file_input, outputs=pdf_display)

    # "Convert" runs to_markdown on the file and the selected options; its
    # outputs fill both markdown views, the result file and the PDF preview.
    convert_button.click(
        fn=to_markdown,
        inputs=[file_input, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
        outputs=[md_render, md_text, output_file, pdf_display]
    )

    # When "Read Aloud" is clicked, generate audio from the markdown text
    read_button.click(
        fn=generate_audio,
        inputs=md_text,
        outputs=audio_output
    )

    # Components reset by the "Clear" button.
    clear_button.add([file_input, md_render, pdf_display, md_text, output_file, is_ocr])

# Script entry point: run the model download script, then start the web app.
if __name__ == "__main__":
    import subprocess
    import sys

    print("Checking and downloading models if necessary...")
    # Run the download script with the interpreter that is running this app
    # (rather than whatever "python" resolves to on PATH), so it uses the same
    # environment and installed packages.
    completed = subprocess.run([sys.executable or "python", "download_models.py"])
    if completed.returncode == 0:
        print("Models are ready!")
    else:
        # Stay best-effort (the app is still launched), but do not claim
        # success when the download script reported a failure.
        log_error(
            f"download_models.py exited with code {completed.returncode}; "
            "models may be missing."
        )
    demo.launch(ssr_mode=True)