Spaces:
Paused
Paused
modularized app function
Browse files- app.py +23 -37
- initializer.py +82 -0
- language_options.json +8 -0
- pdf_processor.py +10 -0
- tts.py +15 -0
app.py
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
# app.py
|
2 |
import os
|
|
|
3 |
import gradio as gr
|
4 |
from gradio_pdf import PDF
|
5 |
import logging
|
6 |
from model import model_initialized
|
7 |
-
from pdf_processor import to_pdf, to_markdown
|
8 |
from config import config
|
9 |
-
from tts import text_to_speech # Import TTS module
|
|
|
|
|
10 |
|
11 |
|
12 |
|
@@ -20,6 +23,9 @@ def log_info(message: str):
|
|
20 |
def log_error(message: str):
|
21 |
logging.error(f"\033[91m{message}\033[0m") # Red for errors
|
22 |
|
|
|
|
|
|
|
23 |
# Load header HTML content
|
24 |
try:
|
25 |
with open("header.html", "r") as file:
|
@@ -29,55 +35,35 @@ except Exception as e:
|
|
29 |
log_error(f"Failed to load header.html. Error: {e}")
|
30 |
header = "<h1>Header not found</h1>"
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
devanagari_lang = ['hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', 'sa', 'bgc']
|
37 |
-
other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
|
38 |
-
all_lang = ['', 'auto'] + other_lang + latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
|
39 |
-
|
40 |
-
def file_to_pdf(file_obj):
|
41 |
-
if file_obj is not None:
|
42 |
-
try:
|
43 |
-
pdf_path = to_pdf(file_obj.name)
|
44 |
-
log_info("File converted to PDF successfully.")
|
45 |
-
return pdf_path
|
46 |
-
except Exception as e:
|
47 |
-
log_error(f"Error converting file to PDF: {e}")
|
48 |
-
return None
|
49 |
|
50 |
-
|
51 |
-
"""
|
52 |
-
|
53 |
-
""
|
54 |
-
|
55 |
-
try:
|
56 |
-
audio_file = text_to_speech(text)
|
57 |
-
log_info("Audio generated successfully.")
|
58 |
-
return audio_file
|
59 |
-
except Exception as e:
|
60 |
-
log_error(f"Audio generation failed: {e}")
|
61 |
-
return ""
|
62 |
-
log_error("No text provided for TTS.")
|
63 |
-
return ""
|
64 |
|
65 |
with gr.Blocks() as demo:
|
66 |
gr.HTML(header)
|
67 |
with gr.Row():
|
68 |
with gr.Column(variant='panel', scale=5):
|
69 |
-
file_input = gr.File(
|
70 |
-
|
|
|
|
|
71 |
with gr.Row():
|
72 |
layout_mode = gr.Dropdown(
|
73 |
["layoutlmv3", "doclayout_yolo"],
|
74 |
label="Layout model",
|
75 |
-
value=config.get("layout_model_default", "
|
76 |
)
|
77 |
language = gr.Dropdown(
|
78 |
all_lang,
|
79 |
label="Language",
|
80 |
-
value=config.get("language_default", "auto")
|
81 |
)
|
82 |
with gr.Row():
|
83 |
formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
|
|
|
1 |
# app.py
|
2 |
import os
|
3 |
+
import json
|
4 |
import gradio as gr
|
5 |
from gradio_pdf import PDF
|
6 |
import logging
|
7 |
from model import model_initialized
|
8 |
+
from pdf_processor import to_pdf, to_markdown, file_to_pdf
|
9 |
from config import config
|
10 |
+
from tts import text_to_speech, generate_audio # Import TTS module
|
11 |
+
from initializer import initialize_app
|
12 |
+
|
13 |
|
14 |
|
15 |
|
|
|
23 |
def log_error(message: str):
|
24 |
logging.error(f"\033[91m{message}\033[0m") # Red for errors
|
25 |
|
26 |
+
# Run the initialization once.
|
27 |
+
initialize_app()
|
28 |
+
|
29 |
# Load header HTML content
|
30 |
try:
|
31 |
with open("header.html", "r") as file:
|
|
|
35 |
log_error(f"Failed to load header.html. Error: {e}")
|
36 |
header = "<h1>Header not found</h1>"
|
37 |
|
38 |
+
# Build the choices for the "Language" dropdown from language_options.json.
try:
    # Read the per-script language lists; UTF-8 is stated explicitly so the
    # result does not depend on the platform's default encoding.
    with open('language_options.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # '' and 'auto' come first, followed by each language family in a fixed order.
    all_lang = ['', 'auto'] + data["other_lang"] + data["latin_lang"] + data["arabic_lang"] + data["cyrillic_lang"] + data["devanagari_lang"]
except (OSError, ValueError, KeyError, TypeError) as e:
    # OSError: file missing/unreadable; ValueError: invalid JSON;
    # KeyError/TypeError: the JSON does not have the expected lists.
    log_error(f"Failed to load file language_options.json. Error: {e}")
    # The dropdown defaults to "auto", so the fallback must still offer it.
    all_lang = ['', 'auto', 'es', 'en']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
with gr.Blocks() as demo:
|
50 |
gr.HTML(header)
|
51 |
with gr.Row():
|
52 |
with gr.Column(variant='panel', scale=5):
|
53 |
+
# Upload widget for the document to convert. Every entry in file_types is an
# extension with its leading dot (".webp" was previously written as "webp").
file_input = gr.File(
    label="Please upload a PDF or image",
    file_types=[".pdf", ".png", ".jpeg", ".jpg", ".webp"])
|
56 |
+
max_pages = gr.Slider(1, 20,config.get("max_pages_default", config.get("max_pages", 10)), step=1, label='Max convert pages')
|
57 |
with gr.Row():
|
58 |
layout_mode = gr.Dropdown(
|
59 |
["layoutlmv3", "doclayout_yolo"],
|
60 |
label="Layout model",
|
61 |
+
value=config.get("layout_model_default", "layoutlmv3")
|
62 |
)
|
63 |
language = gr.Dropdown(
|
64 |
all_lang,
|
65 |
label="Language",
|
66 |
+
value=config.get("language_default", config.get("language", "auto"))
|
67 |
)
|
68 |
with gr.Row():
|
69 |
formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
|
initializer.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# initializer.py
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
import requests
|
5 |
+
from huggingface_hub import snapshot_download
|
6 |
+
|
7 |
+
def download_json(url, timeout=30):
    """Download JSON content from the given URL.

    Args:
        url: Address fetched with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block forever on an unresponsive host.

    Returns:
        The response body as decoded by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Check if request was successful
    return response.json()
|
12 |
+
|
13 |
+
def _version_key(version):
    """Turn a dotted version such as '1.10.2' into a tuple of ints for numeric comparison."""
    key = []
    for piece in str(version).split('.'):
        digits = ''
        for ch in piece.strip():
            if ch not in '0123456789':
                break  # ignore suffixes such as 'rc1'
            digits += ch
        key.append(int(digits) if digits else 0)
    return tuple(key)


def download_and_modify_json(url, local_filename, modifications, min_version='1.1.1'):
    """Ensure a sufficiently recent JSON config exists locally and apply overrides.

    The local file is reused when it exists, parses as a JSON object and its
    ``config_version`` is at least ``min_version``; otherwise the JSON is
    downloaded from ``url``. ``modifications`` is then merged in and the result
    is written back to ``local_filename``.

    Args:
        url: Where to download the JSON from when the local copy is unusable.
        local_filename: Path of the local JSON file to read and (re)write.
        modifications: Mapping whose keys overwrite those of the loaded JSON.
        min_version: Oldest ``config_version`` accepted without re-downloading.

    Raises:
        OSError: If the local file cannot be read or written.
        Exception: Whatever ``download_json`` raises when a download is needed.
    """
    data = None
    if os.path.exists(local_filename):
        try:
            with open(local_filename, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except ValueError:
            # Corrupt or truncated local file: fall back to a fresh download.
            data = None
        if isinstance(data, dict):
            config_version = data.get('config_version', '0.0.0')
            # Compare numerically; a plain string comparison fails for
            # non-string values and is fragile for multi-digit components.
            if _version_key(config_version) < _version_key(min_version):
                data = None
        else:
            data = None

    if data is None:
        data = download_json(url)

    # Apply modifications
    data.update(modifications)

    # Save the modified JSON locally
    with open(local_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
|
31 |
+
|
32 |
+
def initialize_app(force=False):
    """Initialize models and configuration.

    Downloads the required model files and writes ``magic-pdf.json`` into the
    user's home directory. When that file already exists the function returns
    immediately, unless ``force`` is true.

    Args:
        force: Run the download and configuration steps even if the
            configuration file already exists.

    Returns:
        None.
    """
    # The configuration file lives in the user's home directory.
    config_file = os.path.join(os.path.expanduser('~'), 'magic-pdf.json')

    # An existing config file means initialization is complete, unless the
    # caller explicitly asks to redo it. (Previously an existing file returned
    # here before ``force`` was ever looked at.)
    if os.path.exists(config_file) and not force:
        print(f"Initialization already completed. Using existing configuration at {config_file}")
        return
    if force:
        print("Forced initialization.")

    # Define patterns for model download
    mineru_patterns = [
        "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_small_2501/*",
        "models/TabRec/TableMaster/*",
        "models/TabRec/StructEqTable/*",
    ]
    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)

    layoutreader_pattern = [
        "*.json",
        "*.safetensors",
    ]
    layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)

    # The models sit in the 'models' sub-directory of the downloaded snapshot.
    model_dir = os.path.join(model_dir, 'models')
    print(f"Downloaded model_dir is: {model_dir}")
    print(f"Downloaded layoutreader_model_dir is: {layoutreader_model_dir}")

    # Download and modify JSON configuration
    json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
    json_mods = {
        'models-dir': model_dir,
        'layoutreader-model-dir': layoutreader_model_dir,
    }
    download_and_modify_json(json_url, config_file, json_mods)
    print(f"The configuration file has been configured successfully, the path is: {config_file}")
|
79 |
+
|
80 |
+
# This block will run if the module is executed directly.
|
81 |
+
if __name__ == '__main__':
|
82 |
+
initialize_app()
|
language_options.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"latin_lang": ["af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr", "ga", "hr", "hu", "id", "is", "it", "ku", "la", "lt", "lv", "mi", "ms", "mt", "nl", "no", "oc", "pi", "pl", "pt", "ro", "rs_latin", "sk", "sl", "sq", "sv", "sw", "tl", "tr", "uz", "vi", "french", "german"],
|
3 |
+
"arabic_lang": ["ar", "fa", "ug", "ur"],
|
4 |
+
"cyrillic_lang": ["ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady", "kbd", "ava", "dar", "inh", "che", "lbe", "lez", "tab"],
|
5 |
+
"devanagari_lang": ["hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc"],
|
6 |
+
"other_lang": ["ch", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka"]
|
7 |
+
}
|
8 |
+
|
pdf_processor.py
CHANGED
@@ -77,3 +77,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
|
|
77 |
md_content = replace_image_with_base64(txt_content, local_md_dir)
|
78 |
new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
|
79 |
return md_content, txt_content, archive_zip_path, new_pdf_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
md_content = replace_image_with_base64(txt_content, local_md_dir)
|
78 |
new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
|
79 |
return md_content, txt_content, archive_zip_path, new_pdf_path
|
80 |
+
|
81 |
+
def file_to_pdf(file_obj):
    """Convert an uploaded file to PDF and return the resulting path.

    Args:
        file_obj: Uploaded file object exposing its path as ``.name``, or None.

    Returns:
        The path returned by ``to_pdf``, or None when no file was given or the
        conversion failed (the failure is logged, not raised).
    """
    # NOTE(review): the log_info/log_error helpers used before are defined in
    # app.py in the visible code; logging directly avoids depending on them
    # from this module. Local import because this module's import block is not
    # visible here.
    import logging

    if file_obj is None:
        return None
    try:
        pdf_path = to_pdf(file_obj.name)
    except Exception as e:
        # Best-effort: a failed conversion is reported to the caller as None.
        logging.error("Error converting file to PDF: %s", e)
        return None
    logging.info("File converted to PDF successfully.")
    return pdf_path
|
tts.py
CHANGED
@@ -47,3 +47,18 @@ def text_to_speech_gtts(text: str) -> str:
|
|
47 |
except Exception as e:
|
48 |
logging.error("gTTS failed. Error: %s", e)
|
49 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
except Exception as e:
|
48 |
logging.error("gTTS failed. Error: %s", e)
|
49 |
raise
|
50 |
+
|
51 |
+
def generate_audio(text: str) -> str:
    """Convert the provided text to speech and return the path of the audio file.

    Args:
        text: Text to synthesize.

    Returns:
        The value returned by ``text_to_speech``, or an empty string when
        ``text`` is empty or synthesis fails (failures are logged, not raised).
    """
    if not text:
        logging.error("No text provided for TTS.")
        return ""
    try:
        audio_file = text_to_speech(text)
    except Exception as e:
        # Logged through the logging module, like text_to_speech_gtts above;
        # the log_info/log_error helpers used before are defined in app.py in
        # the visible code. NOTE(review): confirm they are not also defined here.
        logging.error("Audio generation failed: %s", e)
        return ""
    logging.info("Audio generated successfully.")
    return audio_file
|