Echo9k committed on
Commit
53cd054
·
1 Parent(s): 5c4aab2

modularized app function

Browse files
Files changed (5) hide show
  1. app.py +23 -37
  2. initializer.py +82 -0
  3. language_options.json +8 -0
  4. pdf_processor.py +10 -0
  5. tts.py +15 -0
app.py CHANGED
@@ -1,12 +1,15 @@
1
  # app.py
2
  import os
 
3
  import gradio as gr
4
  from gradio_pdf import PDF
5
  import logging
6
  from model import model_initialized
7
- from pdf_processor import to_pdf, to_markdown
8
  from config import config
9
- from tts import text_to_speech # Import TTS module
 
 
10
 
11
 
12
 
@@ -20,6 +23,9 @@ def log_info(message: str):
20
  def log_error(message: str):
21
  logging.error(f"\033[91m{message}\033[0m") # Red for errors
22
 
 
 
 
23
  # Load header HTML content
24
  try:
25
  with open("header.html", "r") as file:
@@ -29,55 +35,35 @@ except Exception as e:
29
  log_error(f"Failed to load header.html. Error: {e}")
30
  header = "<h1>Header not found</h1>"
31
 
32
- # Define language options
33
- latin_lang = ['af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german']
34
- arabic_lang = ['ar', 'fa', 'ug', 'ur']
35
- cyrillic_lang = ['ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab']
36
- devanagari_lang = ['hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', 'sa', 'bgc']
37
- other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
38
- all_lang = ['', 'auto'] + other_lang + latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
39
-
40
- def file_to_pdf(file_obj):
41
- if file_obj is not None:
42
- try:
43
- pdf_path = to_pdf(file_obj.name)
44
- log_info("File converted to PDF successfully.")
45
- return pdf_path
46
- except Exception as e:
47
- log_error(f"Error converting file to PDF: {e}")
48
- return None
49
 
50
- def generate_audio(text: str) -> str:
51
- """
52
- Converts the provided text to speech and returns the path of the audio file.
53
- """
54
- if text:
55
- try:
56
- audio_file = text_to_speech(text)
57
- log_info("Audio generated successfully.")
58
- return audio_file
59
- except Exception as e:
60
- log_error(f"Audio generation failed: {e}")
61
- return ""
62
- log_error("No text provided for TTS.")
63
- return ""
64
 
65
  with gr.Blocks() as demo:
66
  gr.HTML(header)
67
  with gr.Row():
68
  with gr.Column(variant='panel', scale=5):
69
- file_input = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
70
- max_pages = gr.Slider(1, 20, config.get("max_pages_default", 10), step=1, label='Max convert pages')
 
 
71
  with gr.Row():
72
  layout_mode = gr.Dropdown(
73
  ["layoutlmv3", "doclayout_yolo"],
74
  label="Layout model",
75
- value=config.get("layout_model_default", "doclayout_yolo")
76
  )
77
  language = gr.Dropdown(
78
  all_lang,
79
  label="Language",
80
- value=config.get("language_default", "auto")
81
  )
82
  with gr.Row():
83
  formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
 
1
  # app.py
2
  import os
3
+ import json
4
  import gradio as gr
5
  from gradio_pdf import PDF
6
  import logging
7
  from model import model_initialized
8
+ from pdf_processor import to_pdf, to_markdown, file_to_pdf
9
  from config import config
10
+ from tts import text_to_speech, generate_audio # Import TTS module
11
+ from initializer import initialize_app
12
+
13
 
14
 
15
 
 
23
  def log_error(message: str):
24
  logging.error(f"\033[91m{message}\033[0m") # Red for errors
25
 
26
+ # Run the initialization once.
27
+ initialize_app()
28
+
29
  # Load header HTML content
30
  try:
31
  with open("header.html", "r") as file:
 
35
  log_error(f"Failed to load header.html. Error: {e}")
36
  header = "<h1>Header not found</h1>"
37
 
38
# Build the choices for the "Language" dropdown from language_options.json.
try:
    # Load the language options from the JSON file
    with open('language_options.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Create the all_lang list by concatenating the different language lists
    all_lang = (
        ['', 'auto']
        + data["other_lang"]
        + data["latin_lang"]
        + data["arabic_lang"]
        + data["cyrillic_lang"]
        + data["devanagari_lang"]
    )
except (OSError, ValueError, KeyError, TypeError) as e:
    # OSError: file missing/unreadable; ValueError: malformed JSON or bad
    # encoding; KeyError/TypeError: JSON does not have the expected shape.
    log_error(f"Failed to load file language_options.json. Error: {e}")
    # Keep '' and 'auto' in the fallback: the dropdown's default value is
    # "auto", so the reduced list must still contain it.
    all_lang = ['', 'auto', 'es', 'en']
 
 
 
 
 
 
 
 
 
48
 
49
  with gr.Blocks() as demo:
50
  gr.HTML(header)
51
  with gr.Row():
52
  with gr.Column(variant='panel', scale=5):
53
# Upload widget for the document to convert.
file_input = gr.File(
    label="Please upload a PDF or image",
    # Extensions must carry their leading dot; a bare "webp" is neither an
    # extension nor one of Gradio's type keywords.
    file_types=[".pdf", ".png", ".jpeg", ".jpg", ".webp"],
)
56
+ max_pages = gr.Slider(1, 20,config.get("max_pages_default", config.get("max_pages", 10)), step=1, label='Max convert pages')
57
  with gr.Row():
58
  layout_mode = gr.Dropdown(
59
  ["layoutlmv3", "doclayout_yolo"],
60
  label="Layout model",
61
+ value=config.get("layout_model_default", "layoutlmv3")
62
  )
63
  language = gr.Dropdown(
64
  all_lang,
65
  label="Language",
66
+ value=config.get("language_default", config.get("language", "auto"))
67
  )
68
  with gr.Row():
69
  formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)
initializer.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # initializer.py
2
+ import json
3
+ import os
4
+ import requests
5
+ from huggingface_hub import snapshot_download
6
+
7
def download_json(url, timeout=30):
    """Download JSON content from the given URL.

    Args:
        url: Address of the JSON document.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may wait indefinitely on a stalled
            connection, which would hang application start-up.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Check if request was successful
    return response.json()
12
+
13
def _version_tuple(version):
    """Convert a dotted version such as ``"1.10.0"`` into ``(1, 10, 0)``."""
    numbers = []
    for piece in str(version).split('.'):
        digits = ''
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        # A component without leading digits (e.g. "beta") counts as 0.
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


def download_and_modify_json(url, local_filename, modifications, min_version='1.1.1'):
    """Create or refresh a local JSON config and apply ``modifications`` to it.

    If ``local_filename`` already exists and its ``config_version`` is at
    least ``min_version``, the local content is reused; otherwise the JSON is
    downloaded from ``url``. In both cases ``modifications`` is merged into
    the top level and the result is written back to ``local_filename``.

    Args:
        url: Location of the JSON template to download when needed.
        local_filename: Path of the local JSON file to read and write.
        modifications: Mapping of top-level keys to set in the JSON.
        min_version: Lowest acceptable ``config_version`` of an existing
            local file. Older files are replaced by a fresh download.
    """
    if os.path.exists(local_filename):
        with open(local_filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
        config_version = data.get('config_version', '0.0.0')
        # Compare numerically: as plain strings "1.10.0" sorts before "1.9.0".
        if _version_tuple(config_version) < _version_tuple(min_version):
            data = download_json(url)
    else:
        data = download_json(url)

    # Apply modifications
    data.update(modifications)

    # Save the modified JSON locally
    with open(local_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
31
+
32
def initialize_app(force=False):
    """Initialize models and configuration.

    Downloads the required model files and writes ``magic-pdf.json`` in the
    user's home directory, pointing it at the downloaded model directories.

    Args:
        force: When False (default), nothing is done if the configuration
            file already exists. When True, the models are fetched and the
            configuration file is rewritten even if it is already present.
    """
    # Define the path to the configuration file in the user's home directory.
    home_dir = os.path.expanduser('~')
    config_file_name = 'magic-pdf.json'
    config_file = os.path.join(home_dir, config_file_name)

    # If the config file exists, assume initialization is complete -- unless
    # the caller explicitly asked to redo it.
    if os.path.exists(config_file) and not force:
        print(f"Initialization already completed. Using existing configuration at {config_file}")
        return
    if force:
        print("Forced initialization.")

    # Define patterns for model download
    mineru_patterns = [
        "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_small_2501/*",
        "models/TabRec/TableMaster/*",
        "models/TabRec/StructEqTable/*",
    ]
    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)

    layoutreader_pattern = [
        "*.json",
        "*.safetensors",
    ]
    layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)

    # The model files sit under the "models" sub-directory of the snapshot.
    model_dir = os.path.join(model_dir, 'models')
    print(f"Downloaded model_dir is: {model_dir}")
    print(f"Downloaded layoutreader_model_dir is: {layoutreader_model_dir}")

    # Download and modify JSON configuration
    json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
    json_mods = {
        'models-dir': model_dir,
        'layoutreader-model-dir': layoutreader_model_dir,
    }
    download_and_modify_json(json_url, config_file, json_mods)
    print(f"The configuration file has been configured successfully, the path is: {config_file}")


# This block will run if the module is executed directly.
if __name__ == '__main__':
    initialize_app()
language_options.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "latin_lang": ["af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr", "ga", "hr", "hu", "id", "is", "it", "ku", "la", "lt", "lv", "mi", "ms", "mt", "nl", "no", "oc", "pi", "pl", "pt", "ro", "rs_latin", "sk", "sl", "sq", "sv", "sw", "tl", "tr", "uz", "vi", "french", "german"],
3
+ "arabic_lang": ["ar", "fa", "ug", "ur"],
4
+ "cyrillic_lang": ["ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady", "kbd", "ava", "dar", "inh", "che", "lbe", "lez", "tab"],
5
+ "devanagari_lang": ["hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc"],
6
+ "other_lang": ["ch", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka"]
7
+ }
8
+
pdf_processor.py CHANGED
@@ -77,3 +77,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
77
  md_content = replace_image_with_base64(txt_content, local_md_dir)
78
  new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
79
  return md_content, txt_content, archive_zip_path, new_pdf_path
 
 
 
 
 
 
 
 
 
 
 
77
  md_content = replace_image_with_base64(txt_content, local_md_dir)
78
  new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
79
  return md_content, txt_content, archive_zip_path, new_pdf_path
80
+
81
def file_to_pdf(file_obj):
    """Convert an uploaded file to PDF and return the resulting path.

    Args:
        file_obj: Uploaded file object exposing its path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        The value returned by ``to_pdf`` on success; ``None`` when no file
        was given or the conversion raised.
    """
    # Log through the standard logging module: the coloured log_info/log_error
    # helpers are only visibly defined in app.py, which imports this module.
    import logging

    if file_obj is None:
        return None
    try:
        pdf_path = to_pdf(file_obj.name)
    except Exception as e:
        # Best effort: a failed conversion is reported, not propagated.
        logging.error("Error converting file to PDF: %s", e)
        return None
    logging.info("File converted to PDF successfully.")
    return pdf_path
tts.py CHANGED
@@ -47,3 +47,18 @@ def text_to_speech_gtts(text: str) -> str:
47
  except Exception as e:
48
  logging.error("gTTS failed. Error: %s", e)
49
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  except Exception as e:
48
  logging.error("gTTS failed. Error: %s", e)
49
  raise
50
+
51
def generate_audio(text: str) -> str:
    """Convert the provided text to speech and return the audio file path.

    Args:
        text: Text to synthesise. Empty input is rejected.

    Returns:
        The value returned by ``text_to_speech`` on success, or an empty
        string when no text was provided or synthesis raised.
    """
    if not text:
        logging.error("No text provided for TTS.")
        return ""
    try:
        audio_file = text_to_speech(text)
    except Exception as e:
        # Best effort: report the failure and hand back an empty path.
        logging.error("Audio generation failed: %s", e)
        return ""
    # Use logging directly, matching the rest of this module.
    logging.info("Audio generated successfully.")
    return audio_file