PDF_reader

Paused

App Files Files Community

Echo9k committed 24 days ago

Commit

53cd054

1 Parent(s): 5c4aab2

modularized app function

Browse files

Files changed (5) hide show

app.py +23 -37
initializer.py +82 -0
language_options.json +8 -0
pdf_processor.py +10 -0
tts.py +15 -0

app.py CHANGED Viewed

@@ -1,12 +1,15 @@
 # app.py
 import os
 import gradio as gr
 from gradio_pdf import PDF
 import logging
 from model import model_initialized
-from pdf_processor import to_pdf, to_markdown
 from config import config
-from tts import text_to_speech  # Import TTS module
@@ -20,6 +23,9 @@ def log_info(message: str):
 def log_error(message: str):
     logging.error(f"\033[91m{message}\033[0m")  # Red for errors
 # Load header HTML content
 try:
     with open("header.html", "r") as file:
@@ -29,55 +35,35 @@ except Exception as e:
     log_error(f"Failed to load header.html. Error: {e}")
     header = "<h1>Header not found</h1>"
-# Define language options
-latin_lang = ['af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german']
-arabic_lang = ['ar', 'fa', 'ug', 'ur']
-cyrillic_lang = ['ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab']
-devanagari_lang = ['hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', 'sa', 'bgc']
-other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
-all_lang = ['', 'auto'] + other_lang + latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
-def file_to_pdf(file_obj):
-    if file_obj is not None:
-        try:
-            pdf_path = to_pdf(file_obj.name)
-            log_info("File converted to PDF successfully.")
-            return pdf_path
-        except Exception as e:
-            log_error(f"Error converting file to PDF: {e}")
-    return None
-def generate_audio(text: str) -> str:
-    """
-    Converts the provided text to speech and returns the path of the audio file.
-    """
-    if text:
-        try:
-            audio_file = text_to_speech(text)
-            log_info("Audio generated successfully.")
-            return audio_file
-        except Exception as e:
-            log_error(f"Audio generation failed: {e}")
-            return ""
-    log_error("No text provided for TTS.")
-    return ""
 with gr.Blocks() as demo:
     gr.HTML(header)
     with gr.Row():
         with gr.Column(variant='panel', scale=5):
-            file_input = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
-            max_pages = gr.Slider(1, 20, config.get("max_pages_default", 10), step=1, label='Max convert pages')
             with gr.Row():
                 layout_mode = gr.Dropdown(
                     ["layoutlmv3", "doclayout_yolo"],
                     label="Layout model",
-                    value=config.get("layout_model_default", "doclayout_yolo")
                 )
                 language = gr.Dropdown(
                     all_lang,
                     label="Language",
-                    value=config.get("language_default", "auto")
                 )
             with gr.Row():
                 formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)

 # app.py
 import os
+import json
 import gradio as gr
 from gradio_pdf import PDF
 import logging
 from model import model_initialized
+from pdf_processor import to_pdf, to_markdown, file_to_pdf
 from config import config
+from tts import text_to_speech, generate_audio  # Import TTS module
+from initializer import initialize_app
 def log_error(message: str):
     logging.error(f"\033[91m{message}\033[0m")  # Red for errors
+# Run the initialization once.
+initialize_app()
 # Load header HTML content
 try:
     with open("header.html", "r") as file:
     log_error(f"Failed to load header.html. Error: {e}")
     header = "<h1>Header not found</h1>"
+try:
+    # Language codes are kept in language_options.json, grouped into named lists.
+    with open('language_options.json', 'r', encoding='utf-8') as file:
+        data = json.load(file)
+    # Build the dropdown choices: '' and 'auto' first, then each group in a fixed order.
+    all_lang = ['', 'auto'] + data["other_lang"] + data["latin_lang"] + data["arabic_lang"] + data["cyrillic_lang"] + data["devanagari_lang"]
+except Exception as e:
+    log_error(f"Failed to load file language_options.json. Error: {e}")
+    # Keep '' and 'auto' in the fallback so the dropdown's default value ("auto")
+    # remains one of the offered choices even when the JSON file is unavailable.
+    all_lang = ['', 'auto', 'es', 'en']
 with gr.Blocks() as demo:
     gr.HTML(header)
     with gr.Row():
         with gr.Column(variant='panel', scale=5):
+            file_input = gr.File(
+                label="Please upload a PDF or image",
+                # Every entry is a file extension and needs its leading dot;
+                # a bare "webp" does not match the other extension entries.
+                file_types=[".pdf", ".png", ".jpeg", ".jpg", ".webp"])
+            max_pages = gr.Slider(1, 20,config.get("max_pages_default", config.get("max_pages", 10)), step=1, label='Max convert pages')
             with gr.Row():
                 layout_mode = gr.Dropdown(
                     ["layoutlmv3", "doclayout_yolo"],
                     label="Layout model",
+                    value=config.get("layout_model_default", "layoutlmv3")
                 )
                 language = gr.Dropdown(
                     all_lang,
                     label="Language",
+                    value=config.get("language_default", config.get("language", "auto"))
                 )
             with gr.Row():
                 formula_enable = gr.Checkbox(label="Enable formula recognition", value=True)

initializer.py ADDED Viewed

	@@ -0,0 +1,82 @@

+# initializer.py
+import json
+import os
+import requests
+from huggingface_hub import snapshot_download
+def download_json(url, timeout=30):
+    """Download JSON content from the given URL.
+
+    Args:
+        url: Address of the JSON document to fetch.
+        timeout: Seconds to wait for the server before giving up. Without a
+            timeout the request can block indefinitely on an unresponsive host.
+
+    Returns:
+        The decoded JSON payload.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If the server does not respond within ``timeout``.
+    """
+    response = requests.get(url, timeout=timeout)
+    response.raise_for_status()  # Surface 4xx/5xx responses as exceptions
+    return response.json()
+def _version_tuple(version):
+    """Turn a dotted version such as '1.10.2' into a tuple of ints for comparison."""
+    try:
+        return tuple(int(part) for part in str(version).split('.'))
+    except ValueError:
+        # An unparsable version sorts before every real version, so it counts as outdated.
+        return ()
+def download_and_modify_json(url, local_filename, modifications, min_version='1.1.1'):
+    """Download JSON from URL and modify its contents based on modifications.
+
+    An existing local file is reused unless its ``config_version`` is older
+    than ``min_version``; otherwise the JSON is fetched from ``url``. The
+    ``modifications`` are then merged in and the result is written back to
+    ``local_filename``.
+
+    Args:
+        url: Location of the JSON template to download when needed.
+        local_filename: Path of the local JSON file to read and (re)write.
+        modifications: Mapping of top-level keys to set in the JSON document.
+        min_version: Oldest ``config_version`` accepted without re-downloading.
+    """
+    if os.path.exists(local_filename):
+        with open(local_filename, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        config_version = data.get('config_version', '0.0.0')
+        # Compare numerically: plain string comparison orders versions
+        # character by character and fails outright on non-string values.
+        if _version_tuple(config_version) < _version_tuple(min_version):
+            data = download_json(url)
+    else:
+        data = download_json(url)
+    # Apply modifications
+    data.update(modifications)
+    # Save the modified JSON locally
+    with open(local_filename, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+def initialize_app(force=False):
+    """Initialize models and configuration.
+
+    Downloads the required model snapshots and writes the ``magic-pdf.json``
+    configuration file into the user's home directory, pointing it at the
+    downloaded model directories.
+
+    Args:
+        force: When true, run the download and configuration steps even if
+            the configuration file already exists. When false (the default),
+            an existing configuration file is taken as proof that
+            initialization has already been done and nothing is changed.
+    """
+    # Define the path to the configuration file in the user's home directory.
+    home_dir = os.path.expanduser('~')
+    config_file_name = 'magic-pdf.json'
+    config_file = os.path.join(home_dir, config_file_name)
+    # If the config file exists, assume initialization is complete -- unless
+    # the caller explicitly asked to redo it.
+    if os.path.exists(config_file) and not force:
+        print(f"Initialization already completed. Using existing configuration at {config_file}")
+        return
+    if force:
+        print("Forced initialization.")
+    # Define patterns for model download
+    mineru_patterns = [
+        "models/Layout/LayoutLMv3/*",
+        "models/Layout/YOLO/*",
+        "models/MFD/YOLO/*",
+        "models/MFR/unimernet_small_2501/*",
+        "models/TabRec/TableMaster/*",
+        "models/TabRec/StructEqTable/*",
+    ]
+    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)
+    layoutreader_pattern = [
+        "*.json",
+        "*.safetensors",
+    ]
+    layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)
+    # The configuration points at the 'models' subdirectory of the snapshot.
+    model_dir = os.path.join(model_dir, 'models')
+    print(f"Downloaded model_dir is: {model_dir}")
+    print(f"Downloaded layoutreader_model_dir is: {layoutreader_model_dir}")
+    # Download and modify JSON configuration
+    json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
+    json_mods = {
+        'models-dir': model_dir,
+        'layoutreader-model-dir': layoutreader_model_dir,
+    }
+    download_and_modify_json(json_url, config_file, json_mods)
+    print(f"The configuration file has been configured successfully, the path is: {config_file}")
+# This block will run if the module is executed directly.
+if __name__ == '__main__':
+    initialize_app()

language_options.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "latin_lang": ["af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr", "ga", "hr", "hu", "id", "is", "it", "ku", "la", "lt", "lv", "mi", "ms", "mt", "nl", "no", "oc", "pi", "pl", "pt", "ro", "rs_latin", "sk", "sl", "sq", "sv", "sw", "tl", "tr", "uz", "vi", "french", "german"],
+    "arabic_lang": ["ar", "fa", "ug", "ur"],
+    "cyrillic_lang": ["ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady", "kbd", "ava", "dar", "inh", "che", "lbe", "lez", "tab"],
+    "devanagari_lang": ["hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc"],
+    "other_lang": ["ch", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka"]
+  }

pdf_processor.py CHANGED Viewed

@@ -77,3 +77,13 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
     md_content = replace_image_with_base64(txt_content, local_md_dir)
     new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
     return md_content, txt_content, archive_zip_path, new_pdf_path

     md_content = replace_image_with_base64(txt_content, local_md_dir)
     new_pdf_path = os.path.join(local_md_dir, f"{file_name}_layout.pdf")
     return md_content, txt_content, archive_zip_path, new_pdf_path
+def file_to_pdf(file_obj):
+    """Convert an uploaded file to PDF and return the converted file's path.
+
+    Args:
+        file_obj: Object exposing the file's path as ``file_obj.name``, or ``None``.
+
+    Returns:
+        The value returned by ``to_pdf`` for the file, or ``None`` when no
+        file was given or the conversion raised an exception.
+    """
+    # Imported locally and used directly: the log_info/log_error helpers are
+    # only visibly defined in app.py, so calling them here risks a NameError.
+    import logging
+    if file_obj is None:
+        return None
+    try:
+        pdf_path = to_pdf(file_obj.name)
+    except Exception as e:
+        logging.error("Error converting file to PDF: %s", e)
+        return None
+    logging.info("File converted to PDF successfully.")
+    return pdf_path

tts.py CHANGED Viewed

@@ -47,3 +47,18 @@ def text_to_speech_gtts(text: str) -> str:
     except Exception as e:
         logging.error("gTTS failed. Error: %s", e)
         raise

     except Exception as e:
         logging.error("gTTS failed. Error: %s", e)
         raise
+def generate_audio(text: str) -> str:
+    """
+    Converts the provided text to speech and returns the path of the audio file.
+
+    Returns an empty string when ``text`` is empty or when speech generation
+    raises an exception; the failure is logged rather than propagated.
+    """
+    # Log through the logging module, as the gTTS code in this module does;
+    # the log_info/log_error helpers are only visibly defined in app.py.
+    if not text:
+        logging.error("No text provided for TTS.")
+        return ""
+    try:
+        audio_file = text_to_speech(text)
+    except Exception as e:
+        logging.error("Audio generation failed: %s", e)
+        return ""
+    logging.info("Audio generated successfully.")
+    return audio_file