Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
    
    | @@ -81,10 +81,10 @@ def draw_ocr_on_image(image_pil, ocr_data): | |
| 81 | 
             
                draw = ImageDraw.Draw(image_pil)
         | 
| 82 |  | 
| 83 | 
             
                try:
         | 
| 84 | 
            -
                    avg_height = sum(d['h'] for d in ocr_data if d['h'] > 0) / len(ocr_data) if ocr_data and any(d['h'] > 0 for d in ocr_data) else 10
         | 
| 85 | 
             
                    font_size = max(8, int(avg_height * 0.6)) 
         | 
| 86 | 
             
                    font = ImageFont.truetype("arial.ttf", font_size)
         | 
| 87 | 
            -
                except (IOError, ZeroDivisionError): | 
| 88 | 
             
                    font = ImageFont.load_default()
         | 
| 89 | 
             
                    font_size = 10 
         | 
| 90 | 
             
                    print("Arial font not found or issue with height calculation, using default font.")
         | 
| @@ -108,7 +108,6 @@ def process_image_and_xml(image_path, xml_path, show_overlay): | |
| 108 | 
             
                if image_path is None:
         | 
| 109 | 
             
                    return None, "Please upload an image.", None
         | 
| 110 | 
             
                if xml_path is None:
         | 
| 111 | 
            -
                    # If image_path is not None, we can still show the image
         | 
| 112 | 
             
                    try:
         | 
| 113 | 
             
                        img_pil_orig = Image.open(image_path).convert("RGB")
         | 
| 114 | 
             
                    except Exception as e:
         | 
| @@ -127,7 +126,11 @@ def process_image_and_xml(image_path, xml_path, show_overlay): | |
| 127 | 
             
                    img_for_overlay = img_pil.copy()
         | 
| 128 | 
             
                    overlay_image_pil = draw_ocr_on_image(img_for_overlay, ocr_box_data)
         | 
| 129 | 
             
                elif show_overlay and not ocr_box_data and not (isinstance(extracted_text, str) and extracted_text.startswith("Error")):
         | 
| 130 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
| 131 |  | 
| 132 | 
             
                return img_pil, extracted_text, overlay_image_pil
         | 
| 133 |  | 
| @@ -144,7 +147,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| 144 |  | 
| 145 | 
             
                with gr.Row():
         | 
| 146 | 
             
                    with gr.Column(scale=1):
         | 
| 147 | 
            -
                        # Corrected: type="filepath"
         | 
| 148 | 
             
                        image_input = gr.File(label="Upload Image (PNG, JPG, etc.)", type="filepath")
         | 
| 149 | 
             
                        xml_input = gr.File(label="Upload ALTO XML File (.xml)", type="filepath")
         | 
| 150 | 
             
                        show_overlay_checkbox = gr.Checkbox(label="Show OCR Overlay on Image", value=False)
         | 
| @@ -159,17 +161,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| 159 | 
             
                output_image_overlay = gr.Image(label="Image with OCR Overlay", type="pil", interactive=False, visible=True)
         | 
| 160 |  | 
| 161 | 
             
                def update_interface(image_filepath, xml_filepath, show_overlay_val):
         | 
| 162 | 
            -
                    # image_filepath and xml_filepath are now strings (paths) or None
         | 
| 163 | 
            -
                    
         | 
| 164 | 
            -
                    # Initial check for None inputs to provide guidance
         | 
| 165 | 
             
                    if image_filepath is None and xml_filepath is None:
         | 
| 166 | 
             
                        return None, "Please upload an image and an XML file.", None
         | 
| 167 | 
             
                    if image_filepath is None:
         | 
| 168 | 
            -
                        # xml_filepath might be present, but we need the image first
         | 
| 169 | 
             
                        return None, "Please upload an image file.", None
         | 
| 170 | 
            -
                     | 
| 171 | 
            -
             | 
| 172 | 
            -
                    # Call the main processing function
         | 
| 173 | 
             
                    img, text, overlay_img = process_image_and_xml(image_filepath, xml_filepath, show_overlay_val)
         | 
| 174 |  | 
| 175 | 
             
                    return img, text, overlay_img
         | 
| @@ -189,7 +185,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| 189 | 
             
                gr.Markdown("---")
         | 
| 190 | 
             
                gr.Markdown("### Example ALTO XML Snippet (for `String` element extraction):")
         | 
| 191 | 
             
                gr.Code(
         | 
| 192 | 
            -
                    language=" | 
|  | |
| 193 | 
             
                    value="""
         | 
| 194 | 
             
            <alto xmlns="http://www.loc.gov/standards/alto/v3/alto.xsd">
         | 
| 195 | 
             
              <Description>...</Description>
         | 
| @@ -218,24 +215,24 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| 218 |  | 
| 219 | 
             
            if __name__ == "__main__":
         | 
| 220 | 
             
                try:
         | 
| 221 | 
            -
                    # from PIL import Image as PImage # Already imported as Image
         | 
| 222 | 
             
                    img = Image.new('RGB', (2394, 3612), color = 'lightgray')
         | 
| 223 | 
             
                    img.save("dummy_image.png")
         | 
| 224 | 
             
                    print("Created dummy_image.png for testing.")
         | 
| 225 |  | 
| 226 | 
            -
                    # Make sure your example XML is named 189819724.34.xml and is in the same directory
         | 
| 227 | 
            -
                    # Or, create it if it doesn't exist with the content you provided
         | 
| 228 | 
             
                    example_xml_filename = "189819724.34.xml"
         | 
| 229 | 
             
                    if not os.path.exists(example_xml_filename):
         | 
| 230 | 
             
                        print(f"WARNING: Example XML '{example_xml_filename}' not found. Please create it or upload your own.")
         | 
| 231 | 
            -
                        #  | 
| 232 | 
            -
                        #  | 
| 233 | 
            -
                        # | 
| 234 | 
            -
             | 
|  | |
|  | |
|  | |
| 235 |  | 
| 236 | 
             
                except ImportError:
         | 
| 237 | 
             
                    print("Pillow not installed, can't create dummy image.")
         | 
| 238 | 
             
                except Exception as e:
         | 
| 239 | 
            -
                    print(f"Error creating dummy image: {e}")
         | 
| 240 |  | 
| 241 | 
             
                demo.launch()
         | 
|  | |
| 81 | 
             
                draw = ImageDraw.Draw(image_pil)
         | 
| 82 |  | 
| 83 | 
             
                try:
         | 
| 84 | 
            +
                    avg_height = sum(d['h'] for d in ocr_data if d['h'] > 0) / len([d for d in ocr_data if d['h'] > 0]) if ocr_data and any(d['h'] > 0 for d in ocr_data) else 10
         | 
| 85 | 
             
                    font_size = max(8, int(avg_height * 0.6)) 
         | 
| 86 | 
             
                    font = ImageFont.truetype("arial.ttf", font_size)
         | 
| 87 | 
            +
                except (IOError, ZeroDivisionError):
         | 
| 88 | 
             
                    font = ImageFont.load_default()
         | 
| 89 | 
             
                    font_size = 10 
         | 
| 90 | 
             
                    print("Arial font not found or issue with height calculation, using default font.")
         | 
|  | |
| 108 | 
             
                if image_path is None:
         | 
| 109 | 
             
                    return None, "Please upload an image.", None
         | 
| 110 | 
             
                if xml_path is None:
         | 
|  | |
| 111 | 
             
                    try:
         | 
| 112 | 
             
                        img_pil_orig = Image.open(image_path).convert("RGB")
         | 
| 113 | 
             
                    except Exception as e:
         | 
|  | |
| 126 | 
             
                    img_for_overlay = img_pil.copy()
         | 
| 127 | 
             
                    overlay_image_pil = draw_ocr_on_image(img_for_overlay, ocr_box_data)
         | 
| 128 | 
             
                elif show_overlay and not ocr_box_data and not (isinstance(extracted_text, str) and extracted_text.startswith("Error")):
         | 
| 129 | 
            +
                    if isinstance(extracted_text, str):
         | 
| 130 | 
            +
                        extracted_text += "\n(No bounding box data found or parsed for overlay)"
         | 
| 131 | 
            +
                    else: # Should not happen if parse_alto_xml returns (str, list)
         | 
| 132 | 
            +
                        extracted_text = "(No bounding box data found or parsed for overlay)"
         | 
| 133 | 
            +
             | 
| 134 |  | 
| 135 | 
             
                return img_pil, extracted_text, overlay_image_pil
         | 
| 136 |  | 
|  | |
| 147 |  | 
| 148 | 
             
                with gr.Row():
         | 
| 149 | 
             
                    with gr.Column(scale=1):
         | 
|  | |
| 150 | 
             
                        image_input = gr.File(label="Upload Image (PNG, JPG, etc.)", type="filepath")
         | 
| 151 | 
             
                        xml_input = gr.File(label="Upload ALTO XML File (.xml)", type="filepath")
         | 
| 152 | 
             
                        show_overlay_checkbox = gr.Checkbox(label="Show OCR Overlay on Image", value=False)
         | 
|  | |
| 161 | 
             
                output_image_overlay = gr.Image(label="Image with OCR Overlay", type="pil", interactive=False, visible=True)
         | 
| 162 |  | 
| 163 | 
             
                def update_interface(image_filepath, xml_filepath, show_overlay_val):
         | 
|  | |
|  | |
|  | |
| 164 | 
             
                    if image_filepath is None and xml_filepath is None:
         | 
| 165 | 
             
                        return None, "Please upload an image and an XML file.", None
         | 
| 166 | 
             
                    if image_filepath is None:
         | 
|  | |
| 167 | 
             
                        return None, "Please upload an image file.", None
         | 
| 168 | 
            +
                    
         | 
|  | |
|  | |
| 169 | 
             
                    img, text, overlay_img = process_image_and_xml(image_filepath, xml_filepath, show_overlay_val)
         | 
| 170 |  | 
| 171 | 
             
                    return img, text, overlay_img
         | 
|  | |
| 185 | 
             
                gr.Markdown("---")
         | 
| 186 | 
             
                gr.Markdown("### Example ALTO XML Snippet (for `String` element extraction):")
         | 
| 187 | 
             
                gr.Code(
         | 
| 188 | 
            +
                    # Corrected: language="text" or omit language parameter
         | 
| 189 | 
            +
                    language="text",  # Or language=None
         | 
| 190 | 
             
                    value="""
         | 
| 191 | 
             
            <alto xmlns="http://www.loc.gov/standards/alto/v3/alto.xsd">
         | 
| 192 | 
             
              <Description>...</Description>
         | 
|  | |
| 215 |  | 
| 216 | 
             
            if __name__ == "__main__":
         | 
| 217 | 
             
                try:
         | 
|  | |
| 218 | 
             
                    img = Image.new('RGB', (2394, 3612), color = 'lightgray')
         | 
| 219 | 
             
                    img.save("dummy_image.png")
         | 
| 220 | 
             
                    print("Created dummy_image.png for testing.")
         | 
| 221 |  | 
|  | |
|  | |
| 222 | 
             
                    example_xml_filename = "189819724.34.xml"
         | 
| 223 | 
             
                    if not os.path.exists(example_xml_filename):
         | 
| 224 | 
             
                        print(f"WARNING: Example XML '{example_xml_filename}' not found. Please create it or upload your own.")
         | 
| 225 | 
            +
                        # Example of writing the provided XML if it's missing for easier testing
         | 
| 226 | 
            +
                        # Note: The provided XML is partial, so this would be a placeholder
         | 
| 227 | 
            +
                        # If you have the full XML, you can paste it here.
         | 
| 228 | 
            +
                        # For now, just a warning.
         | 
| 229 | 
            +
                        # with open(example_xml_filename, "w", encoding="utf-8") as f:
         | 
| 230 | 
            +
                        #     f.write('<?xml version="1.0" encoding="UTF-8"?>\n<alto xmlns="http://www.loc.gov/standards/alto/v3/alto.xsd"><Description><MeasurementUnit>pixel</MeasurementUnit><sourceImageInformation><fileName>./data/pdfs/189742210.pdf</fileName></sourceImageInformation><OCRProcessing ID="IdOcr"><ocrProcessingStep><processingDateTime>Wed Nov 20 19:03:43 2019\n</processingDateTime><processingSoftware><softwareCreator>CONTRIBUTORS</softwareCreator><softwareName>pdfalto</softwareName><softwareVersion>0.1</softwareVersion></processingSoftware></ocrProcessingStep></OCRProcessing></Description><Styles><TextStyle ID="font0" FONTFAMILY="helvetica" FONTSIZE="21.000" FONTTYPE="sans-serif" FONTWIDTH="proportional" FONTCOLOR="#WWWWWW" FONTSTYLE="italics"/><TextStyle ID="font1" FONTFAMILY="helvetica" FONTSIZE="10.000" FONTTYPE="sans-serif" FONTWIDTH="proportional" FONTCOLOR="#WWWWWW" FONTSTYLE="italics"/><TextStyle ID="font2" FONTFAMILY="helvetica" FONTSIZE="10.000" FONTTYPE="sans-serif" FONTWIDTH="proportional" FONTCOLOR="#WWWWWW" FONTSTYLE=""/></Styles><Layout><Page ID="Page13" PHYSICAL_IMG_NR="13" WIDTH="2394" HEIGHT="3612"><PrintSpace><TextLine WIDTH="684" HEIGHT="108" ID="p13_t1" HPOS="465" VPOS="196"><String ID="p13_w1" CONTENT="Introduction" HPOS="465" VPOS="196" WIDTH="684" HEIGHT="108" STYLEREFS="font0"/></TextLine></PrintSpace></Page></Layout></alto>') # Shortened for brevity
         | 
| 231 | 
            +
                        # print(f"Created a placeholder '{example_xml_filename}'. Please replace with full XML if needed.")
         | 
| 232 |  | 
| 233 | 
             
                except ImportError:
         | 
| 234 | 
             
                    print("Pillow not installed, can't create dummy image.")
         | 
| 235 | 
             
                except Exception as e:
         | 
| 236 | 
            +
                    print(f"Error creating dummy image or example XML: {e}")
         | 
| 237 |  | 
| 238 | 
             
                demo.launch()
         | 
