# Hugging Face Space (status shown when this file was captured: Running on Zero)
import io | |
import logging | |
import os | |
import re | |
import time | |
import certifi | |
import fitz # PyMuPDF | |
import gradio as gr | |
import pycurl | |
import spaces | |
import yaml | |
from gradio_modal import Modal | |
from htrflow.pipeline.pipeline import Pipeline | |
from htrflow.pipeline.steps import init_step | |
from htrflow.volume.volume import Collection | |
from PIL import Image | |
from app.pipelines import PIPELINES | |
logger = logging.getLogger(__name__)

# Max number of images a user can upload at once
MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5))

# Setup the cache directory to point to the directory where the example images
# are located. The images must lay in the cache directory because otherwise they
# have to be reuploaded when drag-and-dropped to the input image widget.
GRADIO_CACHE = ".gradio_cache"
EXAMPLES_DIRECTORY = os.path.join(GRADIO_CACHE, "examples")

# NOTE(review): when GRADIO_CACHE_DIR is unset this branch is skipped, so the
# variable stays unset — confirm that this is intended.
if os.environ.get("GRADIO_CACHE_DIR", GRADIO_CACHE) != GRADIO_CACHE:
    os.environ["GRADIO_CACHE_DIR"] = GRADIO_CACHE
    # The '%s' placeholder needs its argument; without it the literal "%s" is logged.
    logger.warning(
        "Setting GRADIO_CACHE_DIR to '%s' (overriding a previous value).",
        GRADIO_CACHE,
    )
class PipelineWithProgress(Pipeline):
    """Pipeline that can report per-step progress to a Gradio progress tracker."""

    @classmethod
    def from_config(cls, config: dict[str, str]):
        """Init pipeline from config, ensuring the correct subclass is instantiated.

        Declared as a classmethod because it is called on the class itself
        (``PipelineWithProgress.from_config(config)``).

        Args:
            config: Parsed pipeline configuration. Must contain a "steps" list
                whose items have a "step" name and optional "settings".
        """
        return cls(
            [
                init_step(step["step"], step.get("settings", {}))
                for step in config["steps"]
            ]
        )

    def run(self, collection, start=0, progress=None):
        """
        Run pipeline on collection with Gradio progress support.

        Args:
            collection: The collection passed to the first executed step.
            start: Index of the first step to run.
            progress: Optional callable taking ``(fraction, desc=...)``. When
                given, it is called before each step runs.

        Returns:
            The collection returned by the last executed step.

        Raises:
            gr.Error: If a step fails. The original exception is chained as
                ``__cause__``.
        """
        remaining_steps = self.steps[start:]
        total_steps = len(remaining_steps)
        for i, step in enumerate(remaining_steps):
            # Number the step against the whole pipeline so the label stays
            # consistent ("step 3 / 3") even when start > 0.
            step_name = f"{step} (step {start + i + 1} / {len(self.steps)})"
            if progress is not None:
                progress((i + 1) / total_steps, desc=f"Running {step_name}")
            try:
                collection = step.run(collection)
            except Exception as exc:
                message = f"HTRflow: Pipeline failed on step {step_name}"
                if self.pickle_path:
                    message += (
                        f". A backup collection is saved at {self.pickle_path}"
                    )
                # gr.Error is an exception class: it only reaches the user when raised.
                raise gr.Error(message) from exc
        return collection
def pdf_to_images(pdf_path):
    """
    Convert a PDF file to a list of PIL Image objects using PyMuPDF.

    Each page is rendered with ``page.get_pixmap(alpha=False)``. No zoom matrix
    or DPI is passed, so PyMuPDF's default rendering resolution applies. The
    pixmap is encoded as JPEG in memory and reopened with PIL.

    Args:
        pdf_path (str): Path to the PDF file

    Returns:
        list: List of PIL Image objects, one per page, in page order
    """
    images = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            pixmap = page.get_pixmap(alpha=False)
            img_data = pixmap.tobytes("jpeg")
            images.append(Image.open(io.BytesIO(img_data)))
    return images
# NOTE(review): `spaces` is imported but not used anywhere in the visible file;
# if this handler is meant to run on ZeroGPU it presumably needs a
# `@spaces.GPU` decorator — confirm against the deployed version.
def run_htrflow(custom_template_yaml, batch_image_gallery, progress=gr.Progress()):
    """
    Executes the HTRflow pipeline based on the provided YAML configuration and batch images.

    This is a generator: on invalid input it shows a warning and finishes
    without yielding, which leaves the outputs untouched.

    Args:
        custom_template_yaml (str): YAML string specifying the HTRflow pipeline configuration.
        batch_image_gallery (list): List of uploaded images to process in the pipeline.
            Only the first element of each gallery item is used.
        progress: Gradio progress tracker used to report pipeline progress.

    Yields:
        tuple: The processed collection and ``gr.skip()`` (the second output is left unchanged).
    """
    if not custom_template_yaml:
        gr.Warning("HTRflow: Please insert a HTRflow-yaml template")
        return

    try:
        config = yaml.safe_load(custom_template_yaml)
    except yaml.YAMLError as e:
        gr.Warning(f"HTRflow: Error loading YAML configuration: {e}")
        return

    # A comment-only or scalar document parses fine but cannot describe a pipeline.
    if not isinstance(config, dict) or "steps" not in config:
        gr.Warning("HTRflow: The YAML template must define the pipeline 'steps'")
        return

    if not batch_image_gallery:
        gr.Warning("HTRflow: You must upload atleast 1 image or more")
        return

    progress(0, desc="HTRflow: Starting")
    time.sleep(0.3)

    images = [temp_img[0] for temp_img in batch_image_gallery]
    collection = Collection(images)
    pipe = PipelineWithProgress.from_config(config)

    gr.Info(
        f"HTRflow: processing {len(images)} {'image' if len(images) == 1 else 'images'}."
    )
    progress(0.1, desc="HTRflow: Processing")

    collection.label = "demo_output"
    collection = pipe.run(collection, progress=progress)

    progress(1, desc="HTRflow: Finish, redirecting to 'Results tab'")
    time.sleep(2)
    gr.Info("Completed succesfully ✨")

    yield collection, gr.skip()
def get_pipeline_description(pipeline: str) -> str:
    """
    Look up the description stored for a pipeline.

    Args:
        pipeline: Name of pipeline (must be a key in PIPELINES)
    """
    pipeline_entry = PIPELINES[pipeline]
    return pipeline_entry["description"]
def get_yaml(pipeline: str) -> str:
    """
    Get the yaml file for the given pipeline

    Args:
        pipeline: Name of pipeline (must be a key in the PIPELINES dictionary)

    Returns:
        The contents of the pipeline's YAML file.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the host locale.
    with open(PIPELINES[pipeline]["file"], "r", encoding="utf-8") as f:
        return f.read()
def all_example_images() -> list[str]:
    """
    Collect the paths of every example image listed in PIPELINES.

    Each example name is joined onto EXAMPLES_DIRECTORY; pipelines without an
    "examples" entry contribute nothing.
    """
    return [
        os.path.join(EXAMPLES_DIRECTORY, example_name)
        for details in PIPELINES.values()
        for example_name in details.get("examples", [])
    ]
def get_selected_example_image(event: gr.SelectData) -> list[str]:
    """
    Get path to the selected gallery image.

    Args:
        event: Selection event whose ``value["image"]["path"]`` holds the image path.

    Returns:
        A single-item list containing the path, so it can be used directly as
        the value of a gallery.
    """
    return [event.value["image"]["path"]]
def get_selected_example_pipeline(event: gr.SelectData) -> str | None:
    """
    Find the pipeline whose example list contains the selected image.

    The image is matched by its original file name
    (``event.value["image"]["orig_name"]``). Returns the first matching
    pipeline name, or None when no pipeline lists the image.
    """
    matching_names = (
        name
        for name, details in PIPELINES.items()
        if event.value["image"]["orig_name"] in details.get("examples", [])
    )
    return next(matching_names, None)
def get_image_from_image_id(image_id):
    """
    Build the image URL on lbiiif.riksarkivet.se for the given image ID.

    Returns:
        A single-item list containing the URL.
    """
    image_url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
    return [image_url]
# def get_images_from_iiif_manifest(iiif_manifest_url): | |
# """ | |
# Read all images from a v2/v3 IIIF manifest. | |
# Arguments: | |
# manifest: IIIF manifest | |
# height: Max height of returned images. | |
# """ | |
# try: | |
# response = requests.get(iiif_manifest_url, timeout=5) | |
# response.raise_for_status() | |
# except (requests.HTTPError, requests.ConnectionError) as e: | |
# gr.Error(f"Could not fetch IIIF manifest from {iiif_manifest_url} ({e})") | |
# return | |
# # Hacky solution to get all images regardless of API version - treat | |
# # the manifest as a string and match everything that looks like an IIIF | |
# # image URL. | |
# manifest = response.text | |
# pattern = r'(?P<identifier>https?://[^"\s]*)/(?P<region>[^"\s]*?)/(?P<size>[^"\s]*?)/(?P<rotation>!?\d*?)/(?P<quality>[^"\s]*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)' | |
# height= 1200 | |
# images = set() # create a set to eliminate duplicates (e.g. thumbnails and fullsize images) | |
# for match in re.findall(pattern, manifest): | |
# identifier, _, _, _, _, format_ = match | |
# images.add(f"{identifier}/full/{height},/0/default.{format_}") | |
# return sorted(images) | |
def _fetch_iiif_manifest(iiif_manifest_url):
    """Download the manifest at iiif_manifest_url and return its body decoded as UTF-8."""
    buffer = io.BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, iiif_manifest_url)
        c.setopt(c.WRITEDATA, buffer)
        c.setopt(c.CAINFO, certifi.where())
        c.setopt(c.FOLLOWLOCATION, 1)
        c.setopt(c.MAXREDIRS, 5)
        c.setopt(c.CONNECTTIMEOUT, 5)
        c.setopt(c.TIMEOUT, 10)
        c.setopt(c.NOSIGNAL, 1)
        c.setopt(c.USERAGENT, "curl/7.68.0")
        c.perform()
        http_code = c.getinfo(c.RESPONSE_CODE)
    except pycurl.error as e:
        # The last element of e.args is the message; fall back to the exception itself.
        error_msg = e.args[-1] if e.args else e
        raise gr.Error(
            f"Could not fetch IIIF manifest from {iiif_manifest_url} ({error_msg})"
        ) from e
    finally:
        # Release the curl handle on every path, including failures.
        c.close()

    if http_code != 200:
        raise gr.Error(f"HTTP Error: {http_code}")
    return buffer.getvalue().decode("utf-8")


def get_images_from_iiif_manifest(iiif_manifest_url, max_images=20, height=1200):
    """
    Read images from a v2/v3 IIIF manifest, limited to max_images.

    Arguments:
        iiif_manifest_url: URL to IIIF manifest
        max_images: Maximum number of images to return (default: 20). Coerced
            to int, because a numeric UI input may deliver a float.
        height: Max height of returned images

    Returns:
        tuple: A sorted list of at most max_images image URLs, and a
        ``gr.update(visible=True)`` for the second output component.

    Raises:
        gr.Error: If the manifest cannot be fetched or the server does not
            answer with HTTP 200. gr.Error is an Exception subclass, so
            callers catching Exception are unaffected.
    """
    max_images = int(max_images)
    manifest = _fetch_iiif_manifest(iiif_manifest_url)

    # Hacky solution to get all images regardless of API version - treat
    # the manifest as a string and match everything that looks like an IIIF
    # image URL.
    pattern = r'(?P<identifier>https?://[^"\s]*)/(?P<region>[^"\s]*?)/(?P<size>[^"\s]*?)/(?P<rotation>!?\d*?)/(?P<quality>[^"\s]*?)\.(?P<format>jpg|tif|png|gif|jp2|pdf|webp)'

    # A set eliminates duplicates (e.g. thumbnails and fullsize images)
    images = set()
    for match in re.finditer(pattern, manifest):
        images.add(
            f"{match['identifier']}/full/{height},/0/default.{match['format']}"
        )
        # Stop adding images if we've reached the maximum
        if len(images) >= max_images:
            break

    return sorted(images)[:max_images], gr.update(visible=True)
# Upload tab: input gallery, alternative image sources, pipeline settings and
# the event wiring that connects them.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost — confirm the layout against the running app
# (in particular whether `iiif_gallery` belongs inside the gr.Group and
# whether `pipeline_description` belongs inside the settings column).
with gr.Blocks() as submit:
    gr.Markdown("# Upload")
    # The advertised limit follows MAX_IMAGES instead of a hard-coded number.
    gr.Markdown(
        "Select or upload the image you want to transcribe. Most common image formats are supported "
        f"and you can upload max {MAX_IMAGES} images at a time in this hosted demo."
    )

    # Receives the collection produced by run_htrflow
    collection_submit_state = gr.State()

    with gr.Row(equal_height=True):
        with gr.Column(scale=2):
            batch_image_gallery = gr.Gallery(
                file_types=["image"],
                label="Image to transcribe",
                interactive=True,
                object_fit="scale-down",
            )

        with gr.Column(scale=1, variant="panel", elem_classes="panel-with-border"):
            with gr.Tabs():
                with gr.Tab("Examples"):
                    examples = gr.Gallery(
                        all_example_images(),
                        show_label=False,
                        interactive=False,
                        allow_preview=False,
                        object_fit="scale-down",
                        min_width=250,
                        height="100%",
                        columns=4,
                        container=False,
                    )

                with gr.Tab("Image ID"):
                    image_id = gr.Textbox(
                        label="Upload by image ID",
                        info=(
                            "Use any image from our digitized archives by pasting its image ID found in the "
                            "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
                            "Press enter to submit."
                        ),
                        placeholder="R0002231_00005",
                    )

                with gr.Tab("IIIF Manifest"):
                    with gr.Group():
                        iiif_manifest_url = gr.Textbox(
                            label="IIIF Manifest",
                            info=(
                                "Use an image from a IIIF manifest by pasting a IIIF manifest URL. Press enter to submit."
                            ),
                            placeholder="",
                            scale=0,
                        )
                        # precision=0 makes the component hand an int to the
                        # handler, which uses the value as a slice bound.
                        max_images_iiif_manifest = gr.Number(
                            value=20,
                            min_width=50,
                            scale=0,
                            label="Number of image to return from IIIF manifest",
                            minimum=1,
                            precision=0,
                            visible=False,
                        )
                    iiif_gallery = gr.Gallery(
                        interactive=False,
                        columns=4,
                        allow_preview=False,
                        container=False,
                        show_label=False,
                        object_fit="scale-down",
                    )

                with gr.Tab("URL"):
                    image_url = gr.Textbox(
                        label="Image URL",
                        info="Upload an image by pasting its URL.",
                        placeholder="https://example.com/image.jpg",
                    )

                with gr.Tab("PDF"):
                    pdf_file = gr.File(label="PDF", file_types=[".pdf"])
                    pdf_gallery = gr.Gallery(
                        interactive=False,
                        columns=4,
                        allow_preview=False,
                        container=False,
                        show_label=False,
                        object_fit="scale-down",
                    )

    with gr.Column(variant="panel", elem_classes="panel-with-border"):
        gr.Markdown("## Settings")
        gr.Markdown(
            "Select a pipeline that best matches your image. The pipeline determines the processing workflow optimized for different handwritten text recognition tasks. "
            "If you select an example image, a suitable pipeline will be preselected automatically. However, you can edit the pipeline if you need to customize it further. "
            "Choosing the right pipeline significantly improves transcription quality. "
        )

        with gr.Row():
            with gr.Column(scale=0):
                pipeline_dropdown = gr.Dropdown(
                    PIPELINES,
                    container=False,
                    min_width=240,
                    scale=0,
                    elem_classes="pipeline-dropdown",
                )
            with gr.Column(scale=0, min_width=100):
                edit_pipeline_button = gr.Button("Edit", scale=0)
            with gr.Column(scale=3):
                progess_bar = gr.Textbox(visible=False, show_label=False)
            with gr.Column(scale=0, min_width=20):
                pass

        # Recomputed from the dropdown value through get_pipeline_description
        pipeline_description = gr.HTML(
            value=get_pipeline_description,
            inputs=pipeline_dropdown,
            elem_classes="pipeline-info",
            padding=False,
        )

    with Modal(visible=False) as edit_pipeline_modal:
        gr.Markdown(
            """
            ## Edit Pipeline
            The code snippet below is a YAML file that the HTRflow app uses to process the image. If you have chosen an
            image from the "Examples" section, the YAML is already a pre-made template tailored to fit the example image.
            Edit pipeline if needed:
            """
        )
        # Recomputed from the dropdown value through get_yaml
        custom_template_yaml = gr.Code(
            value=get_yaml,
            inputs=pipeline_dropdown,
            language="yaml",
            container=False,
        )
        url = "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
        gr.HTML(
            f'See the <a href="{url}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
            padding=False,
            elem_classes="pipeline-help",
        )

    with gr.Row():
        run_button = gr.Button("Run HTR", variant="primary", scale=0, min_width=200)

    def validate_images(images):
        """Clear the gallery and warn when more than MAX_IMAGES images were uploaded."""
        # `images` may be None/empty when the gallery has been cleared.
        if images and len(images) > MAX_IMAGES:
            gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
            return gr.update(value=None)
        return images

    # Enforce the upload limit; without this registration validate_images is
    # never invoked from this module.
    batch_image_gallery.upload(
        validate_images, batch_image_gallery, batch_image_gallery
    )

    image_id.submit(get_image_from_image_id, image_id, batch_image_gallery).then(
        fn=lambda: "Swedish - Spreads", outputs=pipeline_dropdown
    )

    iiif_manifest_url.submit(
        get_images_from_iiif_manifest,
        [iiif_manifest_url, max_images_iiif_manifest],
        [iiif_gallery, max_images_iiif_manifest],
    )

    image_url.submit(lambda url: [url], image_url, batch_image_gallery)

    pdf_file.upload(pdf_to_images, inputs=pdf_file, outputs=pdf_gallery)

    run_button.click(
        fn=run_htrflow,
        inputs=[custom_template_yaml, batch_image_gallery],
        outputs=[collection_submit_state, batch_image_gallery],
    )

    # Selecting a thumbnail in any of the source galleries loads it into the
    # input gallery; example images additionally preselect their pipeline.
    examples.select(get_selected_example_image, None, batch_image_gallery)
    examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
    iiif_gallery.select(get_selected_example_image, None, batch_image_gallery)
    pdf_gallery.select(get_selected_example_image, None, batch_image_gallery)

    edit_pipeline_button.click(lambda: Modal(visible=True), None, edit_pipeline_modal)