# Spaces: Running
import gradio as gr | |
import os | |
import base64 | |
import requests | |
from mistralai import Mistral | |
# The key is read with a subscript lookup, so a missing MISTRAL_API_KEY
# raises KeyError when the module is loaded.
api_key = os.environ["MISTRAL_API_KEY"]

# Module-level Mistral client built from that key.
client = Mistral(api_key=api_key)
def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64 text string.

    Failures are reported through the return value rather than raised:
    a missing file yields ``"Error: The file was not found."`` and any
    other exception yields ``"Error: <exception text>"``.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
            encoded = base64.b64encode(raw_bytes)
            return encoded.decode('utf-8')
    except FileNotFoundError:
        return "Error: The file was not found."
    except Exception as exc:
        return f"Error: {exc}"
def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
    """Inline image payloads into markdown image placeholders.

    Every occurrence of ``![<id>](<id>)`` whose ``<id>`` is a key of
    ``images_dict`` has its link target replaced by the mapped value, giving
    ``![<id>](<value>)``.

    Args:
        markdown_str: Markdown text containing image placeholders.
        images_dict: Mapping of image id to the string that should become the
            image target (presumably a base64 data URI from the OCR response;
            confirm against the API output).

    Returns:
        The markdown with all matching placeholders rewritten. Placeholders
        without a matching id, and ids without a payload, are left untouched.
    """
    for img_name, base64_str in images_dict.items():
        if not base64_str:
            # No payload for this image: keep the placeholder instead of
            # writing a broken "![id](None)" link.
            continue
        markdown_str = markdown_str.replace(
            f"![{img_name}]({img_name})",
            f"![{img_name}]({base64_str})",
        )
    return markdown_str
def get_combined_markdown(ocr_response) -> tuple:
    """Join the markdown of every page in ``ocr_response``.

    Returns a 2-tuple ``(rendered, raw)``: ``rendered`` is each page's
    markdown after ``replace_images_in_markdown`` has been applied with that
    page's ``{image id: image_base64}`` mapping, and ``raw`` is each page's
    markdown as-is. Pages are separated by a blank line in both strings.
    """
    rendered_pages = []
    untouched_pages = []
    for page in ocr_response.pages:
        # Per-page lookup of image id -> base64 payload.
        page_images = {img.id: img.image_base64 for img in page.images}
        rendered_pages.append(
            replace_images_in_markdown(page.markdown, page_images)
        )
        untouched_pages.append(page.markdown)
    page_separator = "\n\n"
    return (
        page_separator.join(rendered_pages),
        page_separator.join(untouched_pages),
    )
def get_content_type(url):
    """Fetch the Content-Type header of ``url`` with an HTTP HEAD request.

    Args:
        url: Address of the resource to probe.

    Returns:
        The value of the ``Content-Type`` response header, or ``None`` when
        the response has no such header. If the request fails for any reason,
        a string starting with ``"Error fetching content type: "`` is
        returned instead of raising.
    """
    try:
        # HEAD requests do not follow redirects by default, which would
        # report the redirect response's type instead of the document's.
        # The timeout keeps an unresponsive host from blocking the worker
        # indefinitely.
        response = requests.head(url, allow_redirects=True, timeout=10)
        return response.headers.get('Content-Type')
    except Exception as e:
        # Deliberately broad: the caller shows this text to the user.
        return f"Error fetching content type: {e}"
def perform_ocr_file(file, ocr_method="Mistral OCR"):
    """Run OCR on an uploaded PDF or image file.

    PDFs are uploaded to Mistral, processed through a signed URL, and the
    uploaded copy is deleted afterwards. Images (png, jpg, jpeg) are sent
    inline as a base64 data URI.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the local
            path, or ``None`` when nothing was uploaded.
        ocr_method: OCR backend to use; only ``"Mistral OCR"`` is supported.

    Returns:
        A ``(rendered_markdown, raw_markdown)`` tuple. For unsupported
        methods or file types, a missing file, or an unreadable image, the
        first element is a markdown message and the second is ``""``.
    """
    if ocr_method != "Mistral OCR":
        return "## Method not supported.", ""
    if file is None:
        # Nothing was uploaded; avoid an AttributeError on ``file.name``.
        return "## No file provided. Please upload a PDF or an image (png, jpeg, jpg).", ""

    path = file.name
    lowered_path = path.lower()

    if lowered_path.endswith('.pdf'):
        # Close the handle as soon as the upload call returns.
        with open(path, "rb") as pdf_file:
            uploaded_pdf = client.files.upload(
                file={
                    "file_name": path,
                    "content": pdf_file,
                },
                purpose="ocr"
            )
        try:
            signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)
            ocr_response = client.ocr.process(
                model="mistral-ocr-latest",
                document={
                    "type": "document_url",
                    "document_url": signed_url.url,
                },
                include_image_base64=True
            )
        finally:
            # Remove the uploaded copy even when OCR fails.
            client.files.delete(file_id=uploaded_pdf.id)
    elif lowered_path.endswith(('.png', '.jpg', '.jpeg')):
        base64_image = encode_image(path)
        # encode_image reports failures as "Error: ..." text; ':' is not a
        # base64 character, so this prefix never matches real image data.
        if base64_image.startswith("Error:"):
            return f"## {base64_image}", ""
        # Label the payload with the MIME type that matches the extension.
        mime_type = "image/png" if lowered_path.endswith('.png') else "image/jpeg"
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": f"data:{mime_type};base64,{base64_image}"
            },
            include_image_base64=True
        )
    else:
        return "# Unsupported file type. Please provide a PDF or an image (png, jpeg, jpg).", ""

    combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
    return combined_markdown, raw_markdown
def perform_ocr_url(url, ocr_method="Mistral OCR"):
    """Run OCR on a PDF or image reachable at ``url``.

    The resource kind is decided from the content type reported by
    ``get_content_type``: PDFs are processed as ``document_url``, png/jpeg
    images as ``image_url``.

    Args:
        url: Address of the PDF or image.
        ocr_method: OCR backend to use; only ``"Mistral OCR"`` is supported.

    Returns:
        A ``(rendered_markdown, raw_markdown)`` tuple. For unsupported
        methods or content types the first element is a markdown message
        (including the reported content type) and the second is ``""``.
    """
    if ocr_method != "Mistral OCR":
        return "## Method not supported.", ""

    reported_type = get_content_type(url)
    # get_content_type returns None when the header is absent; normalise so
    # the membership tests below cannot raise TypeError. Media types are
    # case-insensitive, so match on the lowercased value.
    content_type = (reported_type or "").lower()

    if 'application/pdf' in content_type:
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "document_url",
                "document_url": url,
            },
            include_image_base64=True
        )
    elif any(image_type in content_type for image_type in ['image/png', 'image/jpeg', 'image/jpg']):
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": url,
            },
            include_image_base64=True
        )
    else:
        # Show the value as reported, or a placeholder when there was none.
        shown_type = reported_type if reported_type else "(no Content-Type header)"
        return f"## Unsupported file type. Please provide a URL to a PDF or an image (png, jpeg, jpg).\n\n### You provided:\n{shown_type}", ""

    combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
    return combined_markdown, raw_markdown
# Gradio UI: one tab for uploaded files, one for URLs. Each tab wires its
# "Process" button to the matching OCR function and shows the rendered and
# raw markdown it returns.
with gr.Blocks() as demo:
    gr.Markdown("# Mistral OCR")
    gr.Markdown(
        "Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR capabilities.\n\nLearn more in the blog post [here](https://mistral.ai/news/mistral-ocr)."
    )

    # --- Tab 1: OCR on an uploaded file ---
    with gr.Tab("Upload File"):
        file_input = gr.File(label="Upload a PDF or Image")
        ocr_method_file = gr.Dropdown(
            choices=["Mistral OCR"],
            label="Select OCR Method",
            value="Mistral OCR",
        )
        file_output = gr.Markdown(label="Rendered Markdown")
        file_raw_output = gr.Textbox(label="Raw Markdown")
        file_button = gr.Button("Process")
        # Example files are referenced by relative path.
        example_files = gr.Examples(
            examples=["pixtral-12b.pdf", "receipt.png"],
            inputs=[file_input],
        )
        file_button.click(
            fn=perform_ocr_file,
            inputs=[file_input, ocr_method_file],
            outputs=[file_output, file_raw_output],
        )

    # --- Tab 2: OCR on a document or image URL ---
    with gr.Tab("Enter URL"):
        url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
        ocr_method_url = gr.Dropdown(
            choices=["Mistral OCR"],
            label="Select OCR Method",
            value="Mistral OCR",
        )
        url_output = gr.Markdown(label="Rendered Markdown")
        url_raw_output = gr.Textbox(label="Raw Markdown")
        url_button = gr.Button("Process")
        example_urls = gr.Examples(
            examples=[
                "https://arxiv.org/pdf/2410.07073",
                "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png",
            ],
            inputs=[url_input],
        )
        url_button.click(
            fn=perform_ocr_url,
            inputs=[url_input, ocr_method_url],
            outputs=[url_output, url_raw_output],
        )

# Start the app with max_threads=1.
demo.launch(max_threads=1)