Spaces:
Running
Running
File size: 5,927 Bytes
5b212c3 1cf0187 5b212c3 1cf0187 7acf9a1 5b212c3 b12b4c3 5b212c3 b12b4c3 5b212c3 e82221d 5b212c3 32e43ff 5b212c3 32e43ff 5b212c3 7acf9a1 5b212c3 32e43ff 5b212c3 23ac91c 1c329f7 5b212c3 22d0047 5b212c3 7acf9a1 5b212c3 1cf0187 5b212c3 7acf9a1 5b212c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import gradio as gr
import os
import base64
import requests
from mistralai import Mistral
# Read the Mistral API key from the environment. Indexing os.environ directly
# means a missing MISTRAL_API_KEY raises KeyError as soon as the module loads.
api_key = os.environ["MISTRAL_API_KEY"]
# Module-level Mistral client shared by the OCR functions in this file.
client = Mistral(api_key=api_key)
def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64-encoded UTF-8 string.

    Failures are reported in-band rather than raised: a missing file yields
    the fixed message "Error: The file was not found.", and any other
    exception yields "Error: <exception text>".
    """
    try:
        with open(image_path, "rb") as stream:
            raw_bytes = stream.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except FileNotFoundError:
        return "Error: The file was not found."
    except Exception as err:
        return f"Error: {err}"
def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
    """Inline extracted images into a markdown string.

    Every markdown image reference of the form ``![<id>](<id>)`` whose id is a
    key of ``images_dict`` has its link target replaced by the mapped value,
    giving ``![<id>](<value>)``.

    Args:
        markdown_str: Markdown text containing image placeholders.
        images_dict: Mapping of image id to the replacement link target
            (presumably a base64 data URI from the OCR response; confirm
            against the API).

    Returns:
        The markdown with all known placeholders replaced. Images whose value
        is empty or None are left untouched so the output never contains a
        literal "None" link.
    """
    for img_name, base64_str in images_dict.items():
        if not base64_str:
            # No payload for this image; keep the original placeholder.
            continue
        markdown_str = markdown_str.replace(
            f"![{img_name}]({img_name})", f"![{img_name}]({base64_str})"
        )
    return markdown_str
def get_combined_markdown(ocr_response) -> tuple:
    """Merge the per-page markdown of an OCR response into two strings.

    For each page in ``ocr_response.pages`` a map of image id to
    ``image_base64`` is built from ``page.images`` and passed, together with
    the page markdown, to ``replace_images_in_markdown``.

    Returns:
        A 2-tuple ``(rendered, raw)``: the image-substituted markdown and the
        untouched page markdown, each with pages joined by a blank line.
    """
    rendered_pages, raw_pages = [], []
    for page in ocr_response.pages:
        images_by_id = {image.id: image.image_base64 for image in page.images}
        rendered_pages.append(
            replace_images_in_markdown(page.markdown, images_by_id)
        )
        raw_pages.append(page.markdown)
    page_separator = "\n\n"
    return page_separator.join(rendered_pages), page_separator.join(raw_pages)
def get_content_type(url):
    """Fetch the content type of the URL.

    Issues an HTTP HEAD request and returns the ``Content-Type`` response
    header.

    Returns:
        The header value, ``None`` when the response has no ``Content-Type``
        header, or a string starting with "Error fetching content type: "
        when the request raises.
    """
    try:
        # requests.head() does not follow redirects by default, which would
        # report the redirect response's type instead of the final resource's.
        # The timeout keeps an unresponsive host from blocking indefinitely.
        response = requests.head(url, allow_redirects=True, timeout=10)
        return response.headers.get('Content-Type')
    except Exception as e:
        return f"Error fetching content type: {e}"
def perform_ocr_file(file, ocr_method="Mistral OCR"):
    """Run OCR on an uploaded PDF or image and return its markdown.

    Args:
        file: The uploaded file. Either an object exposing the local path as
            ``.name`` or the path string itself; ``None`` when nothing was
            uploaded.
        ocr_method: OCR backend name; only "Mistral OCR" is supported.

    Returns:
        A 2-tuple ``(rendered_markdown, raw_markdown)``. On unsupported
        method, missing file, unreadable image or unsupported extension the
        first element is a markdown-formatted message and the second is "".
    """
    if ocr_method != "Mistral OCR":
        return "## Method not supported.", ""
    if file is None:
        # Nothing uploaded: report it instead of failing on `file.name`.
        return "## No file provided. Please upload a PDF or an image (png, jpeg, jpg).", ""

    path = getattr(file, "name", file)
    lowered = path.lower()

    if lowered.endswith('.pdf'):
        # PDFs are uploaded first, then processed through a signed URL.
        # The context manager closes the handle once the upload returns.
        with open(path, "rb") as pdf_file:
            uploaded_pdf = client.files.upload(
                file={
                    "file_name": os.path.basename(path),
                    "content": pdf_file,
                },
                purpose="ocr"
            )
        try:
            signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)
            ocr_response = client.ocr.process(
                model="mistral-ocr-latest",
                document={
                    "type": "document_url",
                    "document_url": signed_url.url,
                },
                include_image_base64=True
            )
        finally:
            # Always remove the uploaded copy, even when OCR fails.
            client.files.delete(file_id=uploaded_pdf.id)
    elif lowered.endswith(('.png', '.jpg', '.jpeg')):
        base64_image = encode_image(path)
        # encode_image reports failures as an "Error: ..." string. ':' is not
        # in the base64 alphabet, so this prefix never matches real data.
        if base64_image.startswith("Error:"):
            return f"## {base64_image}", ""
        mime_type = "image/png" if lowered.endswith('.png') else "image/jpeg"
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": f"data:{mime_type};base64,{base64_image}"
            },
            include_image_base64=True
        )
    else:
        return "# Unsupported file type. Please provide a PDF or an image (png, jpeg, jpg).", ""

    combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
    return combined_markdown, raw_markdown
def perform_ocr_url(url, ocr_method="Mistral OCR"):
    """Run OCR on a PDF or image reachable at ``url`` and return its markdown.

    The resource kind is decided from the content type reported by
    ``get_content_type``: PDFs are sent as a ``document_url``, PNG/JPEG images
    as an ``image_url``.

    Args:
        url: Address of the PDF or image.
        ocr_method: OCR backend name; only "Mistral OCR" is supported.

    Returns:
        A 2-tuple ``(rendered_markdown, raw_markdown)``. On unsupported
        method, empty URL or unsupported content type the first element is a
        markdown-formatted message and the second is "".
    """
    if ocr_method != "Mistral OCR":
        return "## Method not supported.", ""
    if not url or not url.strip():
        # Empty textbox: answer directly rather than issuing a request.
        return "## No URL provided. Please enter a URL to a PDF or an image (png, jpeg, jpg).", ""
    url = url.strip()

    content_type = get_content_type(url)
    # get_content_type returns None when the header is absent; media types
    # are case-insensitive, so compare against a lowercased string.
    normalized_type = (content_type or "").lower()

    if 'application/pdf' in normalized_type:
        document = {
            "type": "document_url",
            "document_url": url,
        }
    elif any(image_type in normalized_type for image_type in ['image/png', 'image/jpeg', 'image/jpg']):
        document = {
            "type": "image_url",
            "image_url": url,
        }
    else:
        return f"## Unsupported file type. Please provide a URL to a PDF or an image (png, jpeg, jpg).\n\n### You provided:\n{content_type}", ""

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document=document,
        include_image_base64=True
    )
    combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
    return combined_markdown, raw_markdown
# Gradio UI: a two-tab app, one tab for uploaded files and one for URLs.
# Each tab wires its inputs to the matching perform_ocr_* function and shows
# the rendered markdown plus the raw markdown text.
# NOTE(review): nesting below is reconstructed from statement order; confirm
# that the Examples and click handlers belong inside their tabs.
with gr.Blocks() as demo:
    gr.Markdown("# Mistral OCR")
    gr.Markdown("Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR capabilities.\n\nLearn more in the blog post [here](https://mistral.ai/news/mistral-ocr).")
    # Tab 1: OCR on a file uploaded by the user.
    with gr.Tab("Upload File"):
        file_input = gr.File(label="Upload a PDF or Image")
        # Single-choice dropdown; its value is passed as `ocr_method`.
        ocr_method_file = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
        file_output = gr.Markdown(label="Rendered Markdown")
        file_raw_output = gr.Textbox(label="Raw Markdown")
        file_button = gr.Button("Process")
        # Example inputs given as relative paths; presumably these files sit
        # next to this script (verify).
        example_files = gr.Examples(
            examples=[
                "pixtral-12b.pdf",
                "receipt.png"
            ],
            inputs=[file_input]
        )
        # perform_ocr_file returns (rendered, raw), matching the two outputs.
        file_button.click(
            fn=perform_ocr_file,
            inputs=[file_input, ocr_method_file],
            outputs=[file_output, file_raw_output]
        )
    # Tab 2: OCR on a document or image referenced by URL.
    with gr.Tab("Enter URL"):
        url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
        ocr_method_url = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
        url_output = gr.Markdown(label="Rendered Markdown")
        url_raw_output = gr.Textbox(label="Raw Markdown")
        url_button = gr.Button("Process")
        example_urls = gr.Examples(
            examples=[
                "https://arxiv.org/pdf/2410.07073",
                "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
            ],
            inputs=[url_input]
        )
        # perform_ocr_url returns (rendered, raw), matching the two outputs.
        url_button.click(
            fn=perform_ocr_url,
            inputs=[url_input, ocr_method_url],
            outputs=[url_output, url_raw_output]
        )
# Start the app when the module runs; max_threads=1 is passed to Gradio's
# launch() (presumably to limit concurrent processing; confirm against the
# Gradio docs).
demo.launch(max_threads=1)
|