Janarddan committed on
Commit
35798c7
·
verified ·
1 Parent(s): 0f06b5e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Author : Janarddan Sarkar
3
+ file_name : mistral_ocr_st.py
4
+ date : 10-03-2025
5
+ description : Streamlit app that sends an uploaded PDF or image to Mistral OCR and displays the result.
6
+ """
7
+ import os
8
+ import json
9
+ import base64
10
+ import streamlit as st
11
+ from mistralai import Mistral
12
+ from dotenv import find_dotenv, load_dotenv
13
+ from mistralai import DocumentURLChunk, ImageURLChunk, TextChunk
14
+ from mistralai.models import OCRResponse
15
+ from enum import Enum
16
+ from pydantic import BaseModel
17
+ import pycountry
18
+
19
# Read settings from the nearest .env file (if one is found) into the environment,
# then build the shared Mistral client from MISTRAL_API_KEY.
load_dotenv(find_dotenv())
api_key = os.getenv("MISTRAL_API_KEY")
client = Mistral(api_key=api_key)

# Map each two-letter (alpha_2) language code to its language name;
# pycountry entries without an alpha_2 attribute are skipped.
languages = {
    lang.alpha_2: lang.name
    for lang in pycountry.languages
    if hasattr(lang, "alpha_2")
}
26
+
27
+
28
class LanguageMeta(type(Enum)):
    """Enum metaclass that adds one member per entry of the module-level ``languages`` map."""

    def __new__(metacls, cls, bases, classdict):
        # Member name: the language name upper-cased with spaces replaced by
        # underscores. Member value: the unmodified language name.
        for language_name in languages.values():
            member_name = language_name.upper().replace(" ", "_")
            classdict[member_name] = language_name
        return super().__new__(metacls, cls, bases, classdict)
33
+
34
+
35
# Language enumeration. The body is intentionally empty: members are supplied
# by the LanguageMeta metaclass when the class is created.
class Language(Enum, metaclass=LanguageMeta):
    pass
37
+
38
+
39
# Shape of the structured answer requested from the chat model in
# process_image (passed there as response_format).
class StructuredOCR(BaseModel):
    file_name: str  # name of the processed file
    topics: list[str]  # topics found in the document
    languages: list[Language]  # each entry must be a Language member
    ocr_contents: dict  # OCR contents as a free-form dictionary
44
+
45
def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
    """Inline image payloads into markdown image references.

    Every occurrence of ``![<name>](<name>)`` whose ``<name>`` is a key of
    *images_dict* has its link target replaced by the mapped value.

    Args:
        markdown_str: Markdown text containing image references.
        images_dict: Mapping of image name to the string that should become
            the link target (for example a base64 data URL).

    Returns:
        The markdown with the known image references rewritten.
    """
    for img_name, base64_str in images_dict.items():
        # An image without a payload would otherwise be rendered as the
        # literal target "None"; leave such a reference untouched instead.
        if base64_str is None:
            continue
        markdown_str = markdown_str.replace(
            f"![{img_name}]({img_name})", f"![{img_name}]({base64_str})"
        )
    return markdown_str
49
+
50
def get_combined_markdown(ocr_response: OCRResponse) -> str:
    """Return the markdown of all pages joined by blank lines, with images inlined.

    For each page, the page's images are indexed by ``id`` and their
    ``image_base64`` values are substituted into the page markdown via
    ``replace_images_in_markdown``.
    """
    rendered_pages = (
        replace_images_in_markdown(
            page.markdown,
            {image.id: image.image_base64 for image in page.images},
        )
        for page in ocr_response.pages
    )
    return "\n\n".join(rendered_pages)
56
+
57
def process_pdf(pdf_bytes, file_name):
    """Upload a PDF, run OCR on it and return the OCR response.

    Args:
        pdf_bytes: Raw content of the PDF.
        file_name: Name under which the file is uploaded.

    Returns:
        The OCR result; a plain ``dict`` result is converted to ``OCRResponse``.
    """
    upload = client.files.upload(
        file={"file_name": file_name, "content": pdf_bytes},
        purpose="ocr",
    )
    document_url = client.files.get_signed_url(file_id=upload.id, expiry=1).url
    ocr_result = client.ocr.process(
        document=DocumentURLChunk(document_url=document_url),
        model="mistral-ocr-latest",
        include_image_base64=True,
    )

    # Anything that is not a plain dict is handed back unchanged; a dict is
    # converted into the OCRResponse model first.
    if not isinstance(ocr_result, dict):
        return ocr_result
    return OCRResponse(**ocr_result)
75
+
76
+
77
def process_image(image_bytes, file_name):
    """Run OCR on an image and return the result as structured JSON data.

    The image is sent to the OCR model as a base64 data URL; the markdown of
    the first returned page is then passed, together with the image, to a
    chat model that answers in the ``StructuredOCR`` schema.

    Args:
        image_bytes: Raw content of the image.
        file_name: Name of the image file; its extension selects the MIME
            type declared in the data URL.

    Returns:
        The parsed ``StructuredOCR`` answer as JSON-compatible Python data.
    """
    import mimetypes

    # Declare the real image type (e.g. image/png) instead of always claiming
    # JPEG; fall back to JPEG when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(file_name or "")
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    encoded_image = base64.b64encode(image_bytes).decode()
    base64_data_url = f"data:{mime_type};base64,{encoded_image}"
    image_response = client.ocr.process(
        document=ImageURLChunk(image_url=base64_data_url), model="mistral-ocr-latest"
    )
    image_ocr_markdown = image_response.pages[0].markdown

    chat_response = client.chat.parse(
        model="pixtral-12b-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    ImageURLChunk(image_url=base64_data_url),
                    TextChunk(
                        text=(
                            "This is the image's OCR in markdown:\n"
                            f"<BEGIN_IMAGE_OCR>\n{image_ocr_markdown}\n<END_IMAGE_OCR>.\n"
                            "Convert this into a structured JSON response with the OCR contents in a dictionary."
                        )
                    ),
                ],
            },
        ],
        response_format=StructuredOCR,
        temperature=0,
    )
    # Round-trip through JSON so the caller receives plain JSON-compatible data.
    return json.loads(chat_response.choices[0].message.parsed.model_dump_json())
107
+
108
+
109
# Streamlit UI: upload a file, then run OCR when "Submit" is pressed.
st.title("Mistral OCR")

uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "png", "jpg", "jpeg"])

if uploaded_file:
    file_type = uploaded_file.type
    file_bytes = uploaded_file.read()
    file_name = uploaded_file.name

    if st.button("Submit"):
        st.write(f"**Processing file:** {file_name}")

        # PDFs are rendered as combined markdown; everything else is treated
        # as an image and shown as structured JSON.
        if "pdf" not in file_type:
            result = process_image(file_bytes, file_name)
            st.json(result)
        else:
            pdf_response = process_pdf(file_bytes, file_name)
            st.markdown(get_combined_markdown(pdf_response))