Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,69 +1,69 @@
|
|
| 1 |
|
| 2 |
-
import re
|
| 3 |
-
import gradio as gr
|
| 4 |
-
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 5 |
-
from PIL import Image
|
| 6 |
|
| 7 |
-
# Load model & processor once at startup
|
| 8 |
-
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
| 9 |
-
model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
| 10 |
|
| 11 |
-
def smoldocling_readimage(image, prompt_text="Convert to docling"):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
|
| 23 |
-
def extract_numbers(docling_text):
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
|
| 28 |
-
def compare_outputs(img1, img2):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
|
| 54 |
-
# Gradio UI: take 2 images, output similarity report
|
| 55 |
-
demo = gr.Interface(
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
)
|
| 65 |
|
| 66 |
-
demo.launch()
|
| 67 |
|
| 68 |
|
| 69 |
import re
|
|
|
|
| 1 |
|
| 2 |
+
# import re
|
| 3 |
+
# import gradio as gr
|
| 4 |
+
# from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 5 |
+
# from PIL import Image
|
| 6 |
|
| 7 |
+
# # Load model & processor once at startup
|
| 8 |
+
# processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
| 9 |
+
# model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
| 10 |
|
| 11 |
+
# def smoldocling_readimage(image, prompt_text="Convert to docling"):
|
| 12 |
+
# messages = [
|
| 13 |
+
# {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
| 14 |
+
# ]
|
| 15 |
+
# prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
|
| 16 |
+
# inputs = processor(text=prompt, images=[image], return_tensors="pt")
|
| 17 |
+
# outputs = model.generate(**inputs, max_new_tokens=1024)
|
| 18 |
+
# prompt_length = inputs.input_ids.shape[1]
|
| 19 |
+
# generated = outputs[:, prompt_length:]
|
| 20 |
+
# result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
| 21 |
+
# return result.replace("<end_of_utterance>", "").strip()
|
| 22 |
|
| 23 |
+
# def extract_numbers(docling_text):
|
| 24 |
+
# # Extract every number from the docling text using regex: decimals (optionally signed) and plain integers (the sign prefix is not captured for integers)
|
| 25 |
+
# numbers = re.findall(r"[-+]?\d*\.\d+|\d+", docling_text)
|
| 26 |
+
# return list(map(float, numbers))
|
| 27 |
|
| 28 |
+
# def compare_outputs(img1, img2):
|
| 29 |
+
# # Extract docling text from both images
|
| 30 |
+
# output1 = smoldocling_readimage(img1)
|
| 31 |
+
# output2 = smoldocling_readimage(img2)
|
| 32 |
|
| 33 |
+
# # Extract numbers from both outputs
|
| 34 |
+
# nums1 = extract_numbers(output1)
|
| 35 |
+
# nums2 = extract_numbers(output2)
|
| 36 |
|
| 37 |
+
# # Compare the numbers position by position and count the pairs that differ by less than 1e-3
|
| 38 |
+
# length = min(len(nums1), len(nums2))
|
| 39 |
+
# matches = sum(1 for i in range(length) if abs(nums1[i] - nums2[i]) < 1e-3)
|
| 40 |
|
| 41 |
+
# # Calculate the similarity percentage: matches divided by the length of the longer list (0 when both lists are empty)
|
| 42 |
+
# total = max(len(nums1), len(nums2))
|
| 43 |
+
# accuracy = (matches / total) * 100 if total > 0 else 0
|
| 44 |
|
| 45 |
+
# # Prepare result text
|
| 46 |
+
# result_text = (
|
| 47 |
+
# f"Output for Image 1:\n{output1}\n\n"
|
| 48 |
+
# f"Output for Image 2:\n{output2}\n\n"
|
| 49 |
+
# f"Similarity Accuracy: {accuracy:.2f}%\n"
|
| 50 |
+
# f"Matching Values: {matches} out of {total}"
|
| 51 |
+
# )
|
| 52 |
+
# return result_text
|
| 53 |
|
| 54 |
+
# # Gradio UI: take 2 images, output similarity report
|
| 55 |
+
# demo = gr.Interface(
|
| 56 |
+
# fn=compare_outputs,
|
| 57 |
+
# inputs=[
|
| 58 |
+
# gr.Image(type="pil", label="Upload Image 1"),
|
| 59 |
+
# gr.Image(type="pil", label="Upload Image 2"),
|
| 60 |
+
# ],
|
| 61 |
+
# outputs="text",
|
| 62 |
+
# title="SmolDocling Image Comparison",
|
| 63 |
+
# description="Upload two document images. This app extracts data from both and compares similarity."
|
| 64 |
+
# )
|
| 65 |
|
| 66 |
+
# demo.launch()
|
| 67 |
|
| 68 |
|
| 69 |
import re
|