Spaces:

madhiemw
/

image_to_text_generator

Running

madhiemw committed on Feb 3

Commit

c189148

verified ·

1 Parent(s): 392e7e7

Upload 7 files

Files changed (7) hide show

app.py ADDED Viewed

+import gradio as gr
+from pipeline.image_result_to_summary import image_result_to_response
+with gr.Blocks() as demo:
+    gr.Markdown("Muhammad Adhiem Wicaksana's Image to Description project")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="numpy", label="Upload Image", sources=["upload"])
+            analyze_button = gr.Button("Analyze Image")
+        with gr.Column():
+            output_text = gr.Textbox(label="Analysis Result", lines=50, max_lines=50)
+    analyze_button.click(
+        fn=image_result_to_response,
+        inputs=[image_input],
+        outputs=output_text,
+        api_name="analyze"
+    )
+if __name__ == "__main__":
+    demo.launch()

pipeline/__pycache__/image_result_to_summary.cpython-311.pyc ADDED Viewed

Binary file (1.95 kB). View file

pipeline/__pycache__/image_to_data.cpython-311.pyc ADDED Viewed

Binary file (1.62 kB). View file

pipeline/image_result_to_summary.py ADDED Viewed

+from groq import Groq
+from pipeline.image_to_data import analyze_image
+import time
+def image_result_to_response(image):
+    """Get summarized insights from image analysis."""
+    try:
+        yield("-----------Give me a quick second to analyzing the image-----------")
+        image_description = analyze_image(image)
+        yield("-----------It Will be quick, another second to create the summarization-----------")
+        client = Groq(api_key="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")
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": f"Below is extracted data from an image. "
+                                    f"Generate a short and structured presentation with bullet points summarizing the insights:\n\n{image_description}"
+                        },
+                    ],
+                }
+            ],
+            model="llama-3.1-8b-instant",
+            temperature=0.1,
+        )
+        response = chat_completion.choices[0].message.content
+        displayed_text = ""
+        for char in response:
+            displayed_text += char
+            time.sleep(0.01)
+            yield displayed_text
+    except Exception as e:
+        yield f"Error occurred: {str(e)}"

pipeline/image_to_data.py ADDED Viewed

+from groq import Groq
+from utils.encode_image import encode_image_to_base64
+def analyze_image(image):
+    """Analyze image using Groq's vision model and return response."""
+    try:
+        base64_image = encode_image_to_base64(image)
+        print("Encoded Image:", base64_image[:100])
+        client = Groq(api_key="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")
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "Extract all data from the image in table format (columns and rows)."
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{base64_image}",
+                            },
+                        },
+                    ],
+                }
+            ],
+            model="llama-3.2-90b-vision-preview",
+            temperature=0.1,
+        )
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"Error occurred: {str(e)}"

utils/__pycache__/encode_image.cpython-311.pyc ADDED Viewed

Binary file (1.38 kB). View file

utils/encode_image.py ADDED Viewed

+import base64
+from PIL import Image
+import io
+def encode_image_to_base64(image):
+    """Convert image to base64 string"""
+    if isinstance(image, str):
+        with open(image, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+    else:
+        buffered = io.BytesIO()
+        Image.fromarray(image).save(buffered, format="PNG")
+        return base64.b64encode(buffered.getvalue()).decode('utf-8')