Upload 7 files
Browse files
app.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pipeline.image_result_to_summary import image_result_to_response
|
3 |
+
|
4 |
+
# Two-column page: image upload and trigger button on the left, streamed
# analysis text on the right.
with gr.Blocks() as demo:
    gr.Markdown("Muhammad Adhiem Wicaksana's Image to Description project")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="numpy",
                label="Upload Image",
                sources=["upload"],
            )
            analyze_button = gr.Button("Analyze Image")

        with gr.Column():
            output_text = gr.Textbox(
                label="Analysis Result",
                lines=50,
                max_lines=50,
            )

    # image_result_to_response is a generator, so each value it yields
    # is sent to the result textbox in turn.
    analyze_button.click(
        fn=image_result_to_response,
        inputs=[image_input],
        outputs=output_text,
        api_name="analyze",
    )

if __name__ == "__main__":
    demo.launch()
|
pipeline/__pycache__/image_result_to_summary.cpython-311.pyc
ADDED
Binary file (1.95 kB). View file
|
|
pipeline/__pycache__/image_to_data.cpython-311.pyc
ADDED
Binary file (1.62 kB). View file
|
|
pipeline/image_result_to_summary.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from groq import Groq
|
2 |
+
from pipeline.image_to_data import analyze_image
|
3 |
+
import time
|
4 |
+
|
5 |
+
def image_result_to_response(image):
    """Stream a bullet-point summary of the data extracted from an image.

    This is a generator. It first yields status messages, then yields the
    summary repeatedly, one character longer each time, so that a consumer
    displaying the latest value shows the text being "typed". Every yielded
    value is the complete text to display at that moment.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable;
    credentials must never be embedded in source code.

    Args:
        image: The image to analyze, passed through unchanged to
            ``analyze_image``.

    Yields:
        str: A status message, a progressively longer prefix of the summary,
        or a single ``"Error occurred: ..."`` message if anything fails.
    """
    import os  # local import: only needed to look up the API key

    try:
        # Fail fast, before any network call, when no credentials exist.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        yield "-----------Give me a quick second to analyzing the image-----------"
        image_description = analyze_image(image)

        # analyze_image reports failures by returning an "Error occurred: ..."
        # string instead of raising. Surface that to the user as-is rather
        # than asking the model to summarize an error message.
        if isinstance(image_description, str) and image_description.startswith(
            "Error occurred:"
        ):
            yield image_description
            return

        yield "-----------It Will be quick, another second to create the summarization-----------"

        client = Groq(api_key=api_key)

        prompt = (
            "Below is extracted data from an image. "
            "Generate a short and structured presentation with bullet points "
            f"summarizing the insights:\n\n{image_description}"
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        },
                    ],
                }
            ],
            model="llama-3.1-8b-instant",
            temperature=0.1,
        )

        response = chat_completion.choices[0].message.content

        displayed_text = ""
        for char in response:
            displayed_text += char
            # Short pause between characters produces the typing effect.
            time.sleep(0.01)
            yield displayed_text

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text rather
        # than letting the exception propagate.
        yield f"Error occurred: {str(e)}"
|
pipeline/image_to_data.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from groq import Groq
|
2 |
+
from utils.encode_image import encode_image_to_base64
|
3 |
+
|
4 |
+
def analyze_image(image):
    """Extract the data shown in an image as a table using Groq's vision model.

    The image is base64-encoded via ``encode_image_to_base64`` and sent as a
    PNG data URL together with a fixed extraction prompt.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable;
    credentials must never be embedded in source code.

    Args:
        image: The image to analyze, in any form accepted by
            ``encode_image_to_base64``.

    Returns:
        str: The model's response text on success. On any failure (including
        a missing API key) the function does not raise; it returns a string
        of the form ``"Error occurred: <message>"``.
    """
    import os  # local import: only needed to look up the API key

    try:
        # Fail fast, before encoding or any network call, when no
        # credentials are configured.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            raise RuntimeError("GROQ_API_KEY environment variable is not set")

        base64_image = encode_image_to_base64(image)

        client = Groq(api_key=api_key)

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Extract all data from the image in table format (columns and rows).",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            model="llama-3.2-90b-vision-preview",
            temperature=0.1,
        )

        return chat_completion.choices[0].message.content

    except Exception as e:
        # Errors are reported through the return value, not raised, so
        # callers always receive a string.
        return f"Error occurred: {str(e)}"
|
utils/__pycache__/encode_image.cpython-311.pyc
ADDED
Binary file (1.38 kB). View file
|
|
utils/encode_image.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
from PIL import Image
|
3 |
+
import io
|
4 |
+
|
5 |
+
def encode_image_to_base64(image):
    """Return *image* as a base64-encoded string.

    Args:
        image: Either a filesystem path (``str`` or ``os.PathLike``), whose
            bytes are encoded exactly as stored on disk, or an array object
            accepted by ``PIL.Image.fromarray``, which is serialized to PNG
            before encoding.

    Returns:
        str: The base64 text of the file bytes or of the PNG rendering.

    Raises:
        ValueError: If *image* is ``None`` (for example, nothing was
            uploaded).
    """
    import os  # local import: only needed for the PathLike check

    if image is None:
        raise ValueError("No image provided")

    if isinstance(image, (str, os.PathLike)):
        with open(image, "rb") as image_file:
            raw_bytes = image_file.read()
    else:
        # In-memory pixel data: render to PNG in a buffer, no temp file.
        buffered = io.BytesIO()
        Image.fromarray(image).save(buffered, format="PNG")
        raw_bytes = buffered.getvalue()

    return base64.b64encode(raw_bytes).decode("utf-8")
|