madhiemw committed on
Commit
c189148
·
verified ·
1 Parent(s): 392e7e7

Upload 7 files

Browse files
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pipeline.image_result_to_summary import image_result_to_response
3
+
4
# Gradio UI definition: a two-column page with the image upload and trigger
# button on one side and the analysis text on the other.
with gr.Blocks() as demo:
    gr.Markdown("Muhammad Adhiem Wicaksana's Image to Description project")

    with gr.Row():
        # Input column: the image is handed to the handler as a numpy array
        # and can only come from a file upload.
        with gr.Column():
            image_input = gr.Image(
                type="numpy",
                label="Upload Image",
                sources=["upload"],
            )
            analyze_button = gr.Button("Analyze Image")

        # Output column: a tall, fixed-height textbox for the result.
        with gr.Column():
            output_text = gr.Textbox(
                label="Analysis Result",
                lines=50,
                max_lines=50,
            )

    # Clicking the button runs the pipeline on the uploaded image and writes
    # whatever the handler produces into the textbox. The same handler is
    # registered under the API name "analyze".
    analyze_button.click(
        fn=image_result_to_response,
        inputs=[image_input],
        outputs=output_text,
        api_name="analyze",
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
pipeline/__pycache__/image_result_to_summary.cpython-311.pyc ADDED
Binary file (1.95 kB). View file
 
pipeline/__pycache__/image_to_data.cpython-311.pyc ADDED
Binary file (1.62 kB). View file
 
pipeline/image_result_to_summary.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ from pipeline.image_to_data import analyze_image
3
+ import time
4
+
5
def image_result_to_response(image):
    """Stream a bullet-point summary of the data extracted from an image.

    This is a generator: each yielded string is a complete replacement for
    the text shown so far. It first yields two status messages, then the
    summary as a sequence of growing prefixes (a typewriter effect).

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable.

    Args:
        image: The image to analyze, forwarded unchanged to
            ``analyze_image``. ``None`` (nothing uploaded) is reported as an
            error without contacting any service.

    Yields:
        str: Status messages, then progressively longer prefixes of the
        summary. On any failure, an ``"Error occurred: ..."`` message.
    """
    # Local import keeps this block self-contained.
    import os

    # Nothing to analyze: say so directly instead of letting the pipeline
    # fail later with an opaque message.
    if image is None:
        yield "Error occurred: no image was provided"
        return

    try:
        yield "-----------Give me a quick second to analyzing the image-----------"
        image_description = analyze_image(image)

        # analyze_image reports failures by returning an "Error occurred: ..."
        # string rather than raising. Surface that to the user instead of
        # asking the language model to "summarize" an error message.
        if isinstance(image_description, str) and image_description.startswith(
            "Error occurred:"
        ):
            yield image_description
            return

        yield "-----------It Will be quick, another second to create the summarization-----------"

        # Secrets must not live in source control; the key comes from the
        # environment. A missing key makes the client constructor raise,
        # which is reported through the handler below.
        client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

        prompt = (
            "Below is extracted data from an image. "
            "Generate a short and structured presentation with bullet points "
            f"summarizing the insights:\n\n{image_description}"
        )

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        },
                    ],
                }
            ],
            model="llama-3.1-8b-instant",
            temperature=0.1,
        )

        response = chat_completion.choices[0].message.content

        # Typewriter effect: reveal one more character every 10 ms. Slicing
        # yields the same prefixes as appending character by character.
        for shown in range(1, len(response) + 1):
            time.sleep(0.01)
            yield response[:shown]

    except Exception as e:
        # Top-level UI boundary: show the failure in the output box instead
        # of crashing the event handler.
        yield f"Error occurred: {str(e)}"
pipeline/image_to_data.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ from utils.encode_image import encode_image_to_base64
3
+
4
def analyze_image(image):
    """Ask Groq's vision model to extract the data in an image as a table.

    The image is base64-encoded with ``encode_image_to_base64`` and sent as
    a PNG data URL together with a fixed extraction prompt.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable.

    Args:
        image: The image to analyze, in whatever form
            ``encode_image_to_base64`` accepts. ``None`` is reported as an
            error without contacting the service.

    Returns:
        str: The model's reply, or an ``"Error occurred: ..."`` string if
        anything fails. This function does not raise.
    """
    # Local import keeps this block self-contained.
    import os

    # Give a clear message for the "nothing uploaded" case instead of the
    # opaque error the encoder would produce for None.
    if image is None:
        return "Error occurred: no image was provided"

    try:
        base64_image = encode_image_to_base64(image)
        # Debug aid: only the first 100 characters of the payload are shown.
        print("Encoded Image:", base64_image[:100])

        # Secrets must not live in source control; the key comes from the
        # environment. A missing key makes the client constructor raise,
        # which is reported through the handler below.
        client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Extract all data from the image in table format (columns and rows).",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            model="llama-3.2-90b-vision-preview",
            temperature=0.1,
        )

        return chat_completion.choices[0].message.content

    except Exception as e:
        # Callers expect a string in every case, so failures are returned as
        # text rather than propagated.
        return f"Error occurred: {str(e)}"
utils/__pycache__/encode_image.cpython-311.pyc ADDED
Binary file (1.38 kB). View file
 
utils/encode_image.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from PIL import Image
3
+ import io
4
+
5
def encode_image_to_base64(image):
    """Return the base64 text encoding of an image.

    Args:
        image: Either a filesystem path (``str`` or any ``os.PathLike`` such
            as ``pathlib.Path``) to an image file, or an array-like object
            accepted by ``PIL.Image.fromarray``.

    Returns:
        str: For a path, the base64 encoding of the file's raw bytes. For an
        array, the base64 encoding of the image re-encoded as PNG.

    Raises:
        OSError: If a path is given and the file cannot be read.
    """
    # Local import keeps this block self-contained.
    import os

    # Path input: encode the file bytes as-is, with no re-encoding. PathLike
    # objects are accepted here too; otherwise they would be mistaken for
    # image arrays and fail in the branch below.
    if isinstance(image, (str, os.PathLike)):
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # Array input: serialize to PNG in memory, then encode those bytes.
    buffered = io.BytesIO()
    Image.fromarray(image).save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")