Spaces:
Running
Running
Added Llama-3.2-3B-Instruct; requirements.txt needs to be updated
#9
by
RatanPrakash
- opened
app.py
CHANGED
@@ -11,6 +11,22 @@ import dateparser
|
|
11 |
import os
|
12 |
import matplotlib.pyplot as plt
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Function to get Instagram post details
|
15 |
import instaloader
|
16 |
def get_instagram_post_details(post_url):
|
@@ -344,6 +360,16 @@ elif app_mode == "Task 1":
|
|
344 |
st.write(f"Extracting details from {uploaded_image.name}...")
|
345 |
result = ocr.ocr(img_array, cls=True)
|
346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
# Process the OCR result to extract product name and properties
|
348 |
product_name, product_details = extract_product_info(result)
|
349 |
|
|
|
11 |
import os
|
12 |
import matplotlib.pyplot as plt
|
13 |
|
14 |
+
# LLM integration to extract product details (Llama-3.2-3B-Instruct)
|
15 |
+
import torch
|
16 |
+
from transformers import pipeline
|
17 |
+
|
18 |
+
model_id = "meta-llama/Llama-3.2-3B-Instruct"
|
19 |
+
pipe = pipeline(
|
20 |
+
"text-generation",
|
21 |
+
model=model_id,
|
22 |
+
torch_dtype=torch.bfloat16,
|
23 |
+
device_map="auto",
|
24 |
+
)
|
25 |
+
|
26 |
+
messages = [
|
27 |
+
{"role": "system", "content": """Your task is to get the product details out of the text given. The text given will be raw text from OCR of social media images of products,
|
28 |
+
and the goal is to get product details and description so that it can be used for amazon product listing. """},
|
29 |
+
]
|
30 |
# Function to get Instagram post details
|
31 |
import instaloader
|
32 |
def get_instagram_post_details(post_url):
|
|
|
360 |
st.write(f"Extracting details from {uploaded_image.name}...")
|
361 |
result = ocr.ocr(img_array, cls=True)
|
362 |
|
363 |
+
|
364 |
+
|
365 |
+
messages.append({"role": "user", "content": result})
|
366 |
+
outputs = pipe(
|
367 |
+
messages,
|
368 |
+
max_new_tokens=256,
|
369 |
+
)
|
370 |
+
productContent = outputs[0]["generated_text"][-1]
|
371 |
+
st.markdown(productContent)
|
372 |
+
|
373 |
# Process the OCR result to extract product name and properties
|
374 |
product_name, product_details = extract_product_info(result)
|
375 |
|