File size: 2,733 Bytes
75fdda9
171a063
8c1ee79
754152d
75fdda9
754152d
 
 
 
 
 
 
75fdda9
57b32e0
8c1ee79
 
57b32e0
 
8c1ee79
 
75fdda9
754152d
75fdda9
8c1ee79
75fdda9
57b32e0
 
171a063
57b32e0
 
 
8c1ee79
754152d
57b32e0
754152d
57b32e0
754152d
 
171a063
57b32e0
 
 
 
 
171a063
754152d
8c1ee79
754152d
8c1ee79
 
 
 
171a063
 
8c1ee79
 
57b32e0
8c1ee79
 
57b32e0
8c1ee79
57b32e0
75fdda9
754152d
8c1ee79
75fdda9
171a063
 
75fdda9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import torch
import re
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load NER-Luxury model from Hugging Face
# The tokenizer and the token-classification model are both loaded from the
# same model identifier at import time.
model_name = "AkimfromParis/NER-Luxury"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Load pipeline for Named Entity Recognition (NER)
# NOTE(review): no aggregation strategy is passed here, so the pipeline
# presumably returns one prediction per (sub)token rather than one per merged
# entity -- confirm against the transformers version in use.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Regex for extracting price
# Capture groups:
#   1. optional qualifier word: under / above / below / between
#   2. the amount, 1 to 5 digits
#   3. optional currency code: AED / USD / EUR
# Matching is case-insensitive. A single match captures only one amount, so
# for "between 100 and 200" a match covers just "between 100".
price_pattern = re.compile(r'(\bunder\b|\babove\b|\bbelow\b|\bbetween\b)?\s?(\d{1,5})\s?(AED|USD|EUR)?', re.IGNORECASE)

# Keywords for gender extraction
# All entries are lowercase.
gender_keywords = ["men", "male", "women", "female", "unisex"]

def _field_for_label(label):
    """Return the output field an NER label fills, or None if it is unused."""
    if "HOUSE" in label or "BRAND" in label:  # Luxury brands
        return "Brand"
    if "CATEGORY" in label:  # Fashion categories
        return "Category"
    if "MONETARYVALUE" in label:  # Price values
        return "Price"
    return None


def _merge_entity_tokens(query, entities):
    """Merge adjacent same-field token predictions into whole entity strings.

    Args:
        query: The text the predictions were produced from.
        entities: Iterable of prediction dicts with at least the keys
            "entity" (label) and "word" (token text). The optional keys
            "start"/"end" (character offsets into ``query``) and "index"
            (token position) are used when present.

    Returns:
        A dict mapping "Brand" / "Category" / "Price" to the text of the last
        entity span found for that field. Fields without a span are absent.
    """
    merged = {}
    field = None        # field of the span currently being assembled
    pieces = ""         # span text rebuilt from token strings (fallback)
    start = end = None  # character offsets of the span in query, when known
    last_index = None   # token position of the previous mapped prediction

    for entity in entities:
        label = entity["entity"]
        token_field = _field_for_label(label)
        if token_field is None:
            field = None  # an unrelated entity terminates the current span
            continue

        word = entity["word"]
        piece = word.replace("##", "")  # Fix tokenization artifacts
        tok_start = entity.get("start")
        tok_end = entity.get("end")
        tok_index = entity.get("index")

        # A token is glued to the previous one when it is a "##" word piece or
        # when it starts exactly where the previous token ended.
        glued = word.startswith("##") or (
            tok_start is not None and tok_start == end
        )
        adjacent = (
            tok_index is None
            or last_index is None
            or tok_index == last_index + 1
        )

        if (
            token_field == field
            and adjacent
            and (glued or not label.startswith("B-"))
        ):
            # Continuation of the current entity: extend it instead of
            # overwriting it, so multi-token names are not truncated.
            pieces += piece if word.startswith("##") else " " + piece
            end = tok_end
        else:
            # A new entity starts; for a repeated field the last one wins.
            field = token_field
            pieces = piece
            start, end = tok_start, tok_end
        last_index = tok_index

        # Prefer slicing the original text (exact spelling and spacing); fall
        # back to the rebuilt token text when offsets are unavailable.
        if start is not None and end is not None:
            value = query[start:end].strip()
        else:
            value = pieces.strip()
        if value:
            merged[field] = value

    return merged


def extract_attributes(query):
    """
    Extract structured fashion attributes dynamically using the fine-tuned NER-Luxury model.

    Args:
        query: Free-text search query. ``None`` or a blank string is accepted.

    Returns:
        A dict with the keys "Brand", "Category", "Gender" and "Price". Any
        attribute that could not be determined has the value "Unknown".
    """
    structured_output = {"Brand": "Unknown", "Category": "Unknown", "Gender": "Unknown", "Price": "Unknown"}

    # Nothing to analyse: skip the model call entirely.
    if not query or not query.strip():
        return structured_output

    # Run NER model on query and merge (sub)token predictions into entities
    structured_output.update(_merge_entity_tokens(query, ner_pipeline(query)))

    # Extract gender. Whole-word matching is required: with plain substring
    # checks "women" would match "men" and "female" would match "male".
    # An optional "s" / "swear" suffix keeps "mens" and "womenswear" working.
    lowered = query.lower()
    for gender in gender_keywords:
        if re.search(rf"\b{re.escape(gender)}(?:s|swear)?\b", lowered):
            structured_output["Gender"] = gender.capitalize()
            break

    # Extract price if not found by NER
    if structured_output["Price"] == "Unknown":
        first_match = None
        qualified_match = None
        for match in price_pattern.finditer(query):
            if first_match is None:
                first_match = match
            # A number with a qualifier or a currency is far more likely to be
            # a price than a bare number such as the "5" in "No 5".
            if match.group(1) or match.group(3):
                qualified_match = match
                break
        price_match = qualified_match or first_match
        if price_match:
            condition, amount, currency = price_match.groups()
            structured_output["Price"] = f"{condition.capitalize() if condition else ''} {amount} {currency if currency else 'AED'}".strip()

    return structured_output

# Define Gradio UI
def parse_query(user_query):
    """
    Convert a free-text fashion query into its structured attributes.

    Delegates to extract_attributes and hands the result back unchanged.
    """
    return extract_attributes(user_query)

# Build the Gradio UI: a text input, a JSON output and a trigger button
with gr.Blocks() as demo:
    gr.Markdown("# 🛍️ Luxury Fashion Query Parser using NER-Luxury")

    query_input = gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., Gucci men’s perfume under 200AED",
    )
    output_box = gr.JSON(label="Parsed Output")
    parse_button = gr.Button("Parse Query")

    # Clicking the button feeds the textbox content through parse_query and
    # shows the result in the JSON box.
    parse_button.click(
        fn=parse_query,
        inputs=[query_input],
        outputs=[output_box],
    )

demo.launch()