import os
import re

import spaces
import gradio as gr
from gradio_client import Client, handle_file
from transformers import AutoTokenizer, AutoModelForCausalLM

# Token for gated-model access (Llama-2 requires an accepted license on the HF Hub).
hf_token = os.environ.get('HF_TOKEN')

# Remote CLIP Interrogator space: turns an uploaded image into a text caption.
# NOTE(review): the original assigned clipi_client twice (repo-id form, then the
# legacy *.hf.space URL); both point at the same space, so keep a single client.
clipi_client = Client("fffiloni/CLIP-Interrogator-2")

model_path = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, use_auth_token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_path, use_auth_token=hf_token).half().cuda()

#client = Client("https://fffiloni-test-llama-api-debug.hf.space/", hf_token=hf_token)


@spaces.GPU
def llama_gen_fragrance(scene):
    """Generate an imaginary perfume description for *scene* with Llama-2.

    Parameters
    ----------
    scene : str
        Textual description of a scene (here: the CLIP Interrogator caption).

    Returns
    -------
    str
        The model output with the echoed ``[INST]...[/INST]`` prompt removed.
    """
    # Plain template string (NOT an f-string): the empty ``{}`` placeholder is
    # filled by .format(scene) below. The original ``f"""...{}..."""`` was a
    # SyntaxError. ``<<SYS>>``/``<</SYS>>`` are Llama-2-chat system-prompt
    # delimiters (they had been stripped to ``<>``).
    instruction = """[INST] <<SYS>>\n You are a poetic perfumer. Your role is to create the imaginary scent of a described scene. You must always respond using the following structure: --- **Perfume Name**: [An original, evocative, and unique name — in French or English] **Tagline**: [A short, poetic sentence — like a perfume advertisement hook] **Poetic Olfactory Description**: [A freeform and expressive description of the scent ambiance evoked by the scene. Use sensory, emotional, and evocative language. You may include metaphors, seasonal references, or mood imagery.] **Olfactory Pyramid (technical)**: - **Top Notes**: [3–4 precise and vivid elements. Example: frozen pine, mandarin peel, cold ozone] - **Heart Notes**: [3–4 elements suggesting warmth or the body of the fragrance. Example: smoked wood, soft leather, toasted bread] - **Base Notes**: [3–4 deeper, earthier, or longer-lasting elements. Example: white musk, ash, ambergris] **General Atmosphere**: [A final synthesis that summarizes the overall impression of the fragrance — contrast, duration, evolution, or emotional tone. Keep it artistic.] --- Generate one single imaginary perfume following these instructions. 
Here is the scene to analyze: \n<</SYS>>\n\n{} [/INST]"""
    prompt = instruction.format(scene)
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()
    generate_ids = model.generate(input_ids, max_new_tokens=4096)
    output_text = tokenizer.decode(generate_ids[0], skip_special_tokens=True)
    # The decode includes the prompt itself; strip the [INST]...[/INST] block.
    pattern = r'\[INST\].*?\[/INST\]'
    cleaned_text = re.sub(pattern, '', output_text, flags=re.DOTALL)
    return cleaned_text


def get_text_after_colon(input_text):
    """Return the text after the first ':' (whitespace-stripped).

    If *input_text* contains no colon, it is returned unchanged.
    """
    colon_index = input_text.find(":")
    if colon_index != -1:
        return input_text[colon_index + 1:].strip()
    return input_text


def infer(image_input, audience=None):
    """Caption *image_input* with CLIP Interrogator, then ask Llama-2 for a fragrance.

    Parameters
    ----------
    image_input : str
        Filepath of the uploaded image (``gr.Image(type="filepath")``).
    audience : optional
        Currently unused; kept (with a default) for backward compatibility —
        the original signature required it, but the click handler only wires
        ``[image_in]``, which crashed with a TypeError.

    Returns
    -------
    str
        The generated fragrance text, with blank lines between paragraphs.
    """
    gr.Info('Calling CLIP Interrogator ...')
    clipi_result = clipi_client.predict(
        image=handle_file(image_input),
        mode="best",
        best_max_flavors=4,
        api_name="/clipi2"
    )
    print(clipi_result)

    llama_q = clipi_result
    gr.Info('Calling Llama2 ...')
    result = llama_gen_fragrance(llama_q)
    print(f"Llama2 result: {result}")

    result = get_text_after_colon(result)
    # Insert an empty line between paragraphs for readability in the Textbox.
    paragraphs = result.split('\n')
    formatted_text = '\n\n'.join(paragraphs)
    return formatted_text


css = """
#col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
div#fragrance textarea {
    font-size: 1.5em;
    line-height: 1.4em;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """
            # Image to Fragrance

            Upload an image, get a pro fragrance idea made by Llama2 !
            """
        )
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(label="Image input", type="filepath", elem_id="image-in")
                submit_btn = gr.Button('Tell me a story')
            with gr.Column():
                #caption = gr.Textbox(label="Generated Caption")
                fragrance = gr.Textbox(label="generated Fragrance", elem_id="fragrance")
        # One column per example row to match inputs=[image_in]; the original
        # rows carried a second ("audience") value that had no matching input.
        gr.Examples(examples=[["./examples/crabby.png"], ["./examples/hopper.jpeg"]],
                    fn=infer,
                    inputs=[image_in],
                    outputs=[fragrance],
                    cache_examples=False
                    )
    submit_btn.click(fn=infer, inputs=[image_in], outputs=[fragrance])

demo.queue(max_size=12).launch(ssr_mode=False, mcp_server=True)