serhany committed on
Commit 0c08550 · verified · 1 Parent(s): 78407e7

Upload 2 files

Files changed (2)
  1. app.py +267 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,267 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+ import time
+ import os
+
+ # --- Configuration ---
+ BASE_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
+ # The fine-tuned model hosted on the Hugging Face Hub
+ FINETUNED_MODEL_ID = "serhany/cineguide-qwen2.5-7b-instruct-ft"
+
+ # System prompts
+ SYSTEM_PROMPT_CINEGUIDE = """You are CineGuide, a knowledgeable and friendly movie recommendation assistant. Your goal is to:
+ 1. Provide personalized movie recommendations based on user preferences
+ 2. Give brief, compelling rationales for why you recommend each movie
+ 3. Ask thoughtful follow-up questions to better understand user tastes
+ 4. Maintain an enthusiastic but not overwhelming tone about cinema
+
+ When recommending movies, always explain WHY the movie fits their preferences."""
+ SYSTEM_PROMPT_BASE = "You are a helpful AI assistant."
+
+ # --- Model Loading ---
+ _models_cache = {}
+
+ def get_model_and_tokenizer(model_id_or_path, is_local_path=False):  # Added is_local_path for flexibility
+     if model_id_or_path in _models_cache:
+         return _models_cache[model_id_or_path]
+
+     print(f"Loading model: {model_id_or_path}")
+     # For models from the Hub, trust_remote_code is often needed for custom architectures like Qwen.
+     # For local paths, it might also be needed if they were saved with trust_remote_code=True.
+     tokenizer = AutoTokenizer.from_pretrained(model_id_or_path, trust_remote_code=True)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id_or_path,
+         torch_dtype=torch.bfloat16,
+         device_map="auto",
+         trust_remote_code=True,
+         # attn_implementation="flash_attention_2",  # Optional
+     )
+     model.eval()
+
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+     # Ensure pad_token_id is also set if pad_token is set
+     if hasattr(tokenizer, "pad_token_id") and tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
+         tokenizer.pad_token_id = tokenizer.eos_token_id
+
+     _models_cache[model_id_or_path] = (model, tokenizer)
+     print(f"Finished loading: {model_id_or_path}")
+     return model, tokenizer
+
+ print("Pre-loading models...")
+ model_base, tokenizer_base = None, None
+ model_ft, tokenizer_ft = None, None
+
+ try:
+     model_base, tokenizer_base = get_model_and_tokenizer(BASE_MODEL_ID)
+     print("Base model loaded.")
+ except Exception as e:
+     print(f"Error loading base model ({BASE_MODEL_ID}): {e}")
+
+ try:
+     model_ft, tokenizer_ft = get_model_and_tokenizer(FINETUNED_MODEL_ID)
+     print("Fine-tuned model loaded.")
+ except Exception as e:
+     print(f"Error loading fine-tuned model ({FINETUNED_MODEL_ID}): {e}")
+
+ print("Model pre-loading complete.")
+
+ # --- Inference Function ---
+ def generate_chat_response(message: str, chat_history: list, model_type: str):
+     # Select the model/tokenizer pair and system prompt for the requested variant.
+     if model_type == "base":
+         if model_base is None or tokenizer_base is None:
+             yield f"Base model ({BASE_MODEL_ID}) is not available."
+             return
+         model, tokenizer = model_base, tokenizer_base
+         system_prompt = SYSTEM_PROMPT_BASE
+     elif model_type == "finetuned":
+         if model_ft is None or tokenizer_ft is None:
+             yield f"Fine-tuned model ({FINETUNED_MODEL_ID}) is not available."
+             return
+         model, tokenizer = model_ft, tokenizer_ft
+         system_prompt = SYSTEM_PROMPT_CINEGUIDE
+     else:
+         yield "Invalid model type."
+         return
+
+     conversation = []
+     if system_prompt:
+         conversation.append({"role": "system", "content": system_prompt})
+
+     for user_msg, assistant_msg in chat_history:
+         if user_msg:  # Ensure user_msg is not None
+             conversation.append({"role": "user", "content": user_msg})
+         if assistant_msg:  # Ensure assistant_msg is not None
+             conversation.append({"role": "assistant", "content": assistant_msg})
+     conversation.append({"role": "user", "content": message})
+
+     prompt = tokenizer.apply_chat_template(
+         conversation,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(model.device)
+
+     full_response = ""
+     # eos_token_id may be a list when multiple end-of-sequence tokens are possible.
+     eos_tokens_ids = [tokenizer.eos_token_id]
+     im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
+     # Append <|im_end|> only if it is actually in the vocabulary and not already counted.
+     if im_end_id is not None and im_end_id != tokenizer.unk_token_id and im_end_id not in eos_tokens_ids:
+         eos_tokens_ids.append(im_end_id)
+
+     generated_token_ids = model.generate(
+         **inputs,
+         max_new_tokens=512,
+         do_sample=True,
+         temperature=0.7,
+         top_p=0.9,
+         repetition_penalty=1.1,
+         pad_token_id=tokenizer.pad_token_id,
+         eos_token_id=eos_tokens_ids
+     )
+
+     new_tokens = generated_token_ids[0, inputs['input_ids'].shape[1]:]
+     response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+     response_text = response_text.replace("<|im_end|>", "").strip()
+
+     # Stream the finished reply to the UI character by character.
+     for char in response_text:
+         full_response += char
+         time.sleep(0.005)
+         yield full_response
+
+ def respond_base(message, chat_history):
+     yield from generate_chat_response(message, chat_history, "base")
+
+ def respond_finetuned(message, chat_history):
+     yield from generate_chat_response(message, chat_history, "finetuned")
+
+
+ # --- Gradio UI ---
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         f"""
+ # 🎬 CineGuide vs. Base {BASE_MODEL_ID}
+ Compare the fine-tuned CineGuide movie recommender (loaded from `{FINETUNED_MODEL_ID}`)
+ with the base {BASE_MODEL_ID} model.
+ Type your movie-related query below and see how each model responds!
+ """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown(f"## 🗣️ Base {BASE_MODEL_ID}")
+             chatbot_base = gr.Chatbot(label="Base Model Chat", height=500, bubble_full_width=False)
+             if model_base is None:
+                 gr.Markdown(f"⚠️ Base model ({BASE_MODEL_ID}) could not be loaded.")
+
+         with gr.Column(scale=1):
+             gr.Markdown(f"## 🤖 Fine-tuned CineGuide (from {FINETUNED_MODEL_ID})")
+             chatbot_ft = gr.Chatbot(label="CineGuide Chat", height=500, bubble_full_width=False)
+             if model_ft is None:
+                 gr.Markdown(f"⚠️ Fine-tuned model ({FINETUNED_MODEL_ID}) could not be loaded.")
+
+     with gr.Row():
+         shared_input_textbox = gr.Textbox(
+             show_label=False,
+             placeholder="Enter your movie query here and press Enter...",
+             container=False,
+             scale=7,
+         )
+         submit_button = gr.Button("✉️ Send", variant="primary", scale=1)
+
+     gr.Examples(
+         examples=[
+             "Hi! I'm looking for something funny to watch tonight.",
+             "I love dry, witty humor more than slapstick. Think more British comedy style.",
+             "I'm really into complex sci-fi movies that make you think. I loved Arrival and Blade Runner 2049.",
+             "I need help planning a family movie night. We have kids aged 8, 11, and 14, plus adults.",
+             "I'm going through a tough breakup and need something uplifting but not cheesy romantic.",
+             "I loved Parasite and want to explore more international cinema. Where should I start?",
+         ],
+         inputs=[shared_input_textbox],
+         label="Example Prompts (click to use)"
+     )
+
+     def base_model_predict(user_message, chat_history):
+         if model_base is None:  # Guard: base model failed to load
+             chat_history.append((user_message, f"Base model ({BASE_MODEL_ID}) is not available."))
+             yield chat_history
+             return
+
+         chat_history.append((user_message, ""))
+         for response_chunk in respond_base(user_message, chat_history[:-1]):
+             chat_history[-1] = (user_message, response_chunk)
+             yield chat_history
+
+     def ft_model_predict(user_message, chat_history):
+         if model_ft is None:  # Guard: fine-tuned model failed to load
+             chat_history.append((user_message, f"Fine-tuned model ({FINETUNED_MODEL_ID}) is not available."))
+             yield chat_history
+             return
+
+         chat_history.append((user_message, ""))
+         for response_chunk in respond_finetuned(user_message, chat_history[:-1]):
+             chat_history[-1] = (user_message, response_chunk)
+             yield chat_history
+
+     # Event handlers
+     actions = []
+     if model_base is not None:
+         actions.append(
+             shared_input_textbox.submit(
+                 base_model_predict,
+                 [shared_input_textbox, chatbot_base],
+                 [chatbot_base],
+                 queue=True
+             )
+         )
+         actions.append(
+             submit_button.click(
+                 base_model_predict,
+                 [shared_input_textbox, chatbot_base],
+                 [chatbot_base],
+                 queue=True
+             )
+         )
+
+     if model_ft is not None:
+         actions.append(
+             shared_input_textbox.submit(
+                 ft_model_predict,
+                 [shared_input_textbox, chatbot_ft],
+                 [chatbot_ft],
+                 queue=True
+             )
+         )
+         actions.append(
+             submit_button.click(
+                 ft_model_predict,
+                 [shared_input_textbox, chatbot_ft],
+                 [chatbot_ft],
+                 queue=True
+             )
+         )
+
+     # Clear textbox after all submits are queued. This is slightly simplified.
+     # For a more robust clear, you might need to chain these events or use gr.Group.
+     def clear_textbox_fn():
+         return ""
+
+     if actions:  # If any model is active
+         shared_input_textbox.submit(clear_textbox_fn, [], [shared_input_textbox])
+         submit_button.click(clear_textbox_fn, [], [shared_input_textbox])
+
+
+ # --- Launch the App ---
+ if __name__ == "__main__":
+     demo.queue()
+     demo.launch(debug=True)  # share=True for public link if running locally
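
Note on streaming: the loop at the end of generate_chat_response only simulates streaming by decoding the full reply first and re-yielding it one character at a time with a short sleep, and the TextStreamer import is never used. Below is a minimal sketch, not part of the committed code, of true token streaming with transformers' TextIteratorStreamer; the helper name stream_qwen_response and the reuse of the existing model, tokenizer, and inputs objects are illustrative assumptions.

from threading import Thread
from transformers import TextIteratorStreamer

def stream_qwen_response(model, tokenizer, inputs, max_new_tokens=512):
    # Illustrative sketch: yield partial text as tokens are generated,
    # instead of replaying the finished reply character by character.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        streamer=streamer,
    )
    # model.generate() blocks, so run it in a background thread and consume
    # the streamer from this generator.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()

Inside generate_chat_response, this would replace the decode-and-replay block with a simple yield from stream_qwen_response(model, tokenizer, inputs).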
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch==2.7.1+cu118
+ transformers
+ gradio
+ accelerate
+ datasets
+ peft
+ trl
+ scikit-learn
+ einops
+ sentencepiece
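
A note on the pinned torch build: torch==2.7.1+cu118 carries a CUDA-specific local version tag, and a plain PyPI-only install (for example, a default Hugging Face Space) typically cannot resolve the +cu118 suffix without the matching PyTorch wheel index. A small post-install sanity check, assumed to run in the deployment environment, can confirm what actually got installed:

import torch

# Report the installed torch build and whether CUDA is usable,
# so the result can be compared against the pinned requirement.
print("torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))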