Spaces:

uoda5t
/

test2

Running

File size: 1,841 Bytes

8957b2f
07896ce
6bd508a
 
4dbb3f0
4065559
4dbb3f0
6bd508a
07896ce
 
 
 
 
 
 
4dbb3f0
 
 
6bd508a
4dbb3f0
 
 
 
 
 
 
 
6bd508a
4dbb3f0
 
 
 
 
 
 
 
 
6bd508a
4dbb3f0
 
 
 
 
6bd508a
 
 
 
4dbb3f0
 
 
 
 
 
6bd508a
8957b2f
 
6bd508a

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
import torch

# Checkpoint identifier passed to every from_pretrained() call below
model_name = "cognitivecomputations/TinyDolphin-2.8-1.1b"
model = AutoModelForCausalLM.from_pretrained(model_name)

# AutoTokenizer is tried first; only a ValueError from it triggers the
# explicit LlamaTokenizer fallback.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except ValueError:
    print("Failed to load AutoTokenizer. Falling back to LlamaTokenizer.")
    tokenizer = LlamaTokenizer.from_pretrained(model_name)

# Pick CUDA when torch reports it as available, otherwise stay on the CPU,
# then move the model onto the chosen device.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

def generate_response(message, chat_history):
    """Generate the model's reply to ``message`` given the earlier turns.

    The conversation is flattened into a plain-text transcript of
    ``Human:`` / ``AI:`` lines that ends with an open ``AI:`` line for the
    model to complete.

    Args:
        message: The latest user message.
        chat_history: Earlier turns, each indexed as ``turn[0]`` (user text)
            and ``turn[1]`` (AI text). An empty or ``None`` history yields a
            prompt containing only the new message.

    Returns:
        The text generated for the current turn, stripped of surrounding
        whitespace.
    """
    transcript = "".join(
        f"Human: {turn[0]}\nAI: {turn[1]}\n" for turn in (chat_history or [])
    )
    prompt = f"{transcript}Human: {message}\nAI:"

    # Tokenize and sample a continuation of the transcript
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them, so that an "AI:" occurring inside the reply (or inside
    # the prompt) can never be mistaken for the start of the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # If the model keeps writing the dialogue on its own, keep only the
    # current AI turn and drop the invented "Human:" follow-up.
    reply, _, _ = generated_text.partition("\nHuman:")
    return reply.strip()

# Options for the chat UI, collected up front and unpacked into ChatInterface
interface_options = {
    "chatbot": gr.Chatbot(height=300),
    "textbox": gr.Textbox(
        placeholder="Type your message here...",
        container=False,
        scale=7,
    ),
    "title": "TinyDolphin-2.8-1.1b Chatbot",
    "description": "Chat with the TinyDolphin-2.8-1.1b model.",
    "theme": "soft",
    "examples": [
        "Tell me a short story",
        "What's the capital of France?",
        "Explain quantum computing",
    ],
    "cache_examples": False,
}

# generate_response is the callback given to the interface
iface = gr.ChatInterface(generate_response, **interface_options)

# Start the app
iface.launch()