Hunyuan-MT-7B / app.py
akhaliq's picture
akhaliq HF Staff
Upload app.py with huggingface_hub
a233e6e verified
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Checkpoint name or path handed to both `from_pretrained` calls below.
model_name_or_path = "tencent/Hunyuan-MT-7B"

print("Loading model... This may take a few minutes.")

# Tokenizer and weights are loaded once, at import time, from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Weights are requested in bfloat16; device_map="auto" leaves placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, torch_dtype=torch.bfloat16, device_map="auto"
)
def _content_to_text(content):
    """Flatten one history entry's content into plain text ("" if it has none)."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Structured content: keep string parts and the "text" field of dict
        # parts; anything else (e.g. file attachments) is skipped.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return ""


def _history_to_messages(history):
    """Convert chat history (pairs or role/content dicts) to chat-template messages."""
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style entry: {"role": ..., "content": ...}.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue
        # Pair-style entry: (user_message, assistant_message).
        user_msg, assistant_msg = turn
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


@spaces.GPU(duration=120)
def respond(message, history, system_message=None, max_tokens=None, temperature=None, top_p=None):
    """Generate a reply from the Hunyuan-MT model for the current chat turn.

    Args:
        message: The latest user message.
        history: Earlier turns, either as ``(user, assistant)`` pairs or as
            dicts with ``"role"`` and ``"content"`` keys. ``None`` or empty
            means no prior turns.
        system_message: Optional system prompt; an empty string disables it.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus-sampling threshold, used only when sampling.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded).
    """
    # Set default values if None (happens during example caching)
    if system_message is None:
        system_message = "You are a helpful AI assistant."
    if max_tokens is None:
        max_tokens = 512
    if temperature is None:
        temperature = 0.7
    if top_p is None:
        top_p = 0.95

    # Assemble the conversation: optional system prompt, prior turns, new message.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Tokenize with return_dict=True so the attention mask is available too.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[-1]

    generation_kwargs = {
        "max_new_tokens": int(max_tokens),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Greedy decoding: sampling knobs are left out, since temperature=0 is
        # not a valid sampling temperature.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            # pad_token_id is the EOS id, so padding cannot be inferred from
            # the ids alone; pass the mask explicitly.
            attention_mask=inputs.get("attention_mask"),
            **generation_kwargs,
        )

    # Decode only the new tokens
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as respond's optional parameters (system_message, max_tokens,
# temperature, top_p), and each example row follows that order as well.
demo = gr.ChatInterface(
    fn=respond,
    title="Hunyuan-MT-7B Chatbot",
    description="Chat with Tencent's Hunyuan-MT-7B model. This model is particularly good at translation tasks.",
    additional_inputs=[
        gr.Textbox(value="You are a helpful AI assistant.", label="System Message", lines=2),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    # Every example shares the same control values; only the prompt differs.
    examples=[
        [prompt, "You are a helpful AI assistant.", 512, 0.7, 0.95]
        for prompt in (
            "Translate to Chinese: It's on the house.",
            "What are the main differences between Python and JavaScript?",
            "Explain quantum computing in simple terms.",
        )
    ],
    cache_examples=False,
    theme="soft",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()