akhaliq HF Staff committed on
Commit
e4932b3
·
verified ·
1 Parent(s): e047326

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
# Checkpoint identifier shared by the tokenizer and model loaders below.
model_name_or_path = "tencent/Hunyuan-MT-7B"
print("Loading model... This may take a few minutes.")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Use bfloat16 when CUDA is available and fall back to float32 otherwise.
if torch.cuda.is_available():
    _load_dtype = torch.bfloat16
else:
    _load_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=_load_dtype,
    device_map="auto",
)
15
+
16
def _content_to_text(content):
    """Return the plain text of a chat message's ``content`` field.

    Accepts a plain string, or a list of content blocks where text blocks are
    dicts carrying a ``"text"`` key (NOTE(review): list-of-blocks shape is
    assumed from newer Gradio message formats -- confirm against the installed
    version). Anything else yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = [
            block["text"]
            for block in content
            if isinstance(block, dict) and isinstance(block.get("text"), str)
        ]
        return "".join(parts)
    return ""


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Supports both history layouts:
    * pairs: ``[(user_msg, assistant_msg), ...]``
    * messages: ``[{"role": ..., "content": ...}, ...]``
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            # Skip non-chat roles and empty/non-text entries.
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a reply from the Hunyuan-MT model for the current chat turn.

    Args:
        message: The user's newest message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            role/content dicts.
        system_message: Optional system prompt; omitted from the prompt when
            empty.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        top_p: Nucleus-sampling threshold (only used when sampling).

    Returns:
        The decoded text of the newly generated tokens only.
    """
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Ask for a dict explicitly so the prompt length and attention mask are
    # available regardless of the library's default return type.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    input_ids = inputs["input_ids"]

    generation_kwargs = {
        "max_new_tokens": int(max_tokens),  # sliders may deliver floats
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    else:
        # Greedy decoding: do not forward sampling-only parameters, since
        # temperature=0 is not a valid sampling temperature.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs.get("attention_mask"),
            **generation_kwargs,
        )

    # Drop the prompt tokens so only the model's continuation is decoded.
    prompt_length = input_ids.shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
59
+
60
# Extra controls for the chat UI. Their order matches the trailing parameters
# of respond(): system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are a helpful AI assistant.",
    label="System Message",
    lines=2,
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max New Tokens",
)
_temperature_slider = gr.Slider(
    minimum=0,
    maximum=2,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0,
    maximum=1,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Sample prompts offered in the UI.
_example_prompts = [
    ["Translate to Chinese: It's on the house."],
    ["What are the main differences between Python and JavaScript?"],
    ["Explain quantum computing in simple terms."],
]

# Chat front-end wired to respond().
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    title="Hunyuan-MT-7B Chatbot",
    description="Chat with Tencent's Hunyuan-MT-7B model. This model is particularly good at translation tasks.",
    examples=_example_prompts,
    theme="soft",
)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()