soufyane committed on
Commit 74f2d58 · verified · 1 Parent(s): b7a60d3

Update app.py

Files changed (1)
app.py +136 -2
app.py CHANGED
@@ -1,3 +1,137 @@
- import tensorflow as tf

- print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
+ import gradio as gr
+ import os
+ import spaces
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread

+ # Read the Hugging Face token from the environment
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+
+
+ DESCRIPTION = '''
+ <div>
+ <h1 style="text-align: center;">Mistral 7B Instruct v0.3</h1>
+ <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3"><b>mistralai/Mistral-7B-Instruct-v0.3</b></a>. The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of Mistral-7B-v0.3, which is Mistral-7B-v0.2 with an extended vocabulary. Feel free to play with it, or duplicate the Space to run it privately!</p>
+ <p>🔎 For more details about the release and how to use the model with <code>transformers</code>, visit the model card linked above.</p>
+ <p>🦕 The Instruct model has a vocabulary extended to 32768 tokens, supports the v3 tokenizer, and supports function calling.</p>
+ </div>
+ '''
+
+
+ PLACEHOLDER = """
+ <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+    <img src="https://cdn-thumbnails.huggingface.co/social-thumbnails/models/mistralai/Mistral-7B-Instruct-v0.3.png" style="width: 70%; max-width: 550px; height: auto; opacity: 0.55;">
+    <p style="font-size: 20px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
+ </div>
+ """
+
+
+ css = """
+ h1 {
+   text-align: center;
+   display: block;
+ }
+
+ #duplicate-button {
+   margin: auto;
+   color: white;
+   background: #1565c0;
+   border-radius: 100vh;
+ }
+ """
+
+ # Load the tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3", device_map="auto")
+ # Stop on the model's EOS token; the Mistral v3 tokenizer has no <|eot_id|> token (a Llama 3 leftover)
+ terminators = [
+     tokenizer.eos_token_id,
+ ]
+
+ @spaces.GPU(duration=120)
+ def chat_mistral7b_v0dot3(message: str,
+                           history: list,
+                           temperature: float,
+                           max_new_tokens: int
+                           ) -> str:
+     """
+     Generate a streaming response using the mistralai/Mistral-7B-Instruct-v0.3 model.
+     Args:
+         message (str): The input message.
+         history (list): The conversation history used by ChatInterface.
+         temperature (float): The temperature for generating the response.
+         max_new_tokens (int): The maximum number of new tokens to generate.
+     Returns:
+         str: The generated response, yielded incrementally as it streams.
+     """
+     conversation = []
+     for user, assistant in history:
+         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+     conversation.append({"role": "user", "content": message})
+
+     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         temperature=temperature,
+         eos_token_id=terminators,
+     )
+     # Enforce greedy generation (do_sample=False) when the temperature is 0, avoiding a crash.
+     if temperature == 0:
+         generate_kwargs['do_sample'] = False
+
+     # Run generation in a background thread so tokens can be yielded as they arrive
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     outputs = []
+     for text in streamer:
+         outputs.append(text)
+         yield "".join(outputs)
+
+
+ # Gradio block
+ chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
+
+ with gr.Blocks(fill_height=True, css=css) as demo:
+
+     gr.Markdown(DESCRIPTION)
+     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+     gr.ChatInterface(
+         fn=chat_mistral7b_v0dot3,
+         chatbot=chatbot,
+         fill_height=True,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Slider(minimum=0,
+                       maximum=1,
+                       step=0.1,
+                       value=0.95,
+                       label="Temperature",
+                       render=False),
+             gr.Slider(minimum=128,
+                       maximum=4096,
+                       step=1,
+                       value=512,
+                       label="Max new tokens",
+                       render=False),
+         ],
+         examples=[
+             ['How to set up a human base on Mars? Give a short answer.'],
+             ['Explain the theory of relativity to me like I’m 8 years old.'],
+             ['What is 9,000 * 9,000?'],
+             ['Write a pun-filled happy birthday message to my friend Alex.'],
+             ['Justify why a penguin might make a good king of the jungle.']
+         ],
+         cache_examples=False,
+     )
+
+
+ if __name__ == "__main__":
+     demo.launch()