sakurasaniya12345 committed on
Commit e6c46a3 · verified · 1 Parent(s): 0575df1

Update app.py

Files changed (1)
  1. app.py +43 -40
app.py CHANGED
@@ -1,43 +1,46 @@
- import torch
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
- model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-
- # Load tokenizer
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- tokenizer.padding_side = "left"
- tokenizer.pad_token = tokenizer.eos_token
-
- # Load model in 4-bit quantization
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     device_map="auto",
-     load_in_4bit=True,
-     torch_dtype=torch.float16
- )
-
- # Create text generation pipeline
- chatbot = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     device=0,
-     pad_token_id=tokenizer.eos_token_id
- )
-
- # Function for chatting
- def mistral_chat(user_input):
-     response = chatbot(user_input, max_new_tokens=200, temperature=0.7, do_sample=True)
-     return response[0]["generated_text"]
-
- # Gradio interface
- iface = gr.Interface(
-     fn=mistral_chat,
-     inputs="text",
-     outputs="text",
-     title="Mistral 7B Chatbot"
  )

- iface.launch()
-

  import gradio as gr
+ from huggingface_hub import InferenceClient
+ import os
+
+ # Load HF Token from Environment Variables (Make sure to add it in Hugging Face Secrets)
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ # Use Mistral-7B instead of Zephyr
+ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=HF_TOKEN)
+
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     messages = [{"role": "system", "content": system_message}]
+
+     for user_msg, bot_msg in history:
+         if user_msg:
+             messages.append({"role": "user", "content": user_msg})
+         if bot_msg:
+             messages.append({"role": "assistant", "content": bot_msg})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     # Iterate over streamed chunks; stream=True is required for the delta-based loop below
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         response += chunk.choices[0].delta.content or ""
+         yield response
+
+ # Gradio Chat Interface
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+     ],
  )

+ if __name__ == "__main__":
+     demo.launch()
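
For reference, a minimal sketch of how the streaming chat_completion call introduced above can be smoke-tested outside Gradio. This is not part of the commit; it assumes HF_TOKEN is set in the local environment, huggingface_hub is installed, and the model is reachable through the Inference API, and the prompt text is purely illustrative:

import os
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=os.getenv("HF_TOKEN"))

messages = [
    {"role": "system", "content": "You are a friendly Chatbot."},
    {"role": "user", "content": "Say hello in one sentence."},  # illustrative prompt
]

# With stream=True, chat_completion yields chunks whose choices[0].delta.content
# holds the newly generated text (it can be None on the final chunk, hence the guard).
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    print(chunk.choices[0].delta.content or "", end="", flush=True)
print()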