DominusDeorum committed on
Commit 3390999 · verified · 1 Parent(s): ffcf472

Using custom model

Files changed (1)
  1. app.py +28 -32
app.py CHANGED
@@ -1,52 +1,48 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from typing import List, Tuple
+from huggingface_hub import InferenceClient
+
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
+client = InferenceClient("DominusDeorum/llama-3.2-lora_model")
 
-# Load your fine-tuned model and tokenizer
-model_name = "DominusDeorum/llama-3.2-lora_model" # Replace with your model name
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
 
-# The main function that handles the chatbot response
 def respond(
-    message: str,
-    history: List[Tuple[str, str]],
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
 ):
-    # Start with the system message
     messages = [{"role": "system", "content": system_message}]
 
-    # Add the user-assistant history to the messages
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
 
-    # Add the new user message
     messages.append({"role": "user", "content": message})
 
-    # Tokenize the conversation history for model input
-    inputs = tokenizer(" ".join([msg['content'] for msg in messages]), return_tensors="pt")
+    response = ""
 
-    # Generate the response using the fine-tuned model
-    outputs = model.generate(
-        inputs.input_ids,
-        max_length=max_tokens,
-        temperature=temperature,
-        top_p=top_p
-    )
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
 
-    # Decode the generated response tokens
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response += token
+        yield response
 
-    # Yield the final response to the user
-    return response
 
-# Set up the Gradio interface for the chatbot
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -63,6 +59,6 @@ demo = gr.ChatInterface(
     ],
 )
 
-# Launch the interface
+
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
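
After this change, respond is a generator that streams the accumulated reply from the Inference API instead of returning a single decoded string. A minimal sketch of exercising it outside the Gradio UI (hypothetical usage; assumes app.py is importable and that the Inference API can serve DominusDeorum/llama-3.2-lora_model):

# Hypothetical local check of the streaming generator defined in app.py.
# Assumes app.py is on the import path; importing it creates the InferenceClient
# and the ChatInterface but does not launch the app (guarded by __main__).
from app import respond

for partial in respond(
    message="Hello!",
    history=[],  # no prior (user, assistant) turns
    system_message="You are a friendly chatbot.",  # example system prompt
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
):
    print(partial)  # each iteration yields the response accumulated so far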