Rafay17 committed on
Commit a3af99d · verified · 1 Parent(s): 6982029

Update app.py

Files changed (1):
  1. app.py (+67, -14)
app.py CHANGED
@@ -1,34 +1,87 @@
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

  # Load the model and tokenizer
- model_name = "Rafay17/Llama3.2_1b_customModel2"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")  # Ensure to load the model on GPU

- # Prepare the model for inference
- model.eval()

- # Define a function to generate responses
  def generate_response(input_text):
      # Prepare the input for the model
-     inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

      # Set up the text streamer to stream the generated response
      text_streamer = TextStreamer(tokenizer, skip_prompt=True)

      # Generate the response
-     with torch.no_grad():
          model.generate(
              input_ids=inputs.input_ids,
              attention_mask=inputs.attention_mask,
              streamer=text_streamer,
-             max_new_tokens=64,  # Adjust this value as needed
              pad_token_id=tokenizer.eos_token_id,
          )

- # Example usage of the generate_response function
- input_text = "Hello, how can I help you today?"
- print("Generating response for input:")
- print(input_text)
- generate_response(input_text)
+ from transformers import AutoTokenizer, TextStreamer
+ from unsloth import FastLanguageModel
  import torch

  # Load the model and tokenizer
+ model_name = "Rafay17/Llama3.2_1b_customModle2"  # Use your specific model name
  tokenizer = AutoTokenizer.from_pretrained(model_name)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers often ship without a pad token

+ # FastLanguageModel.from_pretrained returns a (model, tokenizer) pair;
+ # unpack it so that `model` is the model object, not the tuple
+ model, _ = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=512,    # Adjust as needed
+     dtype=torch.float16,   # Adjust as needed (None auto-detects)
+     load_in_4bit=True,     # Adjust based on your needs
+ )

+ FastLanguageModel.for_inference(model)  # Call this immediately after loading the model

+ # Function to generate a response
  def generate_response(input_text):
+     # Prepare the labeled prompt for the model
+     labeled_prompt = (
+         "Please provide the response with the following labels:\n"
+         "Speaker: [SPEAKER]\n"
+         "Text: [TEXT]\n"
+         "Sentiment: [SENTIMENT]\n"
+         "Emotion: [EMOTION]\n"
+         "Intent: [INTENT]\n"
+         "Tone: [TONE]\n"
+         "Confidence Level: [CONFIDENCE]\n"
+         "Frustration Level: [FRUSTRATION]\n"
+         "Response Length: [LENGTH]\n"
+         "Action Required: [ACTION]\n"
+         "Interruption: [INTERRUPTION]\n"
+         "Cooperation Level: [COOPERATION]\n"
+         "Clarity: [CLARITY]\n"
+         "Objective: [OBJECTIVE]\n"
+         "Timeline: [TIMELINE]\n"
+         "Motivation: [MOTIVATION]\n"
+         "Conversation Stage: [STAGE]\n"
+         "Resolution: [RESOLUTION]\n"
+         "Context: [CONTEXT]\n"
+         "Urgency: [URGENCY]\n"
+         "Problem Type: [PROBLEM]\n"
+         "Key Words: [KEYWORDS]\n"
+         "Expected Detail: [DETAIL]\n"
+         "Time Gap: [TIME]\n"
+         "Client Expectation: [EXPECTATION]\n"
+         "Channel: [CHANNEL]\n"
+         "Power Relationship: [POWER]\n\n"
+         f"User Input: {input_text}\n"
+         "Response:"
+     )

      # Prepare the input for the model
+     inputs = tokenizer(
+         [labeled_prompt],
+         return_tensors="pt",
+         padding=True,
+         truncation=True,
+         max_length=512,  # Ensure this matches your model's max length
+     ).to("cuda")

      # Set up the text streamer to stream the generated response
      text_streamer = TextStreamer(tokenizer, skip_prompt=True)

      # Generate the response
+     with torch.no_grad():  # Disable gradient calculation for inference
          model.generate(
              input_ids=inputs.input_ids,
              attention_mask=inputs.attention_mask,
              streamer=text_streamer,
+             max_new_tokens=100,  # Adjust this value as needed
              pad_token_id=tokenizer.eos_token_id,
          )

+ # Function to take user input and generate output
+ def user_interaction():
+     while True:
+         user_input = input("Enter conversation details (or type 'exit' to quit): ")
+         if user_input.lower() == 'exit':
+             print("Exiting the program.")
+             break
+         print("Generating response for input:")
+         generate_response(user_input)

+ # Start the user interaction
+ user_interaction()
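
For quick verification outside the interactive loop, a minimal standalone sketch of the loading-and-generation pattern the new version relies on. The model name, sequence length, and quantization settings are taken from the diff above; the (model, tokenizer) unpacking follows unsloth's documented return value, and the sketch assumes a CUDA-capable GPU and access to the model repo:

import torch
from unsloth import FastLanguageModel

# from_pretrained returns both the model and its matching tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Rafay17/Llama3.2_1b_customModle2",
    max_seq_length=512,
    dtype=torch.float16,  # None would auto-detect instead
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable unsloth's fast inference path

# Smoke test: one short, non-streamed completion
inputs = tokenizer(["Hello, how can I help you today?"], return_tensors="pt").to("cuda")
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=32,
        pad_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Reusing the tokenizer returned here would also let app.py drop its separate AutoTokenizer.from_pretrained call; the diff keeps both loads, so the app works either way.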