Rafay17 committed on
Commit
93396ac
·
verified ·
1 Parent(s): 463df0a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, FastLanguageModel
3
+
4
+ # Load the model and tokenizer
5
# Options used to load the fine-tuned checkpoint.
# NOTE(review): `FastLanguageModel` is normally provided by the `unsloth`
# package rather than `transformers` -- confirm the import at the top of
# this file. Also confirm that `dtype` accepts a string; a torch dtype or
# None may be expected instead.
load_options = {
    "model_name": "lora_model",  # Replace with your trained model name
    "max_seq_length": 512,
    "dtype": "float16",
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

# Prepare the loaded model for generation before any request is served.
FastLanguageModel.for_inference(model)
12
+
13
+ # Define the inference function
14
def generate_response(user_input):
    """Generate the model's reply to a single user message.

    The message is wrapped in a fixed instruction prompt, tokenized, and
    passed to the module-level ``model``. Only the newly generated tokens
    are decoded, so the instruction prompt is not echoed back to the user.

    Args:
        user_input: Text typed by the user in the Gradio text box.

    Returns:
        The decoded model continuation with surrounding whitespace removed,
        or an empty string when ``user_input`` is empty or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) model call entirely.
    if not user_input or not user_input.strip():
        return ""

    labeled_prompt = (
        "Please provide the response with the following labels:\n"
        f"User Input: {user_input}\n"
        "Response:"
    )

    # Move the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(model.device)

    output_ids = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() returns prompt + continuation for decoder-only models;
    # drop the prompt tokens so only the new text is shown.
    prompt_length = inputs.input_ids.shape[1]
    new_tokens = output_ids[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
32
+
33
+ # Create a Gradio interface
34
# Build the Gradio UI: a single text input wired to generate_response,
# with the returned string shown as text output.
iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chatbot Interface",
    description="Enter your message below:",
)

# Launch the app
iface.launch()