limitedonly41 committed
Commit b421aac · verified · 1 Parent(s): b2aa395

Update app.py

Files changed (1): app.py (+28 −19)
app.py CHANGED
@@ -10,6 +10,8 @@ from unsloth import FastLanguageModel
 import torch
 import re
 
+
+
 # Define helper functions
 async def fetch_data(url):
     headers = {
@@ -100,29 +102,36 @@ def translate_text(text):
         print(f"An error occurred during translation: {e}")
         return None
 
-def load_model():
+
+model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
+
+# Initialize model and tokenizer variables
+model = None
+tokenizer = None
+
+@spaces.GPU()
+def summarize_url(url):
+
+    global model, tokenizer  # Declare model and tokenizer as global variables
+
+    # Load the model
     max_seq_length = 2048
     dtype = None
     load_in_4bit = True
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model is None or tokenizer is None:
+        from unsloth import FastLanguageModel
+
+        # Load the model and tokenizer
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=model_name,  # the model used for training
+            max_seq_length=max_seq_length,
+            dtype=dtype,
+            load_in_4bit=load_in_4bit,
+        )
+        FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
 
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
-        max_seq_length=max_seq_length,
-        dtype=dtype,
-        load_in_4bit=load_in_4bit,
-    )
-
-    # Enable native 2x faster inference if GPU is available
-    if device == "cuda":
-        FastLanguageModel.for_inference(model)
 
-    return model, tokenizer, device
-
-model, tokenizer, device = load_model()
-
-def summarize_url(url):
     result = asyncio.run(fetch_data(url))
     text = concatenate_text(result)
     translated_text = translate_text(text)
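The substance of this hunk: model loading moves out of import time (the old load_model() ran when the Space started) into the first call of summarize_url, which now runs under @spaces.GPU(). On ZeroGPU Spaces a GPU is attached only while a decorated function executes, so touching CUDA at import time fails, and lazy-loading inside the decorated call also keeps startup fast. Note that @spaces.GPU() requires import spaces, which is not visible in this diff. A minimal sketch of the same lazy-load pattern using a plain transformers pipeline (the model name and function are illustrative, not from this commit):

import spaces  # Hugging Face Spaces package providing the GPU decorator

_summarizer = None  # module-level cache so the model loads only once

@spaces.GPU()  # ZeroGPU attaches a GPU only for the duration of this call
def summarize(text: str) -> str:
    global _summarizer
    if _summarizer is None:  # first call pays the load cost; later calls reuse it
        from transformers import pipeline
        _summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",  # illustrative small model
            device=0,  # the GPU granted by spaces.GPU
        )
    return _summarizer(text, max_length=64)[0]["summary_text"]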
@@ -139,7 +148,7 @@ def summarize_url(url):
     """
 
     prompt = alpaca_prompt.format(translated_text)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
     outputs = model.generate(inputs.input_ids, max_new_tokens=64, use_cache=True)
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
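The old code picked a device with a CUDA check and fell back to CPU; the new code hard-codes .to("cuda"), which is safe only because the call runs inside the @spaces.GPU() context. A device-agnostic alternative is to follow the model's own weight placement; a small illustrative helper, not part of the commit:

def encode_on_model_device(tokenizer, model, prompt):
    # Place the tokenized inputs on whatever device the model's weights
    # actually occupy, instead of assuming "cuda"; this also works on CPU.
    device = next(model.parameters()).device
    return tokenizer(prompt, return_tensors="pt").to(device)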
@@ -156,4 +165,4 @@ iface = gr.Interface(
 )
 
 # Launch the Gradio app
-iface.launch()
+iface.launch()
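Only the closing parenthesis of gr.Interface(...) and the launch call fall inside this hunk; the interface arguments sit outside the diff context. A minimal sketch of how summarize_url is presumably wired up (the labels are assumptions):

import gradio as gr

iface = gr.Interface(
    fn=summarize_url,                     # the @spaces.GPU()-decorated function above
    inputs=gr.Textbox(label="URL"),       # assumed: a single URL text field
    outputs=gr.Textbox(label="Summary"),  # assumed: the generated summary as text
)

iface.launch()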