limitedonly41 committed
Commit b421aac · verified · 1 Parent(s): b2aa395

Update app.py

Files changed (1): app.py (+28 −19)
app.py CHANGED
@@ -10,6 +10,8 @@ from unsloth import FastLanguageModel
 import torch
 import re
 
+
+
 # Define helper functions
 async def fetch_data(url):
     headers = {
@@ -100,29 +102,36 @@ def translate_text(text):
         print(f"An error occurred during translation: {e}")
         return None
 
-def load_model():
+
+model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
+
+# Initialize model and tokenizer variables
+model = None
+tokenizer = None
+
+@spaces.GPU()
+def summarize_url(url):
+
+    global model, tokenizer  # Declare model and tokenizer as global variables
+
+    # Load the model
     max_seq_length = 2048
     dtype = None
     load_in_4bit = True
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model is None or tokenizer is None:
+        from unsloth import FastLanguageModel
+
+        # Load the model and tokenizer
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=model_name,  # the model used for training
+            max_seq_length=max_seq_length,
+            dtype=dtype,
+            load_in_4bit=load_in_4bit,
+        )
+        FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
 
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
-        max_seq_length=max_seq_length,
-        dtype=dtype,
-        load_in_4bit=load_in_4bit,
-    )
-
-    # Enable native 2x faster inference if GPU is available
-    if device == "cuda":
-        FastLanguageModel.for_inference(model)
 
-    return model, tokenizer, device
-
-model, tokenizer, device = load_model()
-
-def summarize_url(url):
     result = asyncio.run(fetch_data(url))
     text = concatenate_text(result)
     translated_text = translate_text(text)
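The substance of this hunk: model loading moves out of import time (the old load_model() ran when the Space started) into the first call of summarize_url, which now runs under @spaces.GPU(). On ZeroGPU Spaces a GPU is attached only while a decorated function executes, so touching CUDA at import time fails, and lazy-loading inside the decorated call also keeps startup fast. Note that @spaces.GPU() requires import spaces, which is not visible in this diff. A minimal sketch of the same lazy-load pattern using a plain transformers pipeline (the model name and function are illustrative, not from this commit):

import spaces  # Hugging Face Spaces package providing the GPU decorator

_summarizer = None  # module-level cache so the model loads only once

@spaces.GPU()  # ZeroGPU attaches a GPU only for the duration of this call
def summarize(text: str) -> str:
    global _summarizer
    if _summarizer is None:  # first call pays the load cost; later calls reuse it
        from transformers import pipeline
        _summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",  # illustrative small model
            device=0,  # the GPU granted by spaces.GPU
        )
    return _summarizer(text, max_length=64)[0]["summary_text"]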
@@ -139,7 +148,7 @@ def summarize_url(url):
     """
 
     prompt = alpaca_prompt.format(translated_text)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
     outputs = model.generate(inputs.input_ids, max_new_tokens=64, use_cache=True)
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
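The old code picked a device with a CUDA check and fell back to CPU; the new code hard-codes .to("cuda"), which is safe only because the call runs inside the @spaces.GPU() context. A device-agnostic alternative is to follow the model's own weight placement; a small illustrative helper, not part of the commit:

def encode_on_model_device(tokenizer, model, prompt):
    # Place the tokenized inputs on whatever device the model's weights
    # actually occupy, instead of assuming "cuda"; this also works on CPU.
    device = next(model.parameters()).device
    return tokenizer(prompt, return_tensors="pt").to(device)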
@@ -156,4 +165,4 @@ iface = gr.Interface(
 )
 
 # Launch the Gradio app
-iface.launch()
+iface.launch()
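Only the closing parenthesis of gr.Interface(...) and the launch call fall inside this hunk; the interface arguments sit outside the diff context. A minimal sketch of how summarize_url is presumably wired up (the labels are assumptions):

import gradio as gr

iface = gr.Interface(
    fn=summarize_url,                     # the @spaces.GPU()-decorated function above
    inputs=gr.Textbox(label="URL"),       # assumed: a single URL text field
    outputs=gr.Textbox(label="Summary"),  # assumed: the generated summary as text
)

iface.launch()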