kasterkeqi
/

Lumo-8B-Fork-Sol-Copilot

Inference Endpoints

8-bit precision

Model card Files Files and versions Community

kasterkeqi committed on Jan 22

Commit

ab072a7

·

1 Parent(s): 0b51b54

Initial Commit

Files changed (1) hide show

handler.py +13 -9

handler.py CHANGED Viewed

@@ -1,26 +1,30 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-MODEL_NAME = "lumolabs-ai/Lumo-8B-Instruct"
 class ModelHandler:
     def __init__(self):
-        """Load the model and tokenizer"""
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Loading model on {self.device}...")
         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-        self.model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(self.device)
         print("Model loaded successfully.")
     def __call__(self, inputs):
-        """Handle inference requests"""
-        text = inputs.get("inputs", "")
-        if not text:
             return {"error": "No input provided"}
         # Tokenize input
-        input_tokens = self.tokenizer(text, return_tensors="pt").to(self.device)
         # Generate output
         with torch.no_grad():

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+MODEL_NAME = "kasterkeqi/Lumo-8B-Fork-Sol-Copilot"
 class ModelHandler:
     def __init__(self):
+        """Initialize the model and tokenizer."""
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Loading model on {self.device}...")
         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+        ).to(self.device)
         print("Model loaded successfully.")
     def __call__(self, inputs):
+        """Handle inference requests."""
+        prompt = inputs.get("inputs", "")
+        if not prompt:
             return {"error": "No input provided"}
         # Tokenize input
+        input_tokens = self.tokenizer(prompt, return_tensors="pt").to(self.device)
         # Generate output
         with torch.no_grad():