Spaces:

Nitzantry1
/

try3

Sleeping

Nitzantry1 committed on Oct 14, 2024

Commit

c102310

verified ·

1 Parent(s): 82a7ce6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,18 +11,29 @@ print(f'should_use_fast = {should_use_fast}')
 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '1'))
-generator = pipeline('text-generation', model=model_id,
-                     tokenizer=model_id,
-                     torch_dtype=torch.float16,
-                     use_fast=should_use_fast,
-                     trust_remote_code=True,
-                     device_map="auto")
 # בדיקת התקן - GPU או CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print('Using device:', device)
 print()
 # פונקציית יצירת הטקסט
 def chat_with_model(history):
     prompt = history[-1]["content"]

 local_rank = int(os.getenv('LOCAL_RANK', '0'))
 world_size = int(os.getenv('WORLD_SIZE', '1'))
 # בדיקת התקן - GPU או CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print('Using device:', device)
 print()
+# Build the text-generation pipeline for the detected device.
+if device.type == 'cuda':
+    # GPU: half precision, with layers placed automatically by device_map.
+    generator = pipeline('text-generation', model=model_id,
+                         tokenizer=model_id,
+                         torch_dtype=torch.float16,
+                         use_fast=should_use_fast,
+                         trust_remote_code=True,
+                         device_map="auto")
+else:
+    # CPU: load the real weights in the default dtype. Do not wrap
+    # from_pretrained in accelerate.init_empty_weights(): that context is for
+    # building a weightless skeleton on the meta device, not a usable model.
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    # trust_remote_code mirrors the CUDA branch, which passes it for both
+    # the model and the tokenizer.
+    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=should_use_fast,
+                                              trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+    generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
 # פונקציית יצירת הטקסט
 def chat_with_model(history):
     prompt = history[-1]["content"]