Update app.py
app.py
CHANGED
@@ -1,17 +1,48 @@
-from gradio_client import Client
+import deepspeed
+import torch
+from transformers import pipeline
+import os
 import gradio as gr
 
-
-client = Client("dicta-il/dictalm2.0-instruct-demo")
+model_id = 'dicta-il/dictalm-7b-instruct'
 
+# load the model and prepare the inference engine
+should_use_fast = True
+print(f'should_use_fast = {should_use_fast}')
+
+local_rank = int(os.getenv('LOCAL_RANK', '0'))
+world_size = int(os.getenv('WORLD_SIZE', '1'))
+generator = pipeline('text-generation', model=model_id,
+                     tokenizer=model_id,
+                     torch_dtype=torch.float16,
+                     use_fast=should_use_fast,
+                     trust_remote_code=True,
+                     device_map="auto")
+
+# check the available device - GPU or CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print('Using device:', device)
+print()
+
+total_mem = 0
+if device.type == 'cuda':
+    print(torch.cuda.get_device_name(0))
+    total_mem = round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 1)
+    print('Total Memory: ', total_mem, 'GB')
+
+should_replace_with_kernel_inject = total_mem >= 12
+print(f'should_replace_with_kernel_inject = {should_replace_with_kernel_inject}')
+
+ds_engine = deepspeed.init_inference(generator.model,
+                                     mp_size=world_size,
+                                     dtype=torch.half,
+                                     replace_with_kernel_inject=should_replace_with_kernel_inject)
+generator.model = ds_engine.module
+
+# text generation function
 def chat_with_model(history):
-    # take the most recent message from the user
     prompt = history[-1]["content"]
-
-    result = client.predict(
-        message=prompt
-    )
-    # append the model's response to the history
+    result = generator(prompt, do_sample=True, min_length=20, max_length=64, top_k=40, top_p=0.92, temperature=0.9)[0]["generated_text"]
     return history + [{"role": "bot", "content": result}]
 
 # build an advanced Gradio interface for a clean, tidy chatbot
@@ -28,7 +59,6 @@ with gr.Blocks(theme="default") as demo:
     send_button = gr.Button("שלח")
 
     def user_chat(history, message):
-        # append the user's message to the history
         return history + [{"role": "user", "content": message}], ""
 
     # send the message both by pressing Enter and by clicking the "שלח" (Send) button
@@ -39,4 +69,4 @@ with gr.Blocks(theme="default") as demo:
         fn=chat_with_model, inputs=chatbot, outputs=chatbot
     )
 
-demo.launch()
+demo.launch()
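
Only a few fragments of the user interface appear in the diff: the send_button line, the user_chat helper, and the fn=chat_with_model wiring; the rest of the with gr.Blocks(theme="default") as demo: block is unchanged and therefore not shown. For orientation only, the sketch below shows one plausible way such a block is wired up. The component names (message_box), the type="messages" argument (which needs a recent Gradio version), and the .then() chaining are assumptions for illustration, not the hidden lines of app.py, and it relies on the generator-backed chat_with_model defined earlier in the file.

    import gradio as gr

    # hypothetical reconstruction of the unchanged UI section; the real file may differ
    with gr.Blocks(theme="default") as demo:
        # "messages" format keeps the history as {"role": ..., "content": ...} dicts,
        # the same shape the handlers above produce
        chatbot = gr.Chatbot(type="messages")
        message_box = gr.Textbox(placeholder="Type a message...")  # user input field (assumed name)
        send_button = gr.Button("שלח")

        def user_chat(history, message):
            # append the user's message to the history and clear the textbox
            return history + [{"role": "user", "content": message}], ""

        # send the message both on Enter and on the Send button,
        # then let chat_with_model (defined earlier in app.py) generate the reply
        message_box.submit(
            fn=user_chat, inputs=[chatbot, message_box], outputs=[chatbot, message_box]
        ).then(
            fn=chat_with_model, inputs=chatbot, outputs=chatbot
        )
        send_button.click(
            fn=user_chat, inputs=[chatbot, message_box], outputs=[chatbot, message_box]
        ).then(
            fn=chat_with_model, inputs=chatbot, outputs=chatbot
        )

    demo.launch()

Because the script only reads LOCAL_RANK and WORLD_SIZE from the environment, it can be started directly with python app.py (a single process, world_size falling back to 1) or through the deepspeed launcher, which sets those variables when the model is sharded across several GPUs.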