Schrieffer2sy commited on
Commit
739e21a
·
1 Parent(s): 06d7c11
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -4,30 +4,30 @@ from transformers import AutoTokenizer
4
  from sarm_llama import LlamaSARM
5
 
6
  # --- 1. Load Model and Tokenizer ---
7
- # This step automatically downloads your model files from the Hugging Face Hub.
8
- # Ensure your model repository is public.
9
 
10
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
  MODEL_ID = "schrieffer/SARM-4B"
12
 
13
- print(f"Loading model: {MODEL_ID} on {DEVICE}...")
14
 
15
  # trust_remote_code=True is required because SARM has a custom architecture.
 
16
  model = LlamaSARM.from_pretrained(
17
  MODEL_ID,
18
  sae_hidden_state_source_layer=16,
19
  sae_latent_size=65536,
20
  sae_k=192,
21
- device_map=DEVICE,
22
  trust_remote_code=True,
23
  torch_dtype=torch.bfloat16
24
  )
25
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
26
 
27
- print("Model loaded successfully!")
 
 
28
 
29
  # --- 2. Define the Inference Function ---
30
- # This function will be called by Gradio.
31
 
32
  def get_reward_score(prompt: str, response: str) -> float:
33
  """
@@ -39,7 +39,8 @@ def get_reward_score(prompt: str, response: str) -> float:
39
  try:
40
  # Use the same chat template as used during model training.
41
  messages = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response}]
42
- input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(DEVICE)
 
43
 
44
  with torch.no_grad():
45
  score = model(input_ids).logits.item()
@@ -47,12 +48,10 @@ def get_reward_score(prompt: str, response: str) -> float:
47
  return round(score, 4)
48
  except Exception as e:
49
  print(f"Error: {e}")
50
- # It might be better to return an error message on the UI, but here we simply return 0.
51
  return 0.0
52
 
53
  # --- 3. Create and Launch the Gradio Interface ---
54
 
55
- # Use gr.Blocks() for a more flexible layout.
56
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
57
  gr.Markdown(
58
  """
@@ -94,7 +93,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
94
  calculate_btn = gr.Button("Calculate Reward Score", variant="primary")
95
  score_output = gr.Number(label="Reward Score", info="A higher score is better.")
96
 
97
- # Define the button's click behavior.
98
  calculate_btn.click(
99
  fn=get_reward_score,
100
  inputs=[prompt_input, response_input],
@@ -111,7 +109,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
111
  inputs=[prompt_input, response_input],
112
  outputs=score_output,
113
  fn=get_reward_score,
114
- cache_examples=True # Cache the results of the examples to speed up loading.
115
  )
116
 
117
  # Launch the application.
 
4
  from sarm_llama import LlamaSARM
5
 
6
  # --- 1. Load Model and Tokenizer ---
 
 
7
 
8
+ # No longer need to manually check for CUDA. `device_map="auto"` will handle it.
9
  MODEL_ID = "schrieffer/SARM-4B"
10
 
11
+ print(f"Loading model: {MODEL_ID} with device_map='auto'...")
12
 
13
  # trust_remote_code=True is required because SARM has a custom architecture.
14
+ # Using device_map="auto" is the key to correctly loading the model onto the GPU.
15
  model = LlamaSARM.from_pretrained(
16
  MODEL_ID,
17
  sae_hidden_state_source_layer=16,
18
  sae_latent_size=65536,
19
  sae_k=192,
20
+ device_map="auto", # <<< KEY CHANGE HERE
21
  trust_remote_code=True,
22
  torch_dtype=torch.bfloat16
23
  )
24
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
25
 
26
+ # We can get the device from the model itself after loading
27
+ DEVICE = model.device
28
+ print(f"Model loaded successfully on device: {DEVICE}")
29
 
30
  # --- 2. Define the Inference Function ---
 
31
 
32
  def get_reward_score(prompt: str, response: str) -> float:
33
  """
 
39
  try:
40
  # Use the same chat template as used during model training.
41
  messages = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response}]
42
+ # With device_map="auto", accelerate's dispatch hooks move inputs onto the model's device,
+ # so an explicit .to(DEVICE) is no longer required here.
43
+ input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt") # <<< REMOVED .to(DEVICE)
44
 
45
  with torch.no_grad():
46
  score = model(input_ids).logits.item()
 
48
  return round(score, 4)
49
  except Exception as e:
50
  print(f"Error: {e}")
 
51
  return 0.0
52
 
53
  # --- 3. Create and Launch the Gradio Interface ---
54
 
 
55
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
56
  gr.Markdown(
57
  """
 
93
  calculate_btn = gr.Button("Calculate Reward Score", variant="primary")
94
  score_output = gr.Number(label="Reward Score", info="A higher score is better.")
95
 
 
96
  calculate_btn.click(
97
  fn=get_reward_score,
98
  inputs=[prompt_input, response_input],
 
109
  inputs=[prompt_input, response_input],
110
  outputs=score_output,
111
  fn=get_reward_score,
112
+ cache_examples=True
113
  )
114
 
115
  # Launch the application.