# Spaces: Running on Zero
import gradio as gr | |
import torch | |
from transformers import AutoTokenizer | |
from sarm_llama import LlamaSARM | |
# --- 1. Load the model and tokenizer ---
# The model files are downloaded automatically from the Hugging Face Hub,
# so the model repository has to be public.
MODEL_ID = "schrieffer/SARM-4B"
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Loading model: {MODEL_ID} on {DEVICE}...")

# SARM has a custom architecture, so remote code must be trusted when loading.
model = LlamaSARM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map=DEVICE,
    trust_remote_code=True,
    # Sparse-autoencoder settings, forwarded unchanged to LlamaSARM.
    sae_hidden_state_source_layer=16,
    sae_latent_size=65536,
    sae_k=192,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

print("Model loaded successfully!")
# --- 2. Inference function ---
# This is the callback that Gradio invokes.
def get_reward_score(prompt: str, response: str) -> float:
    """Return the SARM reward score for a prompt/response pair.

    Args:
        prompt: The user's question or instruction.
        response: The candidate answer to be evaluated.

    Returns:
        The model's score rounded to four decimal places. ``0.0`` is
        returned when either input is empty, and also when scoring raises
        an exception (the error is printed rather than propagated).
    """
    if not (prompt and response):
        return 0.0

    try:
        # Use the same chat template the model was trained with.
        conversation = [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": response},
        ]
        encoded = tokenizer.apply_chat_template(conversation, return_tensors="pt")
        input_ids = encoded.to(DEVICE)
        with torch.no_grad():
            outputs = model(input_ids)
            reward = outputs.logits.item()
        return round(reward, 4)
    except Exception as exc:
        print(f"Error: {exc}")
        # Showing an error in the UI might be nicer, but for simplicity
        # the function just falls back to 0.
        return 0.0
# --- 3. Build and launch the Gradio interface ---
# gr.Blocks() is used because it allows a more flexible layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # NOTE(review): the Tech Report link points at an arXiv submission id
    # (abs/submit/...), which may not resolve publicly — confirm the URL.
    gr.Markdown(
        """
        # SARM-4B: Interpretable Reward Model Demo
        This is an interactive demo for the SARM-4B model, an interpretable reward model enhanced by a Sparse Autoencoder.
        Enter a prompt (question) and a corresponding response below to get a reward score. A higher score indicates a better quality response according to the model.
        For more details, check out our [Tech Report](https://arxiv.org/abs/submit/6699218) and [Model Card](https://huggingface.co/schrieffer/SARM-4B).
        """
    )

    # NOTE(review): assumes only the two textboxes sit inside the Row and the
    # button / score output follow below it — confirm against the deployed layout.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Prompt / Question",
            lines=3,
            placeholder="e.g., Can you explain the theory of relativity in simple terms?",
        )
        response_input = gr.Textbox(
            label="Response to be Evaluated",
            lines=5,
            placeholder="e.g., Of course! Albert Einstein's theory of relativity...",
        )

    calculate_btn = gr.Button("Calculate Reward Score", variant="primary")
    score_output = gr.Number(label="Reward Score", info="A higher score is better.")

    # Wire the button to the scoring callback.
    calculate_btn.click(
        fn=get_reward_score,
        inputs=[prompt_input, response_input],
        outputs=score_output,
    )

    # Each example is a [prompt, response] pair: a good and a poor answer
    # for each of the two prompts.
    example_pairs = [
        ["What is the capital of France?", "The capital of France is Paris."],
        ["What is the capital of France?", "Berlin is a large city in Germany."],
        ["Write a short poem about the moon.", "Silver orb in velvet night, / Casting shadows, soft and light. / Silent watcher, distant, bright, / Guiding dreams till morning's light."],
        ["Write a short poem about the moon.", "The moon is a rock."],
    ]
    gr.Examples(
        examples=example_pairs,
        fn=get_reward_score,
        inputs=[prompt_input, response_input],
        outputs=score_output,
        cache_examples=True,  # cache the example results to speed up loading
    )

# Start the app.
demo.launch()