Spaces:

Schrieffer
/

SARM-Demo

Running on Zero

App Files Files Community

SARM-Demo / app.py

Schrieffer2sy

init

05a9ebf about 1 month ago

raw

history blame

3.66 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer
	from sarm_llama import LlamaSARM

	# --- 1. Load the model and tokenizer ---
	# from_pretrained fetches the model files from the Hugging Face Hub
	# automatically, so the model repository has to be public.

	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"
	MODEL_ID = "schrieffer/SARM-4B"

	print(f"Loading model: {MODEL_ID} on {DEVICE}...")

	# Keyword arguments forwarded verbatim to LlamaSARM.from_pretrained.
	# The sae_* entries configure the model's sparse-autoencoder part
	# (presumably source layer, latent width and top-k; confirm in sarm_llama).
	# The original author notes that remote code must be trusted because SARM
	# uses a custom architecture.
	_sarm_load_kwargs = {
	    "sae_hidden_state_source_layer": 16,
	    "sae_latent_size": 65536,
	    "sae_k": 192,
	    "device_map": DEVICE,
	    "trust_remote_code": True,
	    "torch_dtype": torch.bfloat16,
	}
	model = LlamaSARM.from_pretrained(MODEL_ID, **_sarm_load_kwargs)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

	print("Model loaded successfully!")

	# --- 2. Inference function ---
	# This is the callback that Gradio invokes.

	def get_reward_score(prompt: str, response: str) -> float:
	    """Return the SARM reward score for a prompt/response pair.

	    Args:
	        prompt: The user question or instruction.
	        response: The candidate answer to be scored.

	    Returns:
	        The model's scalar output rounded to 4 decimal places, or ``0.0``
	        when either input is empty or when scoring raised an exception.
	    """
	    # Nothing to score without both sides of the conversation.
	    if not prompt or not response:
	        return 0.0

	    try:
	        # Format the pair with the tokenizer's chat template (the original
	        # author notes this is the template used during model training).
	        messages = [
	            {"role": "user", "content": prompt},
	            {"role": "assistant", "content": response},
	        ]
	        input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(DEVICE)

	        # Inference only: no gradients are needed.
	        with torch.no_grad():
	            score = model(input_ids).logits.item()

	        return round(score, 4)
	    except Exception as e:
	        # Best-effort boundary for the UI: keep the demo responsive and fall
	        # back to 0.0, but log the full traceback so the failure can be
	        # diagnosed (the message alone drops the exception type and origin).
	        # Surfacing the error in the interface would be nicer still.
	        import traceback

	        print(f"Error: {e}")
	        traceback.print_exc()
	        return 0.0

	# --- 3. Build and launch the Gradio interface ---

	# gr.Blocks() is used because it allows a more flexible layout.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # NOTE(review): the Tech Report link below uses an arXiv "submit/..."
	    # id, which is presumably not publicly resolvable; confirm and replace
	    # it with the published abstract URL.
	    intro_text = """
	    # SARM-4B: Interpretable Reward Model Demo
	    This is an interactive demo for the SARM-4B model, an interpretable reward model enhanced by a Sparse Autoencoder.
	    Enter a prompt (question) and a corresponding response below to get a reward score. A higher score indicates a better quality response according to the model.

	    For more details, check out our [Tech Report](https://arxiv.org/abs/submit/6699218) and [Model Card](https://huggingface.co/schrieffer/SARM-4B).
	    """
	    gr.Markdown(intro_text)

	    # The two text inputs share one row.
	    with gr.Row():
	        prompt_input = gr.Textbox(
	            label="Prompt / Question",
	            lines=3,
	            placeholder="e.g., Can you explain the theory of relativity in simple terms?",
	        )
	        response_input = gr.Textbox(
	            label="Response to be Evaluated",
	            lines=5,
	            placeholder="e.g., Of course! Albert Einstein's theory of relativity...",
	        )

	    calculate_btn = gr.Button("Calculate Reward Score", variant="primary")
	    score_output = gr.Number(label="Reward Score", info="A higher score is better.")

	    # Wire the button: score the current prompt/response pair on click.
	    calculate_btn.click(
	        get_reward_score,
	        inputs=[prompt_input, response_input],
	        outputs=score_output,
	    )

	    # Each entry is a [prompt, response] pair.
	    example_pairs = [
	        ["What is the capital of France?", "The capital of France is Paris."],
	        ["What is the capital of France?", "Berlin is a large city in Germany."],
	        ["Write a short poem about the moon.", "Silver orb in velvet night, / Casting shadows, soft and light. / Silent watcher, distant, bright, / Guiding dreams till morning's light."],
	        ["Write a short poem about the moon.", "The moon is a rock."],
	    ]
	    gr.Examples(
	        examples=example_pairs,
	        fn=get_reward_score,
	        inputs=[prompt_input, response_input],
	        outputs=score_output,
	        cache_examples=True,  # cache the example results to speed up loading
	    )

	# Start the app.
	demo.launch()