Spaces:

HemanM
/

EvoPlatform

Sleeping

EvoPlatform / inference.py

Update inference.py

2b652a8 verified 4 months ago

1.02 kB

	import torch
	from evo_model import EvoTransformer

	# Load EvoTransformer model
	def load_model(model_path="evo_hellaswag.pt", device=None):
	    """Build an EvoTransformer, load its weights, and prepare it for inference.

	    Args:
	        model_path: Path of the checkpoint handed to ``torch.load``; its
	            contents are passed to ``load_state_dict``.
	        device: Device to load the weights onto and move the model to.
	            When ``None``, ``"cuda"`` is chosen if CUDA is available,
	            otherwise ``"cpu"``.

	    Returns:
	        A ``(model, device)`` tuple: the model switched to eval mode, and
	        the device that was actually used.
	    """
	    if device is None:
	        device = "cuda" if torch.cuda.is_available() else "cpu"

	    model = EvoTransformer()
	    # NOTE(security): torch.load unpickles the file, so only load checkpoints
	    # from a trusted source. Consider weights_only=True if the installed
	    # torch version supports it -- TODO confirm against the deployment.
	    state_dict = torch.load(model_path, map_location=device)
	    model.load_state_dict(state_dict)
	    model.to(device)
	    model.eval()  # inference mode for layers that behave differently in training
	    return model, device

	# Predict the best option (0 or 1)
	def predict(model, tokenizer, prompt, option1, option2, device):
	    """Score two candidate continuations of ``prompt`` and pick the better one.

	    Each option is appended to the prompt (separated by a single space), the
	    two resulting strings are tokenized as one padded batch, and the model is
	    run on the batch's ``input_ids`` only. The two scores are normalized with
	    a softmax across the two options.

	    Args:
	        model: Callable taking the ``input_ids`` tensor and returning scores.
	            Assumes one score per input, i.e. shape ``[2]`` or ``[2, 1]``
	            -- TODO confirm against EvoTransformer.
	        tokenizer: Callable accepting a list of strings plus ``padding``,
	            ``truncation`` and ``return_tensors`` keywords; its result must
	            support ``.to(device)`` and ``["input_ids"]``.
	        prompt: Shared context string.
	        option1: First candidate continuation (index 0).
	        option2: Second candidate continuation (index 1).
	        device: Device the encoded batch is moved to.

	    Returns:
	        A dict with ``"choice"`` (index of the higher-scoring option),
	        ``"confidence"`` (its softmax probability) and ``"scores"`` (the
	        softmax probabilities as a list).
	    """
	    inputs = [
	        f"{prompt} {option1}",
	        f"{prompt} {option2}",
	    ]

	    encoded = tokenizer(inputs, padding=True, truncation=True, return_tensors="pt").to(device)

	    # No gradients are needed for inference.
	    with torch.no_grad():
	        # Per the original author's note, the model output already includes
	        # the classifier, so it is used directly as the per-option score.
	        outputs = model(encoded["input_ids"])

	    # Drop a trailing singleton dimension so the scores are a flat pair.
	    logits = outputs.squeeze(-1)
	    probs = torch.softmax(logits, dim=0)
	    best = torch.argmax(probs).item()

	    return {
	        "choice": best,
	        "confidence": probs[best].item(),
	        "scores": probs.tolist(),
	    }