import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
# The same identifier is handed to both from_pretrained() calls below.
model_name = "GOAT-AI/GOAT-70B-Storytelling"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Request bfloat16 weights at load time, then move the model to CUDA.
# NOTE(review): the checkpoint name suggests ~70B parameters; at 2 bytes per
# parameter that is roughly 140 GB -- confirm the target GPU can hold it.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model = model.cuda()

# Generate a story
prompt = "In a dense jungle, a group of explorers discover an ancient treasure map."

# Tokenize the prompt into PyTorch tensors, then move them to CUDA.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to("cuda")

# Sampling is enabled (do_sample=True) with temperature and top-p settings,
# producing at most 512 newly generated tokens.
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=512,
)

# Decode the first returned sequence, dropping special tokens from the text.
generated_ids = outputs[0]
story = tokenizer.decode(generated_ids, skip_special_tokens=True)
print(story)