import transformers
import torch

# Set the path to your local model
model_path = "YOUR_LOCAL_MODEL_PATH"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_path,  # Use local model path
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are an expert assistant in environmental science, EnvGPT. You are a helpful assistant."},
    {"role": "user", "content": "What is the definition of environmental science?"},
]

# Pass sampling parameters directly in the pipeline call.
# do_sample=True is required for top_p and temperature to take effect;
# otherwise the pipeline falls back to greedy decoding.
outputs = pipeline(
    messages,
    max_new_tokens=4096,
    do_sample=True,
    top_p=0.7,        # Nucleus sampling
    temperature=0.9,  # Temperature control
)
print(outputs[0]["generated_text"])
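
# Note: when a list of chat messages is passed in, recent transformers
# versions return the whole conversation (including the newly generated
# assistant turn) under "generated_text". A minimal sketch of printing
# only the model's reply, assuming that message-list return format:
assistant_reply = outputs[0]["generated_text"][-1]["content"]
print(assistant_reply)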