# NOTE(review): this file was captured from a Hugging Face Space whose status
# page showed "Runtime error" — likely the TOKENIZER global bug fixed below.
# Third-party dependencies for the model-serving app.
import torch
import transformers
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

# FastAPI application instance (no routes are registered in this chunk;
# presumably endpoints live elsewhere or llama() was meant to be one — verify).
app = FastAPI()

# Lazily-initialized module-level globals, populated by init_model().
MODEL = None
TOKENIZER = None
def llama():
    """Generate a short completion for a fixed demo prompt.

    Relies on the module-level TOKENIZER and MODEL globals having been
    populated (see init_model). Prints and returns the decoded text,
    which includes the prompt itself.
    """
    prompt = "Hi, my name is "
    encoded = TOKENIZER(prompt, return_tensors="pt")
    generated = MODEL.generate(**encoded, max_new_tokens=64)
    decoded = TOKENIZER.decode(generated[0], skip_special_tokens=True)
    print(decoded)
    return decoded
def init_model():
    """Load the SOLAR-10.7B tokenizer and model into module-level globals.

    Idempotent: does nothing when MODEL is already populated.

    Bug fix: the original declared only `global MODEL`, so the TOKENIZER
    assignment created a local that was discarded — the module-level
    TOKENIZER stayed None and llama() crashed at call time.
    """
    global MODEL, TOKENIZER
    if MODEL is None:  # identity check: MODEL is either None or a loaded model
        print("loading model")
        TOKENIZER = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-v1.0")
        MODEL = AutoModelForCausalLM.from_pretrained(
            "Upstage/SOLAR-10.7B-v1.0",
            device_map="auto",
            torch_dtype=torch.float16,
        )
        print("loaded model")