Yash Sachdeva committed on
Commit 5ed2b9f · 1 Parent(s): 38fc296
Files changed (1)
  1. question_paper.py +8 -15
question_paper.py CHANGED
@@ -1,25 +1,18 @@
  import transformers
  import torch
- import os

  from fastapi import FastAPI

- from llama_cpp import Llama
+ from transformers import AutoModelForCausalLM, AutoTokenizer

  app = FastAPI()
  @app.get("/")
  def llama():
-     llm = Llama(
-         model_path="./llama-2-7b-chat.Q2_K.gguf"
-         # n_gpu_layers=-1, # Uncomment to use GPU acceleration
-         # seed=1337, # Uncomment to set a specific seed
-         # n_ctx=2048, # Uncomment to increase the context window
-     )
+     tokenizer = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-v1.0")
+     model = AutoModelForCausalLM.from_pretrained("Upstage/SOLAR-10.7B-v1.0", device_map="auto", torch_dtype=torch.float16)
+     text = "Hi, my name is "
+     inputs = tokenizer(text, return_tensors="pt")
+     outputs = model.generate(**inputs, max_new_tokens=64)
+     print(tokenizer.decode(outputs[0], skip_special_tokens=True))

-     output = llm(
-         "Q: Name the planets in the solar system? A: ", # Prompt
-         max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window
-         echo=True # Echo the prompt back in the output
-     ) # Generate a completion, can also call create_completion
-
-     return output["choices"][0]["text"].strip()
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
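As a quick check of the new endpoint, a minimal client sketch might look like the following. This is not part of the commit; it assumes the app is served with uvicorn as question_paper:app on the default port 8000 and that the requests package is installed.

# Start the server first, e.g.:
#   uvicorn question_paper:app --host 0.0.0.0 --port 8000
import requests

# GET / invokes llama(), which loads SOLAR-10.7B, generates up to 64 new tokens
# for the hard-coded prompt, and returns the decoded text as a JSON string.
# A generous timeout is used because the committed handler loads the model
# on every request, which can take several minutes on first call.
response = requests.get("http://localhost:8000/", timeout=600)
print(response.json())

Since the handler reloads the tokenizer and model inside llama(), each request pays the full load cost; loading them once at module import time and keeping only generate() in the handler would be the more common layout.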