Jerrz committed on
Commit ef23eb5 · Parent(s): 7995f22

try llama 3

Files changed (1)
  1. app.py +11 -5
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 from transformers import AutoTokenizer, AutoModelForCausalLM
-
+import requests
 
 # ACCESS_TOKEN = os.getenv('ACCESS_TOKEN')
 """
@@ -48,10 +48,16 @@ def respond(
     #     response += token
     #     yield response
 
-    input_ids = tokenizer.encode(message, return_tensors = 'pt')
-    for output in model.generate(input_ids, stream=True):
-        output_text = tokenizer.decode(output, skip_special_tokens=True)
-        yield output_text
+    ### doesn't work: model.generate() has no stream=True parameter
+    # input_ids = tokenizer.encode(message, return_tensors = 'pt')
+    # for output in model.generate(input_ids, stream=True):
+    #     output_text = tokenizer.decode(output, skip_special_tokens=True)
+    #     yield output_text
+
+    API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
+    headers = {"Authorization": "Bearer " + os.environ['hf_token']}
+    response = requests.post(API_URL, headers=headers, json={"inputs": message})
+    return response.json()
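Note on the removed block: `model.generate()` in `transformers` does not accept a `stream=True` argument, which is why that attempt failed. Streaming from a locally loaded model instead goes through a streamer object consumed while `generate()` runs on another thread. A minimal sketch of that approach, assuming the `tokenizer` and `model` objects app.py already creates (the function name and the `max_new_tokens` value are illustrative):

from threading import Thread

from transformers import TextIteratorStreamer

def stream_local(message, tokenizer, model, max_new_tokens=256):
    # Tokenize the prompt; the streamer yields decoded text pieces
    # as generate() produces tokens.
    inputs = tokenizer(message, return_tensors="pt")
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # generate() blocks until completion, so run it in a background
    # thread and consume the streamer here.
    thread = Thread(
        target=model.generate,
        kwargs={**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens},
    )
    thread.start()

    partial = ""
    for text in streamer:
        partial += text
        yield partial  # Gradio replaces the message with each yielded string
    thread.join()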
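Note on the new code path: for text-generation models the serverless Inference API typically returns JSON shaped like `[{"generated_text": "..."}]`, so `response.json()` hands Gradio a list rather than a plain reply string, and the blocking `requests.post` call gives up the token-by-token streaming the old code attempted. Since app.py already imports `InferenceClient`, one way to get both back is sketched below, assuming the same model id and the `hf_token` environment variable from this commit:

import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.environ["hf_token"],
)

def respond(message):
    # chat_completion(stream=True) yields chunks as the model generates,
    # so the Gradio UI can update incrementally again.
    response = ""
    for chunk in client.chat_completion(
        [{"role": "user", "content": message}],
        max_tokens=512,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response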