File size: 1,265 Bytes
12e34fc
91367ff
 
 
7e0b9e8
 
 
66ac57b
7e0b9e8
66ac57b
 
7e0b9e8
66ac57b
7e0b9e8
66ac57b
7e0b9e8
 
 
 
 
 
 
d8e0a07
91367ff
 
7e0b9e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d94f2c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import json
import os

import gradio as gr
import requests


# Endpoint configuration for the TGI server running on Gaudi.
# Environment variables take precedence; the previous hard-coded values are
# kept as backward-compatible defaults.
URL = os.environ.get("TGI_GAUDI_ENDPOINT_URL", "198.175.88.52")
myport = os.environ.get("myport", "8080")

# Full URL of TGI's text-generation endpoint.
gaudi_device_url = f"http://{URL}:{myport}/generate"

# This assumes that TGI is already serving the model on Gaudi, so no pipeline
# is built here — we simply POST the prompt, like a curl request.
def text_gen(url, prompt):
    """Send *prompt* to a TGI ``/generate`` endpoint and return the response body.

    Fixes two defects in the original:
    - ``requests.post(url, prompt=...)`` passed an invalid ``prompt=`` keyword,
      which raises ``TypeError``; the payload must go in the request body.
    - TGI's ``/generate`` route expects a JSON object of the form
      ``{"inputs": <prompt>}``, and the Gradio ``"text"`` output needs a
      string, not a ``requests.Response`` object.

    Parameters:
        url: full endpoint URL, e.g. ``http://host:port/generate``.
        prompt: the text prompt to generate from.

    Returns:
        The raw response body as text (TGI returns a JSON string).
    """
    payload = {"inputs": prompt}
    resp = requests.post(url, json=payload)
    return resp.text

# CPU fallback path: generates text locally with GPT-2 instead of calling TGI.
# NOTE(review): `pipeline` and `torch` are never imported anywhere in this
# file — calling this function as-is raises NameError. Presumably
# `from transformers import pipeline` and `import torch` were intended;
# confirm and add them before wiring this into the UI.
def text_gen_cpu(prompt):
    # NOTE(review): the pipeline (and the GPT-2 model) is rebuilt on every
    # call, which reloads the model each time — consider constructing it once
    # at module level.
    pipe = pipeline(task="text-generation", model="gpt2", tokenizer="gpt2", device="cpu", torch_dtype=torch.bfloat16)
    # Generate a single continuation capped at 100 tokens.
    result = pipe(prompt, max_length=100, num_return_sequences=1)
    # Returns the pipeline's output list (a list of dicts with
    # "generated_text"), not a plain string.
    return result

# Build and launch the Gradio UI. `text_gen(url, prompt)` takes two
# arguments, so the endpoint URL is supplied through a hidden Textbox
# component; the original code placed the raw URL string directly in
# `inputs`, which Gradio rejects because a plain string there is treated
# as a component shorthand name, not a value.
demo = gr.Interface(
    fn=text_gen,
    inputs=[
        gr.Textbox(label="url", value=gaudi_device_url, visible=False),
        "text",
    ],
    outputs=["text"],
)

demo.launch()


#url = gr.Textbox(label='url', value=URL, visible=False)

# This is some demo code for calling the endpoint through LangChain's
# HuggingFaceEndpoint wrapper instead of requests:
#llm = HuggingFaceEndpoint(
#            endpoint_url=url,
#            max_new_tokens=1024,
#            top_k=10,
#            top_p=0.95,
#            typical_p=0.95,
#            temperature=0.01,
#            repetition_penalty=1.03,
#            streaming=True,
#        )

#result = llm.invoke("Why is the sky blue?")
#print(result)