import gradio as gr
import requests
import torch
from transformers import pipeline

#url = os.environ["TGI_GAUDI_ENDPOINT_URL"]
#myport = os.environ["myport"]

URL = "198.175.88.52"
#URL = "100.81.119.213"
myport = "8080"

gaudi_device_url = f"http://{URL}:{myport}/generate"

# This assumes that TGI is already running on Gaudi, so we don't need to define
# the pipeline here; we're effectively sending a curl command to the server.
def text_gen(url, prompt):
    # TGI's /generate endpoint expects a JSON body with an "inputs" field.
    resp = requests.post(
        url,
        json={"inputs": prompt},
        headers={"Content-Type": "application/json"},
    )
    return resp.json()["generated_text"]

# CPU fallback: run a small model locally instead of calling the TGI server.
def text_gen_cpu(prompt):
    pipe = pipeline(
        task="text-generation",
        model="gpt2",
        tokenizer="gpt2",
        device="cpu",
        torch_dtype=torch.bfloat16,
    )
    result = pipe(prompt, max_length=100, num_return_sequences=1)
    return result[0]["generated_text"]

# Pass the endpoint URL to text_gen through a hidden Textbox so the user only
# sees the prompt field.
url = gr.Textbox(label="url", value=gaudi_device_url, visible=False)

demo = gr.Interface(
    fn=text_gen,
    inputs=[url, "text"],
    outputs=["text"],
)

demo.launch()

# This is some demo code for querying the same endpoint through LangChain's
# HuggingFaceEndpoint wrapper instead of raw requests:
#llm = HuggingFaceEndpoint(
#    endpoint_url=url,
#    max_new_tokens=1024,
#    top_k=10,
#    top_p=0.95,
#    typical_p=0.95,
#    temperature=0.01,
#    repetition_penalty=1.03,
#    streaming=True,
#)
#result = llm.invoke("Why is the sky blue?")
#print(result)
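
# Sanity check: the Gradio handler above is equivalent to posting JSON to the
# TGI /generate endpoint directly ("the curl command" mentioned above). A
# minimal sketch; the prompt string and max_new_tokens value are illustrative:
#
#resp = requests.post(
#    gaudi_device_url,
#    json={"inputs": "Why is the sky blue?", "parameters": {"max_new_tokens": 100}},
#    headers={"Content-Type": "application/json"},
#)
#print(resp.json()["generated_text"])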