import gradio as gr
import requests
import json
import torch
from transformers import pipeline  # used by the CPU fallback below
#url = os.environ["TGI_GAUDI_ENDPOINT_URL"]
#myport = os.environ["myport"]
URL = "198.175.88.52"
#URL = "100.81.119.213"
myport = "8080"
gaudi_device_url = f"http://{URL}:{myport}/generate"
# This assumes TGI is already serving the model on Gaudi, so we don't build a
# pipeline here; we just POST the prompt to the server, much like a curl command.
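# For reference, a sketch of the equivalent curl request against TGI's
# /generate route (payload shape per the standard TGI API; max_new_tokens is
# an illustrative parameter choice, not something this script sets):
#   curl http://198.175.88.52:8080/generate \
#     -X POST \
#     -H 'Content-Type: application/json' \
#     -d '{"inputs": "Why is the sky blue?", "parameters": {"max_new_tokens": 100}}'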
def text_gen(url, prompt):
    # TGI's /generate route expects a JSON body: {"inputs": ..., "parameters": {...}}.
    # requests.post has no `prompt` keyword, so send the payload via `json=`.
    resp = requests.post(url, json={"inputs": prompt})
    return resp.json()["generated_text"]
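# Quick smoke test, assuming the TGI server at gaudi_device_url is reachable:
#   print(text_gen(gaudi_device_url, "Why is the sky blue?"))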
def text_gen_cpu(prompt):
    # Local CPU fallback: build a small gpt2 pipeline instead of calling TGI.
    pipe = pipeline(task="text-generation", model="gpt2", tokenizer="gpt2", device="cpu", torch_dtype=torch.bfloat16)
    result = pipe(prompt, max_length=100, num_return_sequences=1)
    return result[0]["generated_text"]
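# To run without a TGI server, point the Interface below at this fallback
# instead: fn=text_gen_cpu with a single inputs=[gr.Textbox(label="Prompt")].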
demo = gr.Interface(
    fn=text_gen,
    # The URL is not user input, so pass it through a hidden Textbox; a bare
    # string in `inputs` would be misread as a component shortcut name.
    inputs=[gr.Textbox(value=gaudi_device_url, visible=False), gr.Textbox(label="Prompt")],
    outputs=["text"],
)
demo.launch()
# This is some demo code for calling the same endpoint through LangChain's
# HuggingFaceEndpoint wrapper instead of raw requests.
#llm = HuggingFaceEndpoint(
#    endpoint_url=url,
#    max_new_tokens=1024,
#    top_k=10,
#    top_p=0.95,
#    typical_p=0.95,
#    temperature=0.01,
#    repetition_penalty=1.03,
#)
#result = llm.invoke("Why is the sky blue?")
#print(result)