import json
import os
import requests
import torch
import gradio as gr
from transformers import pipeline
# Only needed for the commented-out LangChain example at the bottom of this file.
from langchain_community.llms import HuggingFaceEndpoint
#url = os.environ["TGI_GAUDI_ENDPOINT_URL"]
#myport = os.environ["myport"]
URL = "198.175.88.52"
#URL = "100.81.119.213"
myport = "8080"
gaudi_device_url = f"http://{URL}:{myport}/generate"
# This assumes TGI is already serving the model on the Gaudi host, so no local pipeline is
# needed here; the request below is equivalent to sending a curl POST against /generate.
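# For reference, the equivalent curl request (assuming a standard TGI /generate route) would be:
#   curl http://<host>:<port>/generate \
#     -X POST -H 'Content-Type: application/json' \
#     -d '{"inputs": "Why is the sky blue?", "parameters": {"max_new_tokens": 100}}'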
def text_gen(url, prompt):
    # TGI's /generate route expects a JSON body of the form {"inputs": <prompt>, "parameters": {...}}
    # and returns a JSON object containing "generated_text".
    resp = requests.post(url, json={"inputs": prompt})
    resp.raise_for_status()
    return resp.json()["generated_text"]
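# Minimal smoke test (assumes the Gaudi TGI endpoint above is reachable from this machine);
# uncomment to try the helper outside of the Gradio UI:
# print(text_gen(gaudi_device_url, "Why is the sky blue?"))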
def text_gen_cpu(prompt):
    # Local CPU fallback: runs GPT-2 through a transformers pipeline instead of the remote TGI endpoint.
    pipe = pipeline(task="text-generation", model="gpt2", tokenizer="gpt2", device="cpu", torch_dtype=torch.bfloat16)
    result = pipe(prompt, max_length=100, num_return_sequences=1)
    return result
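# Sketch of calling the CPU fallback directly (it is not wired into the Gradio UI below); the
# text-generation pipeline returns a list of dicts, e.g. [{"generated_text": "..."}]:
# print(text_gen_cpu("Why is the sky blue?")[0]["generated_text"])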
demo = gr.Interface(
    fn=text_gen,
    # The endpoint URL is passed through a hidden textbox so that text_gen receives (url, prompt).
    inputs=[gr.Textbox(value=gaudi_device_url, visible=False), gr.Textbox(label="Prompt")],
    outputs=[gr.Textbox(label="Generated text")],
)

demo.launch()
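# To expose the UI beyond localhost, launch could instead be called as (port is an assumption):
# demo.launch(server_name="0.0.0.0", server_port=7860)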
#url = gr.Textbox(label='url', value=URL, visible=False)
# Below is some commented-out demo code for querying the same TGI server through LangChain's
# HuggingFaceEndpoint wrapper instead of calling requests directly.
#llm = HuggingFaceEndpoint(
#    endpoint_url=f"http://{URL}:{myport}",  # base TGI URL
# max_new_tokens=1024,
# top_k=10,
# top_p=0.95,
# typical_p=0.95,
# temperature=0.01,
# repetition_penalty=1.03,
# streaming=True,
# )
#result = llm.invoke("Why is the sky blue?")
#print(result)
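# Since streaming=True is set above, a streamed variant (a sketch, assuming LangChain's standard
# Runnable streaming interface) could look like:
# for chunk in llm.stream("Why is the sky blue?"):
#     print(chunk, end="", flush=True)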