# --- Earlier draft: streaming multimodal chat (kept commented out for reference) ---
# import torch
# import gradio as gr
# from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
# from PIL import Image
# import requests
# import threading
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Krypton π</h1>
<p>This demo uses the open-source model <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a>.</p>
</div>
'''
# model_id = "xtuner/llava-llama-3-8b-v1_1-transformers" | |
# pipe = pipeline("image-to-text", model=model_id, device_map="auto") | |
# # Place transformers in hardware to prepare for process and generation | |
# llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") | |
# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda') | |
# terminators = [ | |
# llama_tokenizer.eos_token_id, | |
# llama_tokenizer.convert_tokens_to_ids("<|eot_id|>") | |
# ] | |
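# # Llama-3-Instruct ends an assistant turn with <|eot_id|> in addition to the
# # regular EOS token, so both ids are used as stopping criteria for generate().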
# def krypton(prompt,
#             history,
#             input_image,
#             max_new_tokens,
#             temperature,
#             num_beams,
#             do_sample: bool = True):
#     """
#     Multimodal chat handler: rebuilds the Llama-3 conversation from the
#     Gradio history, then streams a response for the new prompt and image.
#     """
#     conversation = []
#     for user, assistant in history:
#         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
#     conversation.append({"role": "user", "content": prompt})
#     input_ids = llama_tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors='pt').to(llama_model.device)
#     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
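# # TextIteratorStreamer exposes decoded tokens as an iterator while generate()
# # runs, so generation itself has to happen on a background thread (see below).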
#     # generate() only honors temperature when sampling, so fall back to
#     # greedy decoding when the temperature slider is at 0.0
#     if temperature == 0.0:
#         do_sample = False
#     llava_generation_kwargs = dict(
#         input_ids=input_ids,
#         streamer=streamer,
#         max_new_tokens=max_new_tokens,
#         temperature=temperature,
#         num_beams=num_beams,
#         do_sample=do_sample,
#         eos_token_id=terminators,
#     )
#     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')  # image input was never wired into this draft
#     # Run generation on a background thread so the streamer can yield partial text
#     thread = threading.Thread(target=llama_model.generate, kwargs=llava_generation_kwargs)
#     thread.start()
#     buffer = ""
#     for new_text in streamer:
#         buffer += new_text
#         yield buffer
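
# --- Active version: single-shot image description via the image-to-text pipeline ---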
from transformers import pipeline
from PIL import Image
import requests
import torch
import subprocess
import gradio as gr

model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
# fp16 weights on GPU device 0 (this Space assumes a CUDA GPU is available)
pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)
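
# Quick smoke test (a commented-out sketch; the URL is the COCO sample image
# used in the model card's example):
#   url = "http://images.cocodataset.org/val2017/000000039769.jpg"
#   image = Image.open(requests.get(url, stream=True).raw)
#   outputs = pipe(image,
#                  prompt="<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
#                  generate_kwargs={"max_new_tokens": 50})
#   print(outputs[0]["generated_text"])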
def krypton(input_image):
    """Describes the uploaded image with one LLaVA generation pass."""
    pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
    # image = Image.open(requests.get(url, stream=True).raw)
    # Prompt in the Llama-3 chat format that this LLaVA checkpoint expects
    prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
              "<|start_header_id|>assistant<|end_header_id|>\n\n")
    outputs = pipe(pil_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
    # Print GPU utilization to the Space logs for debugging
    print(subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE).stdout.decode())
    return outputs[0]["generated_text"]
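
# Example direct call (a sketch; "cats.jpg" is a hypothetical local file):
#   import numpy as np
#   print(krypton(np.array(Image.open("cats.jpg").convert("RGB"))))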
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.Interface(
        fn=krypton,
        inputs="image",
        outputs="text",
        fill_height=True
    )

if __name__ == "__main__":
    demo.launch()