# import torch
# import gradio as gr
# from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
# from PIL import Image
# import requests
# import threading

DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Krypton 🕋</h1>
<p>This uses the open-source model <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
</div>
'''

# --- Earlier streaming chat prototype, kept for reference (never completed) ---
# model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
# pipe = pipeline("image-to-text", model=model_id, device_map="auto")

# # Place the transformers on hardware to prepare for processing and generation
# llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda')
# terminators = [
#     llama_tokenizer.eos_token_id,
#     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
# ]

# def krypton(prompt,
#             history,
#             input_image,
#             max_new_tokens,
#             temperature,
#             num_beams,
#             do_sample: bool = True):
#     """
#     Passes an image as input and places it on the pipeline
#     for generation; the output is streamed back. Multimodal.
#     """
#     conversation = []
#     for user, assistant in history:
#         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
#     conversation.append({"role": "user", "content": prompt})
#     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
#     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
#     if temperature == 0.0:  # fall back to greedy decoding at temperature zero
#         do_sample = False
#     llava_generation_kwargs = dict(
#         input_ids=input_ids,
#         streamer=streamer,
#         max_new_tokens=max_new_tokens,
#         num_beams=num_beams,
#         do_sample=do_sample
#     )
#     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
#     # Pipeline generation (left unfinished)
#     outputs = pipeline()

from transformers import pipeline
from PIL import Image
import torch
import gradio as gr

model_id = "xtuner/llava-llava-3-8b-v1_1-transformers" if False else "xtuner/llava-llama-3-8b-v1_1-transformers"

# Load the LLaVA image-to-text pipeline in half precision on the first GPU
pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)


def krypton(input_image):
    """Describe the uploaded image with the LLaVA pipeline."""
    # Gradio's image input delivers a numpy array; the pipeline expects a PIL image
    pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
    # Llama-3 chat template with the <image> placeholder, as on the model card
    prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
              "<|start_header_id|>assistant<|end_header_id|>\n\n")
    outputs = pipe(pil_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
    # The pipeline echoes the (detokenized) prompt ahead of the reply, so keep
    # only the text after the first assistant marker
    generated_text = outputs[0]["generated_text"]
    return generated_text.split("assistant", 1)[-1].strip()


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.Interface(
        fn=krypton,
        inputs="image",
        outputs="text",
        fill_height=True
    )

if __name__ == "__main__":
    demo.launch()
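
# A minimal local smoke test (a sketch, not part of the app): 'example.jpg' is a
# hypothetical path, and krypton() is fed the same kind of numpy array that
# Gradio's image input would deliver. Uncomment to try outside the UI:
#
# import numpy as np
# test_image = np.array(Image.open("example.jpg").convert("RGB"))
# print(krypton(test_image))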