# import torch
# import gradio as gr
# from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
# from PIL import Image
# import requests
# import threading

DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Krypton 🕋</h1>
<p>This uses the open-source model <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
</div>
'''

# --- Earlier streaming chat prototype, kept for reference (never completed) ---
# model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
# pipe = pipeline("image-to-text", model=model_id, device_map="auto")

# # Place the transformers on hardware to prepare for processing and generation
# llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda')
# terminators = [
#     llama_tokenizer.eos_token_id,
#     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
# ]

# def krypton(prompt,
#             history,
#             input_image,
#             max_new_tokens,
#             temperature,
#             num_beams,
#             do_sample: bool = True):
#     """
#     Passes an image as input and places it on the pipeline
#     for generation; the output is streamed back. Multimodal.
#     """
#     conversation = []
#     for user, assistant in history:
#         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
#     conversation.append({"role": "user", "content": prompt})
#     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
#     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
#     if temperature == 0.0:  # fall back to greedy decoding at temperature zero
#         do_sample = False
#     llava_generation_kwargs = dict(
#         input_ids=input_ids,
#         streamer=streamer,
#         max_new_tokens=max_new_tokens,
#         num_beams=num_beams,
#         do_sample=do_sample
#     )
#     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
#     # Pipeline generation (left unfinished)
#     outputs = pipeline()

from transformers import pipeline
from PIL import Image
import torch
import gradio as gr

model_id = "xtuner/llava-llava-3-8b-v1_1-transformers" if False else "xtuner/llava-llama-3-8b-v1_1-transformers"

# Load the LLaVA image-to-text pipeline in half precision on the first GPU
pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)


def krypton(input_image):
    """Describe the uploaded image with the LLaVA pipeline."""
    # Gradio's image input delivers a numpy array; the pipeline expects a PIL image
    pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
    # Llama-3 chat template with the <image> placeholder, as on the model card
    prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
              "<|start_header_id|>assistant<|end_header_id|>\n\n")
    outputs = pipe(pil_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
    # The pipeline echoes the (detokenized) prompt ahead of the reply, so keep
    # only the text after the first assistant marker
    generated_text = outputs[0]["generated_text"]
    return generated_text.split("assistant", 1)[-1].strip()


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.Interface(
        fn=krypton,
        inputs="image",
        outputs="text",
        fill_height=True
    )

if __name__ == "__main__":
    demo.launch()
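
# A minimal local smoke test (a sketch, not part of the app): 'example.jpg' is a
# hypothetical path, and krypton() is fed the same kind of numpy array that
# Gradio's image input would deliver. Uncomment to try outside the UI:
#
# import numpy as np
# test_image = np.array(Image.open("example.jpg").convert("RGB"))
# print(krypton(test_image))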