diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1a4be208548b1a07a70623f9622feb84a676a42f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +examples/bigcompany.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey1.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey2.png filter=lfs diff=lfs merge=lfs -text +examples/twitter2.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter3.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter4.jpeg filter=lfs diff=lfs merge=lfs -text +examples/user_example_07.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7d4aa833ffb0a68b2b7e8d5300eda61123d142c7..15f2fd1865cf097543b9ff718b8522c75109e09c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,12 @@ --- -title: PULSE Debug -emoji: π -colorFrom: gray -colorTo: blue +title: Pangea +emoji: π +colorFrom: green +colorTo: red sdk: gradio -sdk_version: 5.4.0 +sdk_version: 4.37.2 app_file: app.py -pinned: false -license: apache-2.0 -short_description: ECG +pinned: true +short_description: A Fully Open Multilingual Multimodal LLM for 39 Languages --- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..bc9e04af67199db5836b0cef5ec56102c0f0ac80 --- /dev/null +++ b/app.py @@ -0,0 +1,535 @@ +# from .demo_modelpart import InferenceDemo +import gradio as gr +import os +from threading import Thread + +# import time +import cv2 + +import datetime +# import copy +import torch + +import spaces +import numpy as np + +from llava import conversation as conversation_lib +from llava.constants import DEFAULT_IMAGE_TOKEN + + +from llava.constants import ( + IMAGE_TOKEN_INDEX, + DEFAULT_IMAGE_TOKEN, + DEFAULT_IM_START_TOKEN, + DEFAULT_IM_END_TOKEN, +) +from llava.conversation import conv_templates, SeparatorStyle +from llava.model.builder import load_pretrained_model +from llava.utils import disable_torch_init +from llava.mm_utils import ( + tokenizer_image_token, + process_images, + get_model_name_from_path, + KeywordsStoppingCriteria, +) + +from serve_constants import html_header, bibtext, learn_more_markdown, tos_markdown + +import requests +from PIL import Image +from io import BytesIO +from transformers import TextStreamer, TextIteratorStreamer + +import hashlib +import PIL +import base64 +import json + +import datetime +import gradio as gr +import gradio_client +import subprocess +import sys + +from huggingface_hub import HfApi +from huggingface_hub import login +from huggingface_hub import revision_exists + +login(token=os.environ["HF_TOKEN"], + write_permission=True) + +api = HfApi() +repo_name = os.environ["LOG_REPO"] + +external_log_dir = "./logs" +LOGDIR = external_log_dir + + +def install_gradio_4_35_0(): + current_version = gr.__version__ + if current_version != "4.35.0": + print(f"Current Gradio version: {current_version}") + print("Installing Gradio 4.35.0...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio==4.35.0", "--force-reinstall"]) + print("Gradio 4.35.0 installed successfully.") + else: + print("Gradio 4.35.0 is already installed.") + +# Call the function to install Gradio 4.35.0 if needed +install_gradio_4_35_0() + +import gradio as gr +import gradio_client +print(f"Gradio version: {gr.__version__}") +print(f"Gradio-client version: {gradio_client.__version__}") + +def get_conv_log_filename(): + t = datetime.datetime.now() + name = os.path.join(LOGDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-user_conv.json") + return name + +class InferenceDemo(object): + def __init__( + self, args, model_path, tokenizer, model, image_processor, context_len + ) -> None: + disable_torch_init() + + self.tokenizer, self.model, self.image_processor, self.context_len = ( + tokenizer, + model, + image_processor, + context_len, + ) + + if "llama-2" in model_name.lower(): + conv_mode = "llava_llama_2" + elif "v1" in model_name.lower() or "pulse" in model_name.lower(): + conv_mode = "llava_v1" + elif "mpt" in model_name.lower(): + conv_mode = "mpt" + elif "qwen" in model_name.lower(): + conv_mode = "qwen_1_5" + else: + conv_mode = "llava_v0" + + if args.conv_mode is not None and conv_mode != args.conv_mode: + print( + "[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format( + conv_mode, args.conv_mode, args.conv_mode + ) + ) + else: + args.conv_mode = conv_mode + self.conv_mode = conv_mode + self.conversation = conv_templates[args.conv_mode].copy() + self.num_frames = args.num_frames + + +def is_valid_video_filename(name): + video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"] + + ext = name.split(".")[-1].lower() + + if ext in video_extensions: + return True + else: + return False + +def is_valid_image_filename(name): + image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"] + + ext = name.split(".")[-1].lower() + + if ext in image_extensions: + return True + else: + return False + + +def sample_frames(video_file, num_frames): + video = cv2.VideoCapture(video_file) + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + interval = total_frames // num_frames + frames = [] + for i in range(total_frames): + ret, frame = video.read() + pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + if not ret: + continue + if i % interval == 0: + frames.append(pil_img) + video.release() + return frames + + +def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + if response.status_code == 200: + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + print("failed to load the image") + else: + print("Load image from local file") + print(image_file) + image = Image.open(image_file).convert("RGB") + + return image + + +def clear_history(history): + + our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy() + + return None + + +def clear_response(history): + for index_conv in range(1, len(history)): + # loop until get a text response from our model. + conv = history[-index_conv] + if not (conv[0] is None): + break + question = history[-index_conv][0] + history = history[:-index_conv] + return history, question + + +# def print_like_dislike(x: gr.LikeData): +# print(x.index, x.value, x.liked) + + +def add_message(history, message): + # history=[] + global our_chatbot + if len(history) == 0: + our_chatbot = InferenceDemo( + args, model_path, tokenizer, model, image_processor, context_len + ) + + for x in message["files"]: + history.append(((x,), None)) + if message["text"] is not None: + history.append((message["text"], None)) + return history, gr.MultimodalTextbox(value=None, interactive=False) + + +@spaces.GPU +def bot(history, temperature, top_p, max_output_tokens): + print("### turn start history",history) + print("### turn start conv",our_chatbot.conversation) + text = history[-1][0] + images_this_term = [] + text_this_term = "" + # import pdb;pdb.set_trace() + num_new_images = 0 + for i, message in enumerate(history[:-1]): + if type(message[0]) is tuple: + images_this_term.append(message[0][0]) + if is_valid_video_filename(message[0][0]): + # δΈζ₯εθ§ι’ + raise ValueError("Video is not supported") + num_new_images += our_chatbot.num_frames + elif is_valid_image_filename(message[0][0]): + print("#### Load image from local file",message[0][0]) + num_new_images += 1 + else: + raise ValueError("Invalid image file") + else: + num_new_images = 0 + + # for message in history[-i-1:]: + # images_this_term.append(message[0][0]) + + assert len(images_this_term) > 0, "must have an image" + # image_files = (args.image_file).split(',') + # image = [load_image(f) for f in images_this_term if f] + + all_image_hash = [] + all_image_path = [] + for image_path in images_this_term: + with open(image_path, "rb") as image_file: + image_data = image_file.read() + image_hash = hashlib.md5(image_data).hexdigest() + all_image_hash.append(image_hash) + image = PIL.Image.open(image_path).convert("RGB") + t = datetime.datetime.now() + filename = os.path.join( + LOGDIR, + "serve_images", + f"{t.year}-{t.month:02d}-{t.day:02d}", + f"{image_hash}.jpg", + ) + all_image_path.append(filename) + if not os.path.isfile(filename): + os.makedirs(os.path.dirname(filename), exist_ok=True) + print("image save to",filename) + image.save(filename) + + image_list = [] + for f in images_this_term: + if is_valid_video_filename(f): + image_list += sample_frames(f, our_chatbot.num_frames) + elif is_valid_image_filename(f): + image_list.append(load_image(f)) + else: + raise ValueError("Invalid image file") + + image_tensor = [ + process_images([f], our_chatbot.image_processor, our_chatbot.model.config)[0] + .to(our_chatbot.model.device) + for f in image_list + ] + + + image_tensor = torch.stack(image_tensor) + image_token = DEFAULT_IMAGE_TOKEN * num_new_images + # if our_chatbot.model.config.mm_use_im_start_end: + # inp = DEFAULT_IM_START_TOKEN + image_token + DEFAULT_IM_END_TOKEN + "\n" + inp + # else: + inp = text + inp = image_token + "\n" + inp + our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp) + # image = None + our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None) + prompt = our_chatbot.conversation.get_prompt() + + # input_ids = ( + # tokenizer_image_token( + # prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt" + # ) + # .unsqueeze(0) + # .to(our_chatbot.model.device) + # ) + input_ids = tokenizer_image_token( + prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt" + ).unsqueeze(0).to(our_chatbot.model.device) + # print("### input_id",input_ids) + stop_str = ( + our_chatbot.conversation.sep + if our_chatbot.conversation.sep_style != SeparatorStyle.TWO + else our_chatbot.conversation.sep2 + ) + keywords = [stop_str] + stopping_criteria = KeywordsStoppingCriteria( + keywords, our_chatbot.tokenizer, input_ids + ) + # streamer = TextStreamer( + # our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True + # ) + streamer = TextIteratorStreamer( + our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True + ) + print(our_chatbot.model.device) + print(input_ids.device) + print(image_tensor.device) + + # with torch.inference_mode(): + # output_ids = our_chatbot.model.generate( + # input_ids, + # images=image_tensor, + # do_sample=True, + # temperature=0.7, + # top_p=1.0, + # max_new_tokens=4096, + # streamer=streamer, + # use_cache=False, + # stopping_criteria=[stopping_criteria], + # ) + + # outputs = our_chatbot.tokenizer.decode(output_ids[0]).strip() + # if outputs.endswith(stop_str): + # outputs = outputs[: -len(stop_str)] + # our_chatbot.conversation.messages[-1][-1] = outputs + + # history[-1] = [text, outputs] + + # return history + generate_kwargs = dict( + inputs=input_ids, + streamer=streamer, + images=image_tensor, + do_sample=True, + temperature=temperature, + top_p=top_p, + max_new_tokens=max_output_tokens, + use_cache=False, + stopping_criteria=[stopping_criteria], + ) + + t = Thread(target=our_chatbot.model.generate, kwargs=generate_kwargs) + t.start() + + outputs = [] + for stream_token in streamer: + outputs.append(stream_token) + # print("### stream_token",stream_token) + # our_chatbot.conversation.messages[-1][-1] = "".join(outputs) + history[-1] = [text, "".join(outputs)] + yield history + our_chatbot.conversation.messages[-1][-1] = "".join(outputs) + print("### turn end history", history) + print("### turn end conv",our_chatbot.conversation) + + with open(get_conv_log_filename(), "a") as fout: + data = { + "type": "chat", + "model": "PULSE-7b", + "state": history, + "images": all_image_hash, + "images_path": all_image_path + } + print("#### conv log",data) + fout.write(json.dumps(data) + "\n") + for upload_img in all_image_path: + api.upload_file( + path_or_fileobj=upload_img, + path_in_repo=upload_img.replace("./logs/", ""), + repo_id=repo_name, + repo_type="dataset", + # revision=revision, + # ignore_patterns=["data*"] + ) + # upload json + api.upload_file( + path_or_fileobj=get_conv_log_filename(), + path_in_repo=get_conv_log_filename().replace("./logs/", ""), + repo_id=repo_name, + repo_type="dataset") + + + +txt = gr.Textbox( + scale=4, + show_label=False, + placeholder="Enter text and press enter.", + container=False, +) + +with gr.Blocks( + css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-width: 40px}", +) as demo: + + cur_dir = os.path.dirname(os.path.abspath(__file__)) + # gr.Markdown(title_markdown) + gr.HTML(html_header) + + with gr.Column(): + with gr.Accordion("Parameters", open=False) as parameter_row: + temperature = gr.Slider( + minimum=0.0, + maximum=1.0, + value=0.0, + step=0.1, + interactive=True, + label="Temperature", + ) + top_p = gr.Slider( + minimum=0.0, + maximum=1.0, + value=1, + step=0.1, + interactive=True, + label="Top P", + ) + max_output_tokens = gr.Slider( + minimum=0, + maximum=8192, + value=4096, + step=256, + interactive=True, + label="Max output tokens", + ) + with gr.Row(): + chatbot = gr.Chatbot([], elem_id="PULSE", bubble_full_width=False, height=750) + + with gr.Row(): + upvote_btn = gr.Button(value="π Upvote", interactive=True) + downvote_btn = gr.Button(value="π Downvote", interactive=True) + flag_btn = gr.Button(value="β οΈ Flag", interactive=True) + # stop_btn = gr.Button(value="βΉοΈ Stop Generation", interactive=True) + regenerate_btn = gr.Button(value="π Regenerate", interactive=True) + clear_btn = gr.Button(value="ποΈ Clear history", interactive=True) + + + chat_input = gr.MultimodalTextbox( + interactive=True, + file_types=["image"], + placeholder="Enter message or upload file...", + show_label=False, + submit_btn="π" + ) + + print(cur_dir) + gr.Examples( + examples_per_page=5, + examples=[ + [ + { + "files": [ + f"{cur_dir}/examples/ecg_example2.png", + ], + "text": "What are the main features in this ECG image?", + }, + ], + [ + { + "files": [ + f"{cur_dir}/examples/ecg_example1.jpg", + ], + "text": "What can be inferred from the pattern of the qR complexes and rS complexes in the leads of this ECG image?", + }, + ] + ], + inputs=[chat_input], + label="Image", + ) + + gr.Markdown(tos_markdown) + gr.Markdown(learn_more_markdown) + gr.Markdown(bibtext) + + chat_msg = chat_input.submit( + add_message, [chatbot, chat_input], [chatbot, chat_input] + ) + bot_msg = chat_msg.then(bot, [chatbot,temperature, top_p, max_output_tokens], chatbot, api_name="bot_response") + bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input]) + + # chatbot.like(print_like_dislike, None, None) + clear_btn.click( + fn=clear_history, inputs=[chatbot], outputs=[chatbot], api_name="clear_all" + ) + + +demo.queue() + +if __name__ == "__main__": + import argparse + + argparser = argparse.ArgumentParser() + argparser.add_argument("--server_name", default="0.0.0.0", type=str) + argparser.add_argument("--port", default="6123", type=str) + argparser.add_argument( + "--model_path", default="PULSE-ECG/PULSE-7B", type=str + ) + # argparser.add_argument("--model-path", type=str, default="facebook/opt-350m") + argparser.add_argument("--model-base", type=str, default=None) + argparser.add_argument("--num-gpus", type=int, default=1) + argparser.add_argument("--conv-mode", type=str, default=None) + argparser.add_argument("--temperature", type=float, default=0.0) + argparser.add_argument("--max-new-tokens", type=int, default=1024) + argparser.add_argument("--num_frames", type=int, default=16) + argparser.add_argument("--load-8bit", action="store_true") + argparser.add_argument("--load-4bit", action="store_true") + argparser.add_argument("--debug", action="store_true") + + args = argparser.parse_args() + + model_path = args.model_path + filt_invalid = "cut" + model_name = get_model_name_from_path(args.model_path) + tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit) + print("### image_processor",image_processor) + model=model.to(torch.device('cuda')) + our_chatbot = None + demo.launch() \ No newline at end of file diff --git a/examples/.DS_Store b/examples/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/examples/.DS_Store differ diff --git a/examples/172197131626056_P7966202.png b/examples/172197131626056_P7966202.png new file mode 100644 index 0000000000000000000000000000000000000000..69c8112f3ed89c5e2f48749de0691d542e212d1e Binary files /dev/null and b/examples/172197131626056_P7966202.png differ diff --git a/examples/A-17-processors-1024x576.jpg b/examples/A-17-processors-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ac4b4a93c6d66f5b61d73fa2439ccf0cac37e19f Binary files /dev/null and b/examples/A-17-processors-1024x576.jpg differ diff --git a/examples/Iphone-15-Usb-c-charger-1024x576.jpg b/examples/Iphone-15-Usb-c-charger-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e0f8423ed48173d09bd5650668e5f2f2e1581421 Binary files /dev/null and b/examples/Iphone-15-Usb-c-charger-1024x576.jpg differ diff --git a/examples/Iphone-15-specs-1024x576.jpg b/examples/Iphone-15-specs-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b6288c0563a7d65a1c61342f5fd949c790c3c30 Binary files /dev/null and b/examples/Iphone-15-specs-1024x576.jpg differ diff --git a/examples/africa.jpg b/examples/africa.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2675f058e77f0c56e90d9b512e8e4b32abfe8c93 Binary files /dev/null and b/examples/africa.jpg differ diff --git a/examples/ballon.jpg b/examples/ballon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4302cec6677da3b7d5e758ea12818eb60b4baae2 Binary files /dev/null and b/examples/ballon.jpg differ diff --git a/examples/bar.jpg b/examples/bar.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b93a2c12086d3cd88f1522598bd5efaedb9b1cd5 Binary files /dev/null and b/examples/bar.jpg differ diff --git a/examples/bigcompany.png b/examples/bigcompany.png new file mode 100644 index 0000000000000000000000000000000000000000..4341f71ded9f7a15bd6dde393abb618de28c89bc --- /dev/null +++ b/examples/bigcompany.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e5066f3ced74d5d28fbfd3696232d728b90e9572335fc4ccb80ac1863aa6ff +size 2012596 diff --git a/examples/bijiasuo2.jpeg b/examples/bijiasuo2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e726539832c903db07f26786167f8dc4e9eaaca4 Binary files /dev/null and b/examples/bijiasuo2.jpeg differ diff --git a/examples/book.jpg b/examples/book.jpg new file mode 100644 index 0000000000000000000000000000000000000000..441711720991caaea6f7c9469a0888ba125a550a Binary files /dev/null and b/examples/book.jpg differ diff --git a/examples/camera.jpg b/examples/camera.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dd04dd3dfad549a13ae1dfe8f3a56785a99e06ed Binary files /dev/null and b/examples/camera.jpg differ diff --git a/examples/changed_bench.jpeg b/examples/changed_bench.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a94074d62c726cb97d26ef71bcfcf337b7592946 Binary files /dev/null and b/examples/changed_bench.jpeg differ diff --git a/examples/code.mp4 b/examples/code.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ca4b0fc3e009e6d2df03a85b281f91bcf768d8b7 Binary files /dev/null and b/examples/code.mp4 differ diff --git a/examples/code1.jpeg b/examples/code1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..cc336414e2cae9f589cb3696624f50fa005525f1 Binary files /dev/null and b/examples/code1.jpeg differ diff --git a/examples/code2.jpeg b/examples/code2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f213d1e0b0ec8e5f81d6325cdc0fc6829fff8878 Binary files /dev/null and b/examples/code2.jpeg differ diff --git a/examples/dog.jpg b/examples/dog.jpg new file mode 100644 index 0000000000000000000000000000000000000000..096366cf2bc802435b0b7a97824ea59c206a9e7d Binary files /dev/null and b/examples/dog.jpg differ diff --git a/examples/dog1.jpg b/examples/dog1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..66b4bcef3c6e675f1a47dbc29f18ba14cab8a89c Binary files /dev/null and b/examples/dog1.jpg differ diff --git a/examples/dog6.jpeg b/examples/dog6.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..718bd78ac4027f79bc8e63b5cc67f9dc665f30d4 Binary files /dev/null and b/examples/dog6.jpeg differ diff --git a/examples/dog9.jpeg b/examples/dog9.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..792b713fb4175b32ce138cd1d1501403c20871a3 Binary files /dev/null and b/examples/dog9.jpeg differ diff --git a/examples/dog_to_monkey1.png b/examples/dog_to_monkey1.png new file mode 100644 index 0000000000000000000000000000000000000000..734a3c37d663165f96e5fde1a4be853147cd28de --- /dev/null +++ b/examples/dog_to_monkey1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b21f907e6e0614972102936fbbc6c5d45c93ddb2780c26e04770f860c986d7 +size 2229999 diff --git a/examples/dog_to_monkey2.png b/examples/dog_to_monkey2.png new file mode 100644 index 0000000000000000000000000000000000000000..2fbe72bdd00fcb70f14f5a023e3249dced30932d --- /dev/null +++ b/examples/dog_to_monkey2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28724dace4f330df6fb2ade40b7113d3c77c5e7b5fd4aeff8e1bd39407246fb1 +size 2143496 diff --git a/examples/dynamic-island-1024x576.jpg b/examples/dynamic-island-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6b3d8951a5e12a29351b7ee49b927b5460f7146f Binary files /dev/null and b/examples/dynamic-island-1024x576.jpg differ diff --git a/examples/eagles.jpg b/examples/eagles.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5b389d9061bd101afe526c83b8b3541ec7a39a6d Binary files /dev/null and b/examples/eagles.jpg differ diff --git a/examples/ecg_example1.jpg b/examples/ecg_example1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..647caea88e5ed2e81f1572e5547c1367bb8efb74 Binary files /dev/null and b/examples/ecg_example1.jpg differ diff --git a/examples/ecg_example2.png b/examples/ecg_example2.png new file mode 100644 index 0000000000000000000000000000000000000000..4244813bd75e0c32ab044979cfdb3b3b03ab0bcb Binary files /dev/null and b/examples/ecg_example2.png differ diff --git a/examples/examples_image12.jpg b/examples/examples_image12.jpg new file mode 100644 index 0000000000000000000000000000000000000000..547b4eb582e58e7d058265355c6ef3581bbb48c3 Binary files /dev/null and b/examples/examples_image12.jpg differ diff --git a/examples/examples_image13.jpg b/examples/examples_image13.jpg new file mode 100644 index 0000000000000000000000000000000000000000..89abd71e82eb02560ce0c34653333084e7b0402c Binary files /dev/null and b/examples/examples_image13.jpg differ diff --git a/examples/examples_image14.jpg b/examples/examples_image14.jpg new file mode 100644 index 0000000000000000000000000000000000000000..731e15e8fe838785fd888cffe642ff5e88f177f2 Binary files /dev/null and b/examples/examples_image14.jpg differ diff --git a/examples/fangao1.jpeg b/examples/fangao1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..0daebf1dcca0f940d7b7a97c6de2e63a215be127 Binary files /dev/null and b/examples/fangao1.jpeg differ diff --git a/examples/fangao2.jpeg b/examples/fangao2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..673e53ab6344ac313606097f6fc7c01b827e70fc Binary files /dev/null and b/examples/fangao2.jpeg differ diff --git a/examples/fangao3.jpeg b/examples/fangao3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..db45d6dc7501bc8698c3432d23d78943b31d76a5 Binary files /dev/null and b/examples/fangao3.jpeg differ diff --git a/examples/food.jpg b/examples/food.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2bfc975bbade5697f5f79a025cb3fc8791e6bbc1 Binary files /dev/null and b/examples/food.jpg differ diff --git a/examples/girl.jpg b/examples/girl.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8f56be6b6e777183bb0584cc77a069afc7a42813 Binary files /dev/null and b/examples/girl.jpg differ diff --git a/examples/hanzi.jpg b/examples/hanzi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4ae58bff3e7d6abfba6ad487f6d06cffe67e464b Binary files /dev/null and b/examples/hanzi.jpg differ diff --git a/examples/hot_ballon.jpg b/examples/hot_ballon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ebc9cb58941c19c587087dd3e3d359ed1bc942d1 Binary files /dev/null and b/examples/hot_ballon.jpg differ diff --git a/examples/ice_cream.jpg b/examples/ice_cream.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5b97ddc7d275f5ef5d39681320efae7f401ab424 Binary files /dev/null and b/examples/ice_cream.jpg differ diff --git a/examples/image-00007.jpeg b/examples/image-00007.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a18c7a701e132b2981c938e4363b81dd9ed3601c Binary files /dev/null and b/examples/image-00007.jpeg differ diff --git a/examples/image-00053.jpeg b/examples/image-00053.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..1d5c2cfefbd493c8bc79e259313299a7af535e48 Binary files /dev/null and b/examples/image-00053.jpeg differ diff --git a/examples/iphone-15-colors-1024x576.jpg b/examples/iphone-15-colors-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9dd5ee457b5c0391cf6b273744addecca419a442 Binary files /dev/null and b/examples/iphone-15-colors-1024x576.jpg differ diff --git a/examples/iphone-15-price-1024x576.jpg b/examples/iphone-15-price-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..04ee013325bf94c30569d9ad78b8c49c5985e6b6 Binary files /dev/null and b/examples/iphone-15-price-1024x576.jpg differ diff --git a/examples/iphone-15-pricing-1024x576.jpg b/examples/iphone-15-pricing-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fb477c0e99b48a4a97a507ce562c6968fd6e55f2 Binary files /dev/null and b/examples/iphone-15-pricing-1024x576.jpg differ diff --git a/examples/line chart.jpg b/examples/line chart.jpg new file mode 100644 index 0000000000000000000000000000000000000000..02766ecbc43d97c82d4d3f6cf388a0d7cac5f587 Binary files /dev/null and b/examples/line chart.jpg differ diff --git a/examples/norway.jpg b/examples/norway.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ed6af75e6e883617374edcdd40e7557751c3155 Binary files /dev/null and b/examples/norway.jpg differ diff --git a/examples/oprah-winfrey-resume.png b/examples/oprah-winfrey-resume.png new file mode 100644 index 0000000000000000000000000000000000000000..e80a3665a04ffc16fcde2c30914cadfd02cc1468 Binary files /dev/null and b/examples/oprah-winfrey-resume.png differ diff --git a/examples/orange.png b/examples/orange.png new file mode 100644 index 0000000000000000000000000000000000000000..83da9a9f71f08f94c6618150088aa737ca6bb945 Binary files /dev/null and b/examples/orange.png differ diff --git a/examples/original_bench.jpeg b/examples/original_bench.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e84dfc8554d344a69afb1fe8c7b8b2997d4e5e11 Binary files /dev/null and b/examples/original_bench.jpeg differ diff --git a/examples/pie.jpg b/examples/pie.jpg new file mode 100644 index 0000000000000000000000000000000000000000..da1248da3ef36bd8869548bea1b1059b76c68407 Binary files /dev/null and b/examples/pie.jpg differ diff --git a/examples/pink_lake.jpg b/examples/pink_lake.jpg new file mode 100644 index 0000000000000000000000000000000000000000..027d6bf1dd7a78f7f983fb4569c72b349979ebdd Binary files /dev/null and b/examples/pink_lake.jpg differ diff --git a/examples/resume_a.jpg b/examples/resume_a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..abb1a5d9c10bfc957b6da2d1e0fb42194cc4c284 Binary files /dev/null and b/examples/resume_a.jpg differ diff --git a/examples/resume_b.jpg b/examples/resume_b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..88c5059331bbce957f3cab20bbb8cd429a33aeed Binary files /dev/null and b/examples/resume_b.jpg differ diff --git a/examples/shua.jpg b/examples/shua.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1ce2a9d5318d0d1b63f58c29429d0c689c2cd82a Binary files /dev/null and b/examples/shua.jpg differ diff --git a/examples/shub.jpg b/examples/shub.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c0bf426f471f648df1e3cd6a8fe123210c04909 Binary files /dev/null and b/examples/shub.jpg differ diff --git a/examples/shuc.jpg b/examples/shuc.jpg new file mode 100644 index 0000000000000000000000000000000000000000..764be26dc69fb94cbb9bf14d24bdfed870430fbe Binary files /dev/null and b/examples/shuc.jpg differ diff --git a/examples/shud.jpg b/examples/shud.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c607a6559e49d85ee00ce72ea917c6085974891f Binary files /dev/null and b/examples/shud.jpg differ diff --git a/examples/south africa.jpg b/examples/south africa.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cf05b1c5cedff0c829a729c01977a5684ef60cb8 Binary files /dev/null and b/examples/south africa.jpg differ diff --git a/examples/steve-jobs-resume.jpg b/examples/steve-jobs-resume.jpg new file mode 100644 index 0000000000000000000000000000000000000000..554eb37c0c573491543131597ed7aec9226bce65 Binary files /dev/null and b/examples/steve-jobs-resume.jpg differ diff --git a/examples/strawberry.png b/examples/strawberry.png new file mode 100644 index 0000000000000000000000000000000000000000..85e6989e96cac62a5b74468cb96c142644e54b5f Binary files /dev/null and b/examples/strawberry.png differ diff --git a/examples/totoro.jpg b/examples/totoro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fe090b615b6d8e0826230ffd277464f79d2f6bd5 Binary files /dev/null and b/examples/totoro.jpg differ diff --git a/examples/twitter1.jpeg b/examples/twitter1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..81e8462f145732e183f27c7e657599536849a078 Binary files /dev/null and b/examples/twitter1.jpeg differ diff --git a/examples/twitter2.jpeg b/examples/twitter2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f0ea41c6eb9e77096ecaa8b35d8348158cc4bbc8 --- /dev/null +++ b/examples/twitter2.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdf155c8f619226f70f1953611c6f659953be4266a2f21be3903140c7f4eb6c +size 1499716 diff --git a/examples/twitter3.jpeg b/examples/twitter3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..34828da8854978c5e72792ac9968d596d2db8b39 --- /dev/null +++ b/examples/twitter3.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81f7af9a87a63348f15a0ae26308253bbd765f8f49012d452fe73558eb5cea0 +size 1827735 diff --git a/examples/twitter4.jpeg b/examples/twitter4.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f0477465bfbd7351ef6a824086bb7a9d4f9478ea --- /dev/null +++ b/examples/twitter4.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d9758ee6b319f6fd5829ded57e188346161f41b432949b38dd0837992757879 +size 2001277 diff --git a/examples/user_example_05.jpg b/examples/user_example_05.jpg new file mode 100644 index 0000000000000000000000000000000000000000..268e122527566f70b52dbff8f693e8d54c25d5f0 Binary files /dev/null and b/examples/user_example_05.jpg differ diff --git a/examples/user_example_07.jpg b/examples/user_example_07.jpg new file mode 100644 index 0000000000000000000000000000000000000000..28e407e5dace0fc725e256c8dc31d2a88de73779 --- /dev/null +++ b/examples/user_example_07.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7cbd447bd8a78f7b5ab068d641ef12ae31b2092894780465e751d8c7db049d +size 1388264 diff --git a/gitattributes.txt b/gitattributes.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a4be208548b1a07a70623f9622feb84a676a42f --- /dev/null +++ b/gitattributes.txt @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +examples/bigcompany.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey1.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey2.png filter=lfs diff=lfs merge=lfs -text +examples/twitter2.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter3.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter4.jpeg filter=lfs diff=lfs merge=lfs -text +examples/user_example_07.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a719a26d73d0bc786c7708fe35574fdcd5a06be0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,34 @@ +git+https://github.com/paralym/LLaVA-NeXT.git +ninja +opencv-python +open_clip_torch +fastapi +gradio==4.35.0 +gradio_client==1.0.1 +markdown2[all] +numpy==1.26.4 +requests +sentencepiece +torch==2.1.2 +torchvision==0.16.2 +uvicorn +wandb==0.16.5 +deepspeed==0.12.2 +peft==0.4.0 +accelerate>=0.29.1 +tokenizers~=0.15.2 +transformers +bitsandbytes==0.41.0 +scikit-learn==1.2.2 +sentencepiece~=0.1.99 +einops==0.6.1 +einops-exts==0.0.4 +pydantic>=2.0 +timm +hf_transfer +decord +datasets +tyro +scipy +rouge +urllib3~=2.0 diff --git a/serve_constants.py b/serve_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..86a0372de3ce25d2cb3f4a847aec6c0eced66b47 --- /dev/null +++ b/serve_constants.py @@ -0,0 +1,124 @@ +title_markdown = """ +