diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1a4be208548b1a07a70623f9622feb84a676a42f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +examples/bigcompany.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey1.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey2.png filter=lfs diff=lfs merge=lfs -text +examples/twitter2.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter3.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter4.jpeg filter=lfs diff=lfs merge=lfs -text +examples/user_example_07.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7d4aa833ffb0a68b2b7e8d5300eda61123d142c7..15f2fd1865cf097543b9ff718b8522c75109e09c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,12 @@ --- -title: PULSE Debug -emoji: πŸ“ˆ -colorFrom: gray -colorTo: blue +title: Pangea +emoji: πŸš€ +colorFrom: green +colorTo: red sdk: gradio -sdk_version: 5.4.0 +sdk_version: 4.37.2 app_file: app.py -pinned: false -license: apache-2.0 -short_description: ECG +pinned: true +short_description: A Fully Open Multilingual Multimodal LLM for 39 Languages --- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..bc9e04af67199db5836b0cef5ec56102c0f0ac80 --- /dev/null +++ b/app.py @@ -0,0 +1,535 @@ +# from .demo_modelpart import InferenceDemo +import gradio as gr +import os +from threading import Thread + +# import time +import cv2 + +import datetime +# import copy +import torch + +import spaces +import numpy as np + +from llava import conversation as conversation_lib +from llava.constants import DEFAULT_IMAGE_TOKEN + + +from llava.constants import ( + IMAGE_TOKEN_INDEX, + DEFAULT_IMAGE_TOKEN, + DEFAULT_IM_START_TOKEN, + DEFAULT_IM_END_TOKEN, +) +from llava.conversation import conv_templates, SeparatorStyle +from llava.model.builder import load_pretrained_model +from llava.utils import disable_torch_init +from llava.mm_utils import ( + tokenizer_image_token, + process_images, + get_model_name_from_path, + KeywordsStoppingCriteria, +) + +from serve_constants import html_header, bibtext, learn_more_markdown, tos_markdown + +import requests +from PIL import Image +from io import BytesIO +from transformers import TextStreamer, TextIteratorStreamer + +import hashlib +import PIL +import base64 +import json + +import datetime +import gradio as gr +import gradio_client +import subprocess +import sys + +from huggingface_hub import HfApi +from huggingface_hub import login +from huggingface_hub import revision_exists + +login(token=os.environ["HF_TOKEN"], + write_permission=True) + +api = HfApi() +repo_name = os.environ["LOG_REPO"] + +external_log_dir = "./logs" +LOGDIR = external_log_dir + + +def install_gradio_4_35_0(): + current_version = gr.__version__ + if current_version != "4.35.0": + print(f"Current Gradio version: {current_version}") + print("Installing Gradio 4.35.0...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio==4.35.0", "--force-reinstall"]) + print("Gradio 4.35.0 installed successfully.") + else: + print("Gradio 
4.35.0 is already installed.") + +# Call the function to install Gradio 4.35.0 if needed +install_gradio_4_35_0() + +import gradio as gr +import gradio_client +print(f"Gradio version: {gr.__version__}") +print(f"Gradio-client version: {gradio_client.__version__}") + +def get_conv_log_filename(): + t = datetime.datetime.now() + name = os.path.join(LOGDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-user_conv.json") + return name + +class InferenceDemo(object): + def __init__( + self, args, model_path, tokenizer, model, image_processor, context_len + ) -> None: + disable_torch_init() + + self.tokenizer, self.model, self.image_processor, self.context_len = ( + tokenizer, + model, + image_processor, + context_len, + ) + + if "llama-2" in model_name.lower(): + conv_mode = "llava_llama_2" + elif "v1" in model_name.lower() or "pulse" in model_name.lower(): + conv_mode = "llava_v1" + elif "mpt" in model_name.lower(): + conv_mode = "mpt" + elif "qwen" in model_name.lower(): + conv_mode = "qwen_1_5" + else: + conv_mode = "llava_v0" + + if args.conv_mode is not None and conv_mode != args.conv_mode: + print( + "[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format( + conv_mode, args.conv_mode, args.conv_mode + ) + ) + else: + args.conv_mode = conv_mode + self.conv_mode = conv_mode + self.conversation = conv_templates[args.conv_mode].copy() + self.num_frames = args.num_frames + + +def is_valid_video_filename(name): + video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"] + + ext = name.split(".")[-1].lower() + + if ext in video_extensions: + return True + else: + return False + +def is_valid_image_filename(name): + image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"] + + ext = name.split(".")[-1].lower() + + if ext in image_extensions: + return True + else: + return False + + +def sample_frames(video_file, num_frames): + video = cv2.VideoCapture(video_file) + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + interval = total_frames // num_frames + frames = [] + for i in range(total_frames): + ret, frame = video.read() + pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + if not ret: + continue + if i % interval == 0: + frames.append(pil_img) + video.release() + return frames + + +def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + if response.status_code == 200: + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + print("failed to load the image") + else: + print("Load image from local file") + print(image_file) + image = Image.open(image_file).convert("RGB") + + return image + + +def clear_history(history): + + our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy() + + return None + + +def clear_response(history): + for index_conv in range(1, len(history)): + # loop until get a text response from our model. 
+ conv = history[-index_conv] + if not (conv[0] is None): + break + question = history[-index_conv][0] + history = history[:-index_conv] + return history, question + + +# def print_like_dislike(x: gr.LikeData): +# print(x.index, x.value, x.liked) + + +def add_message(history, message): + # history=[] + global our_chatbot + if len(history) == 0: + our_chatbot = InferenceDemo( + args, model_path, tokenizer, model, image_processor, context_len + ) + + for x in message["files"]: + history.append(((x,), None)) + if message["text"] is not None: + history.append((message["text"], None)) + return history, gr.MultimodalTextbox(value=None, interactive=False) + + +@spaces.GPU +def bot(history, temperature, top_p, max_output_tokens): + print("### turn start history",history) + print("### turn start conv",our_chatbot.conversation) + text = history[-1][0] + images_this_term = [] + text_this_term = "" + # import pdb;pdb.set_trace() + num_new_images = 0 + for i, message in enumerate(history[:-1]): + if type(message[0]) is tuple: + images_this_term.append(message[0][0]) + if is_valid_video_filename(message[0][0]): + # 不ζŽ₯受视钑 + raise ValueError("Video is not supported") + num_new_images += our_chatbot.num_frames + elif is_valid_image_filename(message[0][0]): + print("#### Load image from local file",message[0][0]) + num_new_images += 1 + else: + raise ValueError("Invalid image file") + else: + num_new_images = 0 + + # for message in history[-i-1:]: + # images_this_term.append(message[0][0]) + + assert len(images_this_term) > 0, "must have an image" + # image_files = (args.image_file).split(',') + # image = [load_image(f) for f in images_this_term if f] + + all_image_hash = [] + all_image_path = [] + for image_path in images_this_term: + with open(image_path, "rb") as image_file: + image_data = image_file.read() + image_hash = hashlib.md5(image_data).hexdigest() + all_image_hash.append(image_hash) + image = PIL.Image.open(image_path).convert("RGB") + t = datetime.datetime.now() + filename = os.path.join( + LOGDIR, + "serve_images", + f"{t.year}-{t.month:02d}-{t.day:02d}", + f"{image_hash}.jpg", + ) + all_image_path.append(filename) + if not os.path.isfile(filename): + os.makedirs(os.path.dirname(filename), exist_ok=True) + print("image save to",filename) + image.save(filename) + + image_list = [] + for f in images_this_term: + if is_valid_video_filename(f): + image_list += sample_frames(f, our_chatbot.num_frames) + elif is_valid_image_filename(f): + image_list.append(load_image(f)) + else: + raise ValueError("Invalid image file") + + image_tensor = [ + process_images([f], our_chatbot.image_processor, our_chatbot.model.config)[0] + .to(our_chatbot.model.device) + for f in image_list + ] + + + image_tensor = torch.stack(image_tensor) + image_token = DEFAULT_IMAGE_TOKEN * num_new_images + # if our_chatbot.model.config.mm_use_im_start_end: + # inp = DEFAULT_IM_START_TOKEN + image_token + DEFAULT_IM_END_TOKEN + "\n" + inp + # else: + inp = text + inp = image_token + "\n" + inp + our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp) + # image = None + our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None) + prompt = our_chatbot.conversation.get_prompt() + + # input_ids = ( + # tokenizer_image_token( + # prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt" + # ) + # .unsqueeze(0) + # .to(our_chatbot.model.device) + # ) + input_ids = tokenizer_image_token( + prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt" + 
).unsqueeze(0).to(our_chatbot.model.device) + # print("### input_id",input_ids) + stop_str = ( + our_chatbot.conversation.sep + if our_chatbot.conversation.sep_style != SeparatorStyle.TWO + else our_chatbot.conversation.sep2 + ) + keywords = [stop_str] + stopping_criteria = KeywordsStoppingCriteria( + keywords, our_chatbot.tokenizer, input_ids + ) + # streamer = TextStreamer( + # our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True + # ) + streamer = TextIteratorStreamer( + our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True + ) + print(our_chatbot.model.device) + print(input_ids.device) + print(image_tensor.device) + + # with torch.inference_mode(): + # output_ids = our_chatbot.model.generate( + # input_ids, + # images=image_tensor, + # do_sample=True, + # temperature=0.7, + # top_p=1.0, + # max_new_tokens=4096, + # streamer=streamer, + # use_cache=False, + # stopping_criteria=[stopping_criteria], + # ) + + # outputs = our_chatbot.tokenizer.decode(output_ids[0]).strip() + # if outputs.endswith(stop_str): + # outputs = outputs[: -len(stop_str)] + # our_chatbot.conversation.messages[-1][-1] = outputs + + # history[-1] = [text, outputs] + + # return history + generate_kwargs = dict( + inputs=input_ids, + streamer=streamer, + images=image_tensor, + do_sample=True, + temperature=temperature, + top_p=top_p, + max_new_tokens=max_output_tokens, + use_cache=False, + stopping_criteria=[stopping_criteria], + ) + + t = Thread(target=our_chatbot.model.generate, kwargs=generate_kwargs) + t.start() + + outputs = [] + for stream_token in streamer: + outputs.append(stream_token) + # print("### stream_token",stream_token) + # our_chatbot.conversation.messages[-1][-1] = "".join(outputs) + history[-1] = [text, "".join(outputs)] + yield history + our_chatbot.conversation.messages[-1][-1] = "".join(outputs) + print("### turn end history", history) + print("### turn end conv",our_chatbot.conversation) + + with open(get_conv_log_filename(), "a") as fout: + data = { + "type": "chat", + "model": "PULSE-7b", + "state": history, + "images": all_image_hash, + "images_path": all_image_path + } + print("#### conv log",data) + fout.write(json.dumps(data) + "\n") + for upload_img in all_image_path: + api.upload_file( + path_or_fileobj=upload_img, + path_in_repo=upload_img.replace("./logs/", ""), + repo_id=repo_name, + repo_type="dataset", + # revision=revision, + # ignore_patterns=["data*"] + ) + # upload json + api.upload_file( + path_or_fileobj=get_conv_log_filename(), + path_in_repo=get_conv_log_filename().replace("./logs/", ""), + repo_id=repo_name, + repo_type="dataset") + + + +txt = gr.Textbox( + scale=4, + show_label=False, + placeholder="Enter text and press enter.", + container=False, +) + +with gr.Blocks( + css=".message-wrap.svelte-1lcyrx4>div.svelte-1lcyrx4 img {min-width: 40px}", +) as demo: + + cur_dir = os.path.dirname(os.path.abspath(__file__)) + # gr.Markdown(title_markdown) + gr.HTML(html_header) + + with gr.Column(): + with gr.Accordion("Parameters", open=False) as parameter_row: + temperature = gr.Slider( + minimum=0.0, + maximum=1.0, + value=0.0, + step=0.1, + interactive=True, + label="Temperature", + ) + top_p = gr.Slider( + minimum=0.0, + maximum=1.0, + value=1, + step=0.1, + interactive=True, + label="Top P", + ) + max_output_tokens = gr.Slider( + minimum=0, + maximum=8192, + value=4096, + step=256, + interactive=True, + label="Max output tokens", + ) + with gr.Row(): + chatbot = gr.Chatbot([], elem_id="PULSE", bubble_full_width=False, height=750) + + with gr.Row(): + 
upvote_btn = gr.Button(value="πŸ‘ Upvote", interactive=True) + downvote_btn = gr.Button(value="πŸ‘Ž Downvote", interactive=True) + flag_btn = gr.Button(value="⚠️ Flag", interactive=True) + # stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=True) + regenerate_btn = gr.Button(value="πŸ”„ Regenerate", interactive=True) + clear_btn = gr.Button(value="πŸ—‘οΈ Clear history", interactive=True) + + + chat_input = gr.MultimodalTextbox( + interactive=True, + file_types=["image"], + placeholder="Enter message or upload file...", + show_label=False, + submit_btn="πŸš€" + ) + + print(cur_dir) + gr.Examples( + examples_per_page=5, + examples=[ + [ + { + "files": [ + f"{cur_dir}/examples/ecg_example2.png", + ], + "text": "What are the main features in this ECG image?", + }, + ], + [ + { + "files": [ + f"{cur_dir}/examples/ecg_example1.jpg", + ], + "text": "What can be inferred from the pattern of the qR complexes and rS complexes in the leads of this ECG image?", + }, + ] + ], + inputs=[chat_input], + label="Image", + ) + + gr.Markdown(tos_markdown) + gr.Markdown(learn_more_markdown) + gr.Markdown(bibtext) + + chat_msg = chat_input.submit( + add_message, [chatbot, chat_input], [chatbot, chat_input] + ) + bot_msg = chat_msg.then(bot, [chatbot,temperature, top_p, max_output_tokens], chatbot, api_name="bot_response") + bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input]) + + # chatbot.like(print_like_dislike, None, None) + clear_btn.click( + fn=clear_history, inputs=[chatbot], outputs=[chatbot], api_name="clear_all" + ) + + +demo.queue() + +if __name__ == "__main__": + import argparse + + argparser = argparse.ArgumentParser() + argparser.add_argument("--server_name", default="0.0.0.0", type=str) + argparser.add_argument("--port", default="6123", type=str) + argparser.add_argument( + "--model_path", default="PULSE-ECG/PULSE-7B", type=str + ) + # argparser.add_argument("--model-path", type=str, default="facebook/opt-350m") + argparser.add_argument("--model-base", type=str, default=None) + argparser.add_argument("--num-gpus", type=int, default=1) + argparser.add_argument("--conv-mode", type=str, default=None) + argparser.add_argument("--temperature", type=float, default=0.0) + argparser.add_argument("--max-new-tokens", type=int, default=1024) + argparser.add_argument("--num_frames", type=int, default=16) + argparser.add_argument("--load-8bit", action="store_true") + argparser.add_argument("--load-4bit", action="store_true") + argparser.add_argument("--debug", action="store_true") + + args = argparser.parse_args() + + model_path = args.model_path + filt_invalid = "cut" + model_name = get_model_name_from_path(args.model_path) + tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit) + print("### image_processor",image_processor) + model=model.to(torch.device('cuda')) + our_chatbot = None + demo.launch() \ No newline at end of file diff --git a/examples/.DS_Store b/examples/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/examples/.DS_Store differ diff --git a/examples/172197131626056_P7966202.png b/examples/172197131626056_P7966202.png new file mode 100644 index 0000000000000000000000000000000000000000..69c8112f3ed89c5e2f48749de0691d542e212d1e Binary files /dev/null and b/examples/172197131626056_P7966202.png differ diff --git a/examples/A-17-processors-1024x576.jpg 
b/examples/A-17-processors-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ac4b4a93c6d66f5b61d73fa2439ccf0cac37e19f Binary files /dev/null and b/examples/A-17-processors-1024x576.jpg differ diff --git a/examples/Iphone-15-Usb-c-charger-1024x576.jpg b/examples/Iphone-15-Usb-c-charger-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e0f8423ed48173d09bd5650668e5f2f2e1581421 Binary files /dev/null and b/examples/Iphone-15-Usb-c-charger-1024x576.jpg differ diff --git a/examples/Iphone-15-specs-1024x576.jpg b/examples/Iphone-15-specs-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b6288c0563a7d65a1c61342f5fd949c790c3c30 Binary files /dev/null and b/examples/Iphone-15-specs-1024x576.jpg differ diff --git a/examples/africa.jpg b/examples/africa.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2675f058e77f0c56e90d9b512e8e4b32abfe8c93 Binary files /dev/null and b/examples/africa.jpg differ diff --git a/examples/ballon.jpg b/examples/ballon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4302cec6677da3b7d5e758ea12818eb60b4baae2 Binary files /dev/null and b/examples/ballon.jpg differ diff --git a/examples/bar.jpg b/examples/bar.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b93a2c12086d3cd88f1522598bd5efaedb9b1cd5 Binary files /dev/null and b/examples/bar.jpg differ diff --git a/examples/bigcompany.png b/examples/bigcompany.png new file mode 100644 index 0000000000000000000000000000000000000000..4341f71ded9f7a15bd6dde393abb618de28c89bc --- /dev/null +++ b/examples/bigcompany.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e5066f3ced74d5d28fbfd3696232d728b90e9572335fc4ccb80ac1863aa6ff +size 2012596 diff --git a/examples/bijiasuo2.jpeg b/examples/bijiasuo2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e726539832c903db07f26786167f8dc4e9eaaca4 Binary files /dev/null and b/examples/bijiasuo2.jpeg differ diff --git a/examples/book.jpg b/examples/book.jpg new file mode 100644 index 0000000000000000000000000000000000000000..441711720991caaea6f7c9469a0888ba125a550a Binary files /dev/null and b/examples/book.jpg differ diff --git a/examples/camera.jpg b/examples/camera.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dd04dd3dfad549a13ae1dfe8f3a56785a99e06ed Binary files /dev/null and b/examples/camera.jpg differ diff --git a/examples/changed_bench.jpeg b/examples/changed_bench.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a94074d62c726cb97d26ef71bcfcf337b7592946 Binary files /dev/null and b/examples/changed_bench.jpeg differ diff --git a/examples/code.mp4 b/examples/code.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ca4b0fc3e009e6d2df03a85b281f91bcf768d8b7 Binary files /dev/null and b/examples/code.mp4 differ diff --git a/examples/code1.jpeg b/examples/code1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..cc336414e2cae9f589cb3696624f50fa005525f1 Binary files /dev/null and b/examples/code1.jpeg differ diff --git a/examples/code2.jpeg b/examples/code2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f213d1e0b0ec8e5f81d6325cdc0fc6829fff8878 Binary files /dev/null and b/examples/code2.jpeg differ diff --git a/examples/dog.jpg b/examples/dog.jpg new file mode 100644 index 0000000000000000000000000000000000000000..096366cf2bc802435b0b7a97824ea59c206a9e7d 
Binary files /dev/null and b/examples/dog.jpg differ diff --git a/examples/dog1.jpg b/examples/dog1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..66b4bcef3c6e675f1a47dbc29f18ba14cab8a89c Binary files /dev/null and b/examples/dog1.jpg differ diff --git a/examples/dog6.jpeg b/examples/dog6.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..718bd78ac4027f79bc8e63b5cc67f9dc665f30d4 Binary files /dev/null and b/examples/dog6.jpeg differ diff --git a/examples/dog9.jpeg b/examples/dog9.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..792b713fb4175b32ce138cd1d1501403c20871a3 Binary files /dev/null and b/examples/dog9.jpeg differ diff --git a/examples/dog_to_monkey1.png b/examples/dog_to_monkey1.png new file mode 100644 index 0000000000000000000000000000000000000000..734a3c37d663165f96e5fde1a4be853147cd28de --- /dev/null +++ b/examples/dog_to_monkey1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b21f907e6e0614972102936fbbc6c5d45c93ddb2780c26e04770f860c986d7 +size 2229999 diff --git a/examples/dog_to_monkey2.png b/examples/dog_to_monkey2.png new file mode 100644 index 0000000000000000000000000000000000000000..2fbe72bdd00fcb70f14f5a023e3249dced30932d --- /dev/null +++ b/examples/dog_to_monkey2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28724dace4f330df6fb2ade40b7113d3c77c5e7b5fd4aeff8e1bd39407246fb1 +size 2143496 diff --git a/examples/dynamic-island-1024x576.jpg b/examples/dynamic-island-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6b3d8951a5e12a29351b7ee49b927b5460f7146f Binary files /dev/null and b/examples/dynamic-island-1024x576.jpg differ diff --git a/examples/eagles.jpg b/examples/eagles.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5b389d9061bd101afe526c83b8b3541ec7a39a6d Binary files /dev/null and b/examples/eagles.jpg differ diff --git a/examples/ecg_example1.jpg b/examples/ecg_example1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..647caea88e5ed2e81f1572e5547c1367bb8efb74 Binary files /dev/null and b/examples/ecg_example1.jpg differ diff --git a/examples/ecg_example2.png b/examples/ecg_example2.png new file mode 100644 index 0000000000000000000000000000000000000000..4244813bd75e0c32ab044979cfdb3b3b03ab0bcb Binary files /dev/null and b/examples/ecg_example2.png differ diff --git a/examples/examples_image12.jpg b/examples/examples_image12.jpg new file mode 100644 index 0000000000000000000000000000000000000000..547b4eb582e58e7d058265355c6ef3581bbb48c3 Binary files /dev/null and b/examples/examples_image12.jpg differ diff --git a/examples/examples_image13.jpg b/examples/examples_image13.jpg new file mode 100644 index 0000000000000000000000000000000000000000..89abd71e82eb02560ce0c34653333084e7b0402c Binary files /dev/null and b/examples/examples_image13.jpg differ diff --git a/examples/examples_image14.jpg b/examples/examples_image14.jpg new file mode 100644 index 0000000000000000000000000000000000000000..731e15e8fe838785fd888cffe642ff5e88f177f2 Binary files /dev/null and b/examples/examples_image14.jpg differ diff --git a/examples/fangao1.jpeg b/examples/fangao1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..0daebf1dcca0f940d7b7a97c6de2e63a215be127 Binary files /dev/null and b/examples/fangao1.jpeg differ diff --git a/examples/fangao2.jpeg b/examples/fangao2.jpeg new file mode 100644 index 
0000000000000000000000000000000000000000..673e53ab6344ac313606097f6fc7c01b827e70fc Binary files /dev/null and b/examples/fangao2.jpeg differ diff --git a/examples/fangao3.jpeg b/examples/fangao3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..db45d6dc7501bc8698c3432d23d78943b31d76a5 Binary files /dev/null and b/examples/fangao3.jpeg differ diff --git a/examples/food.jpg b/examples/food.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2bfc975bbade5697f5f79a025cb3fc8791e6bbc1 Binary files /dev/null and b/examples/food.jpg differ diff --git a/examples/girl.jpg b/examples/girl.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8f56be6b6e777183bb0584cc77a069afc7a42813 Binary files /dev/null and b/examples/girl.jpg differ diff --git a/examples/hanzi.jpg b/examples/hanzi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4ae58bff3e7d6abfba6ad487f6d06cffe67e464b Binary files /dev/null and b/examples/hanzi.jpg differ diff --git a/examples/hot_ballon.jpg b/examples/hot_ballon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ebc9cb58941c19c587087dd3e3d359ed1bc942d1 Binary files /dev/null and b/examples/hot_ballon.jpg differ diff --git a/examples/ice_cream.jpg b/examples/ice_cream.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5b97ddc7d275f5ef5d39681320efae7f401ab424 Binary files /dev/null and b/examples/ice_cream.jpg differ diff --git a/examples/image-00007.jpeg b/examples/image-00007.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a18c7a701e132b2981c938e4363b81dd9ed3601c Binary files /dev/null and b/examples/image-00007.jpeg differ diff --git a/examples/image-00053.jpeg b/examples/image-00053.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..1d5c2cfefbd493c8bc79e259313299a7af535e48 Binary files /dev/null and b/examples/image-00053.jpeg differ diff --git a/examples/iphone-15-colors-1024x576.jpg b/examples/iphone-15-colors-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9dd5ee457b5c0391cf6b273744addecca419a442 Binary files /dev/null and b/examples/iphone-15-colors-1024x576.jpg differ diff --git a/examples/iphone-15-price-1024x576.jpg b/examples/iphone-15-price-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..04ee013325bf94c30569d9ad78b8c49c5985e6b6 Binary files /dev/null and b/examples/iphone-15-price-1024x576.jpg differ diff --git a/examples/iphone-15-pricing-1024x576.jpg b/examples/iphone-15-pricing-1024x576.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fb477c0e99b48a4a97a507ce562c6968fd6e55f2 Binary files /dev/null and b/examples/iphone-15-pricing-1024x576.jpg differ diff --git a/examples/line chart.jpg b/examples/line chart.jpg new file mode 100644 index 0000000000000000000000000000000000000000..02766ecbc43d97c82d4d3f6cf388a0d7cac5f587 Binary files /dev/null and b/examples/line chart.jpg differ diff --git a/examples/norway.jpg b/examples/norway.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ed6af75e6e883617374edcdd40e7557751c3155 Binary files /dev/null and b/examples/norway.jpg differ diff --git a/examples/oprah-winfrey-resume.png b/examples/oprah-winfrey-resume.png new file mode 100644 index 0000000000000000000000000000000000000000..e80a3665a04ffc16fcde2c30914cadfd02cc1468 Binary files /dev/null and b/examples/oprah-winfrey-resume.png differ diff --git a/examples/orange.png 
b/examples/orange.png new file mode 100644 index 0000000000000000000000000000000000000000..83da9a9f71f08f94c6618150088aa737ca6bb945 Binary files /dev/null and b/examples/orange.png differ diff --git a/examples/original_bench.jpeg b/examples/original_bench.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e84dfc8554d344a69afb1fe8c7b8b2997d4e5e11 Binary files /dev/null and b/examples/original_bench.jpeg differ diff --git a/examples/pie.jpg b/examples/pie.jpg new file mode 100644 index 0000000000000000000000000000000000000000..da1248da3ef36bd8869548bea1b1059b76c68407 Binary files /dev/null and b/examples/pie.jpg differ diff --git a/examples/pink_lake.jpg b/examples/pink_lake.jpg new file mode 100644 index 0000000000000000000000000000000000000000..027d6bf1dd7a78f7f983fb4569c72b349979ebdd Binary files /dev/null and b/examples/pink_lake.jpg differ diff --git a/examples/resume_a.jpg b/examples/resume_a.jpg new file mode 100644 index 0000000000000000000000000000000000000000..abb1a5d9c10bfc957b6da2d1e0fb42194cc4c284 Binary files /dev/null and b/examples/resume_a.jpg differ diff --git a/examples/resume_b.jpg b/examples/resume_b.jpg new file mode 100644 index 0000000000000000000000000000000000000000..88c5059331bbce957f3cab20bbb8cd429a33aeed Binary files /dev/null and b/examples/resume_b.jpg differ diff --git a/examples/shua.jpg b/examples/shua.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1ce2a9d5318d0d1b63f58c29429d0c689c2cd82a Binary files /dev/null and b/examples/shua.jpg differ diff --git a/examples/shub.jpg b/examples/shub.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c0bf426f471f648df1e3cd6a8fe123210c04909 Binary files /dev/null and b/examples/shub.jpg differ diff --git a/examples/shuc.jpg b/examples/shuc.jpg new file mode 100644 index 0000000000000000000000000000000000000000..764be26dc69fb94cbb9bf14d24bdfed870430fbe Binary files /dev/null and b/examples/shuc.jpg differ diff --git a/examples/shud.jpg b/examples/shud.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c607a6559e49d85ee00ce72ea917c6085974891f Binary files /dev/null and b/examples/shud.jpg differ diff --git a/examples/south africa.jpg b/examples/south africa.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cf05b1c5cedff0c829a729c01977a5684ef60cb8 Binary files /dev/null and b/examples/south africa.jpg differ diff --git a/examples/steve-jobs-resume.jpg b/examples/steve-jobs-resume.jpg new file mode 100644 index 0000000000000000000000000000000000000000..554eb37c0c573491543131597ed7aec9226bce65 Binary files /dev/null and b/examples/steve-jobs-resume.jpg differ diff --git a/examples/strawberry.png b/examples/strawberry.png new file mode 100644 index 0000000000000000000000000000000000000000..85e6989e96cac62a5b74468cb96c142644e54b5f Binary files /dev/null and b/examples/strawberry.png differ diff --git a/examples/totoro.jpg b/examples/totoro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fe090b615b6d8e0826230ffd277464f79d2f6bd5 Binary files /dev/null and b/examples/totoro.jpg differ diff --git a/examples/twitter1.jpeg b/examples/twitter1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..81e8462f145732e183f27c7e657599536849a078 Binary files /dev/null and b/examples/twitter1.jpeg differ diff --git a/examples/twitter2.jpeg b/examples/twitter2.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f0ea41c6eb9e77096ecaa8b35d8348158cc4bbc8 --- /dev/null 
+++ b/examples/twitter2.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdf155c8f619226f70f1953611c6f659953be4266a2f21be3903140c7f4eb6c +size 1499716 diff --git a/examples/twitter3.jpeg b/examples/twitter3.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..34828da8854978c5e72792ac9968d596d2db8b39 --- /dev/null +++ b/examples/twitter3.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81f7af9a87a63348f15a0ae26308253bbd765f8f49012d452fe73558eb5cea0 +size 1827735 diff --git a/examples/twitter4.jpeg b/examples/twitter4.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..f0477465bfbd7351ef6a824086bb7a9d4f9478ea --- /dev/null +++ b/examples/twitter4.jpeg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d9758ee6b319f6fd5829ded57e188346161f41b432949b38dd0837992757879 +size 2001277 diff --git a/examples/user_example_05.jpg b/examples/user_example_05.jpg new file mode 100644 index 0000000000000000000000000000000000000000..268e122527566f70b52dbff8f693e8d54c25d5f0 Binary files /dev/null and b/examples/user_example_05.jpg differ diff --git a/examples/user_example_07.jpg b/examples/user_example_07.jpg new file mode 100644 index 0000000000000000000000000000000000000000..28e407e5dace0fc725e256c8dc31d2a88de73779 --- /dev/null +++ b/examples/user_example_07.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7cbd447bd8a78f7b5ab068d641ef12ae31b2092894780465e751d8c7db049d +size 1388264 diff --git a/gitattributes.txt b/gitattributes.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a4be208548b1a07a70623f9622feb84a676a42f --- /dev/null +++ b/gitattributes.txt @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +examples/bigcompany.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey1.png filter=lfs diff=lfs merge=lfs -text +examples/dog_to_monkey2.png filter=lfs diff=lfs merge=lfs -text +examples/twitter2.jpeg filter=lfs diff=lfs 
merge=lfs -text +examples/twitter3.jpeg filter=lfs diff=lfs merge=lfs -text +examples/twitter4.jpeg filter=lfs diff=lfs merge=lfs -text +examples/user_example_07.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a719a26d73d0bc786c7708fe35574fdcd5a06be0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,34 @@ +git+https://github.com/paralym/LLaVA-NeXT.git +ninja +opencv-python +open_clip_torch +fastapi +gradio==4.35.0 +gradio_client==1.0.1 +markdown2[all] +numpy==1.26.4 +requests +sentencepiece +torch==2.1.2 +torchvision==0.16.2 +uvicorn +wandb==0.16.5 +deepspeed==0.12.2 +peft==0.4.0 +accelerate>=0.29.1 +tokenizers~=0.15.2 +transformers +bitsandbytes==0.41.0 +scikit-learn==1.2.2 +sentencepiece~=0.1.99 +einops==0.6.1 +einops-exts==0.0.4 +pydantic>=2.0 +timm +hf_transfer +decord +datasets +tyro +scipy +rouge +urllib3~=2.0 diff --git a/serve_constants.py b/serve_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..86a0372de3ce25d2cb3f4a847aec6c0eced66b47 --- /dev/null +++ b/serve_constants.py @@ -0,0 +1,124 @@ +title_markdown = """ +
+LLaVA-NeXT
+LLaVA OneVision: Multimodal Chat
+Video Model | Github | Huggingface | Blog | More
+"""
+
+html_header = """
+LLaVA-NeXT
+PANGEA: A Fully Open Multilingual Multimodal LLM for 39 Languages
+Code | Checkpoints | Data | PANGEA Demo
+""" + +block_css = """ +#buttons button { + min-width: min(120px,100%); +} +""" + +tos_markdown = """ +## Terms of use +By using this service, users are required to agree to the following terms: +The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research. + +We deploy our model backend with SGLang. However, there could be congestion during the serving process, leading to delayed responses. If you encounter any issues with the webpage, kindly refresh it. +""" + + +learn_more_markdown = """ +## License +The service is a research preview and is subject to the [License](https://huggingface.co/Qwen/Qwen2-7B-Instruct/blob/main/LICENSE) of Qwen2, the [License](https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/LICENSE) of LLaVA-NEXT, and the [Terms of Use](https://openai.com/policies/terms-of-use) governing the data generated by OpenAI. Users are required to strictly adhere to the terms outlined in these licenses. Please contact us if you identify any potential violations. +""" + +bibtext = """ +## Citation +``` +@article{yue2024pangea, + title={Pangea: A Fully Open Multilingual Multimodal LLM for 39 Languages}, + author={Yue, Xiang and Song, Yueqi and Asai, Akari and Kim, Seungone and Nyandwi, Jean de Dieu and Khanuja, Simran and Kantharuban, Anjali and Sutawika, Lintang and Ramamoorthy, Sathyanarayanan and Neubig, Graham}, + journal={arXiv preprint arXiv:2410.16153}, + year={2024} +} +``` +""" + +block_css = """ + +#buttons button { + min-width: min(120px,100%); +} + +"""