Spaces:

ShaoRun
/

AllSparkv2

Runtime error

App Files Files Community

ShaoRun committed on Mar 1

Commit

7854c68

verified ·

1 Parent(s): b2a1d7a

Update app.py

Browse files

Files changed (1) hide show

app.py +219 -145

app.py CHANGED Viewed

@@ -1,154 +1,228 @@
-import gradio as gr
-import numpy as np
 import random
-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
 import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-# @spaces.GPU #[uncomment to use ZeroGPU]
-def infer(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0, variant="primary")
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
-                )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
                 )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2,  # Replace with defaults that work for your model
-                )
-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
         inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
         ],
-        outputs=[result, seed],
     )
-if __name__ == "__main__":
-    demo.launch()

+import sys
+from io import BytesIO
+import os
 import random
 import torch
+import gradio as gr
+sys.path.append("../")
+from mm_models import AllSparkForCausalLM
+from transformers import AutoImageProcessor, AutoTokenizer
+from PIL import Image
+import numpy as np
+from plyfile import PlyData
+import plotly.graph_objects as go
+from mm_datasets.data_utils import point_preprocess, load_pts, process_pts
+import matplotlib.pyplot as plt
+from utils import SYSTEM_PROMPT
+system_prompt = SYSTEM_PROMPT
+def show_pointcloud(point_input, background='rgb(50,50,50)'):
+    """Render a point cloud as a PIL image of a Plotly 3D scatter plot.
+
+    Args:
+        point_input: Point-cloud source handed to ``load_pts`` (the demo
+            wires the uploaded ``.npy`` file component to it). ``None``
+            means nothing was uploaded.
+        background: Colour string for the figure background; ``None``
+            falls back to dark gray ``rgb(50,50,50)``.
+
+    Returns:
+        A ``PIL.Image`` of the rendered scatter plot, or ``None`` when
+        ``point_input`` is ``None``.
+    """
+    if point_input is None:
+        return None
+    data = load_pts(point_input)
+    # presumably 8192 is the number of points kept by process_pts - TODO confirm
+    data = process_pts(data, 8192, True).numpy()
+    points = data[:, :3]
+    colors = data[:, 3:6]
+    # A slice is never None, so check that the three colour columns are really
+    # present (xyz-only data yields fewer columns) before reading them.
+    if colors.shape[0] > 0 and colors.shape[1] == 3:
+        # * colors in range(0-1) are scaled to 0-255; anything else is taken
+        # * as already being 0-255
+        scaled = np.multiply(colors, 255) if np.max(colors) <= 1 else colors
+        # Clip so out-of-range values still give a valid rgb() string; the
+        # previous code left color_data undefined when the maximum exceeded 255.
+        color_data = np.clip(scaled, 0, 255).astype(int)
+    else:
+        color_data = np.zeros_like(points).astype(int)  # Default to black color if RGB information is not available
+    color_strings = ['rgb({},{},{})'.format(r, g, b) for r, g, b in color_data]
+    fig = go.Figure(
+        data=[
+            go.Scatter3d(
+                x=points[:, 0], y=points[:, 1], z=points[:, 2],
+                mode='markers',
+                marker=dict(
+                    size=1.2,
+                    color=color_strings,  # Use the list of RGB strings for the marker colors
                 )
+            )
+        ],
+        layout=dict(
+            scene=dict(
+                xaxis=dict(visible=False),
+                yaxis=dict(visible=False),
+                zaxis=dict(visible=False)
+            ),
+            paper_bgcolor='rgb(50,50,50)' if background is None else background  # Set the background color to dark gray 50, 50, 50
+        ),
+    )
+    # convert to PIL image
+    img_bytes = fig.to_image(format="png", engine="kaleido")
+    img = Image.open(BytesIO(img_bytes))
+    return img
+# load model
+model_path = "[path/to/model]"
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    model = AllSparkForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()
+    img_processor = AutoImageProcessor.from_pretrained(model_path)
+    # map each modality tag to the placeholder token inserted into the prompt
+    modal_place_token = dict()
+    for modal_cfg in model.config.modal_configs:
+        modal_place_token[modal_cfg['modal_tag']] = modal_cfg['modal_placeholder_token']
+except Exception as exc:
+    # Keep the UI startable without a checkpoint (process() reports
+    # 'Please load the model first' while model is None), but say why loading
+    # failed instead of hiding it. Catching Exception rather than using a bare
+    # except lets KeyboardInterrupt / SystemExit propagate.
+    print(f"Failed to load model from {model_path!r}: {exc!r}", file=sys.stderr)
+    model = None
+# Header text for the demo page; rendered through gr.Markdown(MARKDOWN) as the
+# first element of the Blocks layout.
+MARKDOWN = """
+# AllSpark V2🔥
+<div>
+    <a href="https://arxiv.org/pdf/2408.00203">
+        <img src="https://img.shields.io/badge/arXiv-2408.00203-b31b1b.svg" alt="Arxiv" style="display:inline-block;">
+    </a>
+</div>
+AllSparkv2 is a language-centric progressive omni-modal learning framework
+"""
+def _generate_reply(question, modal_inputs=None, max_new_tokens=1024, **sampling_kwargs):
+    """Run one system+user chat turn through the model and decode the output.
+
+    Args:
+        question: User message text (already containing any modality
+            placeholder token).
+        modal_inputs: Optional list of ``(modal_tag, tensor)`` pairs.
+        max_new_tokens: Generation length limit.
+        **sampling_kwargs: Extra keyword arguments forwarded to
+            ``model.generate`` (e.g. ``temperature``).
+
+    Returns:
+        The decoded text of ``outputs[0]`` with special tokens removed.
+    """
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": question}
+    ]
+    inputs = tokenizer.apply_chat_template(
+        messages, tokenize=True, add_generation_prompt=True, return_tensors='pt'
+    ).to(model.device)
+    generate_kwargs = dict(
+        do_sample=True,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id,
+        max_new_tokens=max_new_tokens,
+        **sampling_kwargs
+    )
+    if modal_inputs is not None:
+        # wrapped in an outer list, exactly as the original call sites passed it
+        generate_kwargs['modal_inputs'] = [modal_inputs]
+    outputs = model.generate(inputs, **generate_kwargs)
+    # NOTE(review): outputs[0] is decoded whole; if generate() echoes the prompt
+    # tokens the answer will include them - confirm against AllSparkForCausalLM.
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+@torch.inference_mode()
+# @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
+def process(
+    image_input,
+    point_input,
+    text_input
+):
+    """Answer a question about optional image or point-cloud input.
+
+    Args:
+        image_input: PIL image from the upload widget, or ``None``.
+        point_input: Uploaded point-cloud file passed to ``load_pts``, or
+            ``None``. When both an image and a point cloud are given, the
+            point cloud is used and the image is ignored.
+        text_input: The user's question.
+
+    Returns:
+        A single string (the answer or a hint message). The submit button is
+        wired to one text output, so every branch returns exactly one value.
+    """
+    if model is None:
+        return 'Please load the model first'
+    # no user input (an untouched textbox may deliver an empty string, not None)
+    if text_input is None or not text_input.strip():
+        return 'Please enter your question'
+    # only natural language
+    if image_input is None and point_input is None:
+        # Previously returned the tuple (None, text); the handler has a single
+        # output component, so return just the text.
+        return _generate_reply(text_input, max_new_tokens=512, temperature=0.6)
+    # language - vision
+    if point_input is None:
+        img = image_input.convert("RGB")
+        pixel_values = img_processor(images=img, return_tensors="pt").pixel_values
+        # follow the model's device rather than a hard-coded "cuda"
+        img = pixel_values.to(model.device).squeeze().to(model.dtype)
+        question = modal_place_token['vision'] + "\n" + text_input
+        # no explicit temperature here, as in the original vision branch
+        return _generate_reply(question, modal_inputs=[('vision', img)])
+    # language - point
+    point_cloud = load_pts(point_input)
+    point_cloud = process_pts(point_cloud, 8192, True)
+    # The preview is produced by the file component's change handler; the old
+    # show_pointcloud() call here re-rendered the cloud and threw the image away.
+    point_cloud = point_cloud.to(model.device).squeeze().to(model.dtype)
+    question = modal_place_token['point'] + "\n" + text_input
+    return _generate_reply(question, modal_inputs=[('point', point_cloud)], temperature=0.6)
+# Demo layout: inputs (image, point file, question, submit) in the left column,
+# outputs (image preview, text answer) in the right column.
+with gr.Blocks() as demo:
+    gr.Markdown(MARKDOWN)
+    with gr.Row():
+        with gr.Column():
+            image_input_component = gr.Image(
+                type='pil', label='Upload image')
+            point_input_component = gr.File(
+                label="Upload point data",
+                file_types=['.npy'],
+                file_count='single')
+            text_input_component = gr.Textbox(label="Text input", placeholder="Chat with AllSparkv2...")
+            submit_button_component = gr.Button(
+                value='Submit', variant='primary')
+        with gr.Column():
+            image_output_component = gr.Image(type='pil', label='Image Output')
+            text_output_component = gr.Textbox(label='Answer', placeholder='Text Output')
+    # automatically visualize the point cloud data once uploaded
+    point_input_component.change(
+        fn=show_pointcloud,
+        inputs=point_input_component,
+        outputs=image_output_component
+    )
+    # process() receives (image, point file, text) in this order and its return
+    # value is shown in the single answer textbox
+    submit_button_component.click(
+        fn=process,
         inputs=[
+            image_input_component,
+            point_input_component,
+            text_input_component
+        ],
+        outputs=text_output_component
+    )
+    # each example row is [text, image path, point-cloud path], matching the
+    # order of the inputs list below (not the order of process() arguments)
+    gr.Examples(
+        examples=[
+            ["How do you explain to an elementary school student: why the sun rises in the east and sets in the west?", None, None],
+            ["What does this picture mean for max?", "inference/demo_assets/image2.png", None],
+            ["What is it?", None, "inference/demo_assets/e393be9a47a24a7cae6142e13f5686d1_8192.npy"]
         ],
+        inputs=[text_input_component, image_input_component, point_input_component]
     )
+# demo.launch(debug=False, show_error=True, share=True)
+# demo.launch(share=True, server_port=7861, server_name='0.0.0.0')
+# NOTE(review): this runs at import time with no __main__ guard, and share=True
+# requests a public share link - confirm both are intended for this deployment.
+demo.queue().launch(share=True)