Indonesia-Energy-Transition-demo

Runtime error

File size: 9,397 Bytes

faa4f79
 
79f86c4
3a57265
 
faa4f79
 
 
3a57265
faa4f79
 
 
e5ba201
3a57265
faa4f79
 
77652fd
2d73e21
faa4f79
 
 
 
79f86c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b92c9b
b7a6a02
7b92c9b
 
 
3a57265
052c832
 
 
 
 
 
7b92c9b
 
 
 
 
 
3a57265
7b92c9b
 
 
 
 
faa4f79
79f86c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa4f79
69fd992
 
 
79f86c4
 
eeb9857
69fd992
79f86c4
 
69fd992
 
 
 
faa4f79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0f77bf
79f86c4
 
 
 
faa4f79
 
 
 
 
 
 
 
 
 
 
 
 
 
4b359d4
faa4f79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1be807e
7b92c9b
faa4f79
 
 
 
b623ba1
10de841
faa4f79
 
 
 
5ff0878
10de841
faa4f79
 
 
 
75bc521
 
7b92c9b
 
 
 
75bc521
 
7b92c9b
 
 
 
75bc521
 
7b92c9b
 
 
 
75bc521
 
7b92c9b
 
 
 
75bc521
 
7b92c9b
 
faa4f79
 
8378e4a
9fff671
8378e4a
 
 
faa4f79
 
 
00cae9a
ee32c7c
faa4f79
 
9bfbc93
8cf9168
faa4f79
 
7dffafc
8378e4a
faa4f79

#!/usr/bin/env python

import os
import re
import tempfile
from collections.abc import Iterator
from threading import Thread

import cv2
import gradio as gr
import spaces
import torch
from loguru import logger
from PIL import Image
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer

# Checkpoint to load; can be overridden through the MODEL_ID environment variable.
# NOTE(review): the default checkpoint name does not look like a Gemma 3 model, while the
# loader class below is Gemma3ForConditionalGeneration — confirm the two are compatible.
model_id = os.getenv("MODEL_ID", "LiquidAI/LFM2-1.2B")
# The processor is used in `run` both to build the chat-template inputs and as the
# tokenizer handed to the text streamer.
processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
# Model weights are loaded once at import time, in bfloat16 with eager attention.
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16, attn_implementation="eager"
)

# Upper bound on images per conversation and on frames sampled from a video;
# can be overridden through the MAX_NUM_IMAGES environment variable.
MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))


def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """Tally the attachments of an incoming message by media kind.

    A path ending in ``.mp4`` is counted as a video; every other path is
    counted as an image.

    Args:
        paths: File paths attached to the new message.

    Returns:
        A ``(image_count, video_count)`` tuple.
    """
    is_video_flags = [file_path.endswith(".mp4") for file_path in paths]
    videos = sum(is_video_flags)
    images = len(is_video_flags) - videos
    return images, videos


def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """Tally the files users have already sent earlier in the conversation.

    Only entries whose role is ``"user"`` and whose content is not a plain
    string are considered; the first element of such content is treated as a
    file path and classified by its ``.mp4`` suffix.

    Args:
        history: Chat history entries, each with ``"role"`` and ``"content"`` keys.

    Returns:
        A ``(image_count, video_count)`` tuple.
    """
    tally = {"image": 0, "video": 0}
    for entry in history:
        is_user_file = entry["role"] == "user" and not isinstance(entry["content"], str)
        if is_user_file:
            kind = "video" if entry["content"][0].endswith(".mp4") else "image"
            tally[kind] += 1
    return tally["image"], tally["video"]


def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """Check that the new message keeps the conversation within the media limits.

    The rules, checked in order, are: at most one video in the whole
    conversation; a video may not be combined with images or with ``<image>``
    tags; without a video, no more than ``MAX_NUM_IMAGES`` images in total; and
    when ``<image>`` tags are used, their number must equal the number of
    images attached to the new message. The first violated rule raises a
    Gradio warning.

    Args:
        message: New message with ``"text"`` and ``"files"`` keys.
        history: Previous chat history entries.

    Returns:
        ``True`` when every rule holds, ``False`` after a warning was issued.
    """
    fresh_images, fresh_videos = count_files_in_new_message(message["files"])
    past_images, past_videos = count_files_in_history(history)
    total_images = past_images + fresh_images
    total_videos = past_videos + fresh_videos

    if total_videos > 1:
        gr.Warning("Only one video is supported.")
        return False

    if total_videos == 1 and total_images > 0:
        gr.Warning("Mixing images and videos is not allowed.")
        return False

    if total_videos == 1 and "<image>" in message["text"]:
        gr.Warning("Using <image> tags with video files is not supported.")
        return False

    if total_videos == 0 and total_images > MAX_NUM_IMAGES:
        gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
        return False

    prompt = message["text"]
    # Tags are optional, but once present each one must map to exactly one new image.
    if "<image>" in prompt and prompt.count("<image>") != fresh_images:
        gr.Warning("The number of <image> tags in the text does not match the number of images.")
        return False

    return True


def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to ``MAX_NUM_IMAGES`` evenly spaced frames from a video file.

    Frames are read at a fixed stride of ``total_frames // MAX_NUM_IMAGES``
    (at least 1), converted from BGR to RGB and wrapped as PIL images.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A list of ``(frame, timestamp)`` pairs, where ``timestamp`` is the
        frame position in seconds rounded to two decimals. When the reported
        frame rate is not positive the timestamp falls back to ``0.0``.
        Frames that cannot be decoded are skipped, so the list may be shorter
        than ``MAX_NUM_IMAGES`` (and is empty if nothing could be read).
    """
    vidcap = cv2.VideoCapture(video_path)
    frames: list[tuple[Image.Image, float]] = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        frame_interval = max(total_frames // MAX_NUM_IMAGES, 1)
        last_index = min(total_frames, MAX_NUM_IMAGES * frame_interval)

        for i in range(0, last_index, frame_interval):
            if len(frames) >= MAX_NUM_IMAGES:
                break

            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image)
            # A missing/zero frame rate would otherwise raise ZeroDivisionError.
            timestamp = round(i / fps, 2) if fps > 0 else 0.0
            frames.append((pil_image, timestamp))
    finally:
        # Always free the capture handle, even if decoding or conversion fails.
        vidcap.release()
    return frames


def process_video(video_path: str) -> list[dict]:
    """Turn a video into alternating text/image chat content entries.

    Each sampled frame is written to its own persistent ``.png`` temporary
    file (not deleted on close, because the entry refers to it by path) and
    contributes a ``"Frame <timestamp>:"`` text entry followed by an image
    entry pointing at that file.

    Args:
        video_path: Path of the video file to process.

    Returns:
        The list of content entries, two per sampled frame.
    """
    content = []
    for pil_image, timestamp in downsample_video(video_path):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            frame_path = temp_file.name
            pil_image.save(frame_path)
            content.extend(
                (
                    {"type": "text", "text": f"Frame {timestamp}:"},
                    {"type": "image", "url": frame_path},
                )
            )
    logger.debug(f"{content=}")
    return content


def process_interleaved_images(message: dict) -> list[dict]:
    """Build chat content where ``<image>`` tags are replaced by the attached files.

    The message text is split on ``<image>``; the n-th tag becomes an image
    entry for the n-th attached file, and the text between tags becomes text
    entries. Non-blank text is stripped of surrounding whitespace, while blank
    or empty fragments are kept verbatim.

    Args:
        message: New message with ``"text"`` and ``"files"`` keys.

    Returns:
        The interleaved list of text and image content entries.
    """
    logger.debug(f"{message['files']=}")
    parts = re.split(r"(<image>)", message["text"])
    logger.debug(f"{parts=}")

    content = []
    next_file = 0
    for part in parts:
        logger.debug(f"{part=}")
        if part != "<image>":
            stripped = part.strip()
            # Fall back to the raw fragment when stripping leaves nothing.
            content.append({"type": "text", "text": stripped or part})
            continue
        content.append({"type": "image", "url": message["files"][next_file]})
        logger.debug(f"file: {message['files'][next_file]}")
        next_file += 1
    logger.debug(f"{content=}")
    return content


def process_new_user_message(message: dict) -> list[dict]:
    """Convert the incoming message into a list of chat content entries.

    Four cases are handled: text only; a video as the first attachment (the
    text followed by the sampled frames); text containing ``<image>`` tags
    (images interleaved at the tag positions); and plain images (the text
    followed by one image entry per file).

    Args:
        message: New message with ``"text"`` and ``"files"`` keys.

    Returns:
        The content entries for the new user turn.
    """
    attachments = message["files"]
    text_entry = {"type": "text", "text": message["text"]}

    if not attachments:
        return [text_entry]

    first_file = attachments[0]
    if first_file.endswith(".mp4"):
        return [text_entry, *process_video(first_file)]

    if "<image>" in message["text"]:
        return process_interleaved_images(message)

    image_entries = [{"type": "image", "url": path} for path in attachments]
    return [text_entry, *image_entries]


def process_history(history: list[dict]) -> list[dict]:
    """Convert the chat history into chat-template messages.

    Consecutive non-assistant entries are merged into a single user message:
    string content becomes a text entry, anything else is treated as a file
    whose first element is the path of an image entry. The merged user
    message is emitted when the next assistant entry is reached, so user
    entries after the last assistant entry are not included in the result.

    Args:
        history: Chat history entries, each with ``"role"`` and ``"content"`` keys.

    Returns:
        The list of ``{"role", "content"}`` messages.
    """
    messages = []
    pending_user_parts: list[dict] = []
    for entry in history:
        payload = entry["content"]
        if entry["role"] != "assistant":
            if isinstance(payload, str):
                pending_user_parts.append({"type": "text", "text": payload})
            else:
                pending_user_parts.append({"type": "image", "url": payload[0]})
            continue

        # An assistant turn closes the user turn collected so far (if any).
        if pending_user_parts:
            messages.append({"role": "user", "content": pending_user_parts})
            pending_user_parts = []
        messages.append({"role": "assistant", "content": [{"type": "text", "text": payload}]})
    return messages


# NOTE(review): presumably requests a GPU slot for up to 120 s per call on Spaces — confirm.
@spaces.GPU(duration=120)
def run(message: dict, history: list[dict], system_prompt: str = "You are a helpful assistant who always provides helpful answer", max_new_tokens: int = 1024) -> Iterator[str]:
    """Generate a streamed reply to the new message.

    Args:
        message: New message with ``"text"`` and ``"files"`` keys.
        history: Previous chat history entries.
        system_prompt: Optional system instruction; skipped when empty.
        max_new_tokens: Generation length limit passed to ``model.generate``.

    Yields:
        The reply accumulated so far, once per streamed chunk. A single empty
        string is yielded when the media constraints are violated.
    """
    # Reject the request early (a warning has already been shown to the user).
    if not validate_media_constraints(message, history):
        yield ""
        return

    # Assemble the conversation: optional system prompt, prior turns, new user turn.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
    messages.extend(process_history(history))
    messages.append({"role": "user", "content": process_new_user_message(message)})

    # Tokenize through the chat template and move the tensors to the model's device.
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device=model.device, dtype=torch.bfloat16)

    # The streamer waits at most 30 s for the next chunk and omits the prompt
    # and special tokens from the emitted text.
    streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
    )
    # Generation runs in a background thread so this generator can consume the
    # streamer while tokens are still being produced.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    # Yield the growing reply rather than individual deltas.
    output = ""
    for delta in streamer:
        output += delta
        yield output


# Example prompts passed to the chat interface below. Each example is a
# one-element list holding a message dict with the prompt text and the paths
# of any attached files.
examples = [
    [
        {
            "text": "Saya perlu membuat judul penulisan karya ilmiah yang berkaitan dengan energi baru terbarukan dan transisi energi di Indonesia, berikan contoh daftar judul yang berkaitan.",
            "files": [],
        }
    ],
    [
        {
            "text": "Jelaskan chart ini",
            "files": ["assets/additional-examples/IESR-infographic.jpg"],
        }
    ],
    [
        {
            "text": "Saya membayar BBM sebesar ini <image> berapa liter BBM yang saya dapatkan jika saya membeli dengan uang 1 juta rupiah?",
            "files": ["assets/additional-examples/struk.jpeg"],
        }
    ],
    [
        {
            "text": "Bagaimana pajak karbon dapat berfungsi untuk mengurangi emisi?",
            "files": [],
        }
    ],
    [
        {
            "text": "Mengapa tarif FIT yang diindeks berdasarkan biaya produksi PLN dapat menghambat pengembangan energi terbarukan di wilayah seperti Jawa?",
            "files": [],
        }
    ],
    [
        {
            "text": "Jelaskan apa saja yang dimaksud dengan Energi Baru Terbarukan, dan apa saja potensi EBT di Indonesia",
            "files": [],
        }
    ],
    [
        {
            "text": "Berapa target capaian EBT di Indonesia?",
            "files": [],
        }
    ],
    [
        {
            "text": "Apa saja pilar utama dalam Kebijakan Energi Nasional menurut PP 79/2014?",
            "files": [],
        }
    ],
]

# HTML snippet (the Space logo) passed as the interface description.
DESCRIPTION = """\
<img src='https://huggingface.co/spaces/gmonsoon/Indonesia-Energy-Transition-demo/resolve/main/assets/logo-updated.png' id='logo' />

"""

# Multimodal chat UI wired to `run`; the two additional inputs map, in order,
# to the `system_prompt` and `max_new_tokens` parameters of `run`.
demo = gr.ChatInterface(
    fn=run,
    type="messages",
    chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
    # Uploads are limited to images and .mp4 files; several files may be attached at once.
    textbox=gr.MultimodalTextbox(file_types=["image", ".mp4"], file_count="multiple", autofocus=True),
    multimodal=True,
    additional_inputs=[
        gr.Textbox(label="System Prompt", value="You are a helpful assistant who always provides helpful answer"),
        gr.Slider(label="Max New Tokens", minimum=512, maximum=2000, step=10, value=1024),
    ],
    stop_btn=False,
    title="REnewbie-LLM - Indonesia Energy Transition and Renewable Energy (DEMO)",
    description=DESCRIPTION,
    examples=examples,
    run_examples_on_click=False,
    cache_examples=False,
    css_paths="style.css",
    # NOTE(review): presumably (cleanup frequency, maximum file age) in seconds — confirm against Gradio docs.
    delete_cache=(1800, 1800),
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()