Spaces:

Fengx1nn
/

V-MAGE-EVAL-DEMO

Sleeping

App Files Files Community

Fengx1n committed on Jul 31

Commit

504b2e4

1 Parent(s): 02f0e64

first commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +180 -0
agent/game_agent.py +310 -0
app.py +292 -0
config/env_config/3steps/env_config_flappybird_reasoning_3steps.json +34 -0
config/env_config/3steps/env_config_pong_reasoning_3steps.json +34 -0
config/env_config/3steps/env_config_race_reasoning_3steps.json +34 -0
config/env_config/3steps/env_config_supermario_reasoning_3steps.json +34 -0
config/env_config/3steps/env_config_tempestrun_reasoning_3steps.json +34 -0
config/level_config/flappybirdgame/level1.json +4 -0
config/level_config/flappybirdgame/level2.json +4 -0
config/level_config/flappybirdgame/level3.json +4 -0
config/level_config/flappybirdgame/level4.json +4 -0
config/level_config/flappybirdgame/level5.json +4 -0
config/level_config/flappybirdgame/level6.json +4 -0
config/level_config/flappybirdgame/level7.json +4 -0
config/level_config/ponggame/level1.json +3 -0
config/level_config/ponggame/level2.json +3 -0
config/level_config/ponggame/level3.json +3 -0
config/level_config/racegame/level1.json +5 -0
config/level_config/racegame/level1_no_history.json +5 -0
config/level_config/racegame/level2.json +5 -0
config/level_config/racegame/level2_no_history.json +5 -0
config/level_config/racegame/level3.json +5 -0
config/level_config/racegame/level3_no_history.json +5 -0
config/level_config/racegame/level4.json +5 -0
config/level_config/racegame/level5.json +5 -0
config/level_config/racegame/level6.json +5 -0
config/level_config/racegame/level7.json +5 -0
config/level_config/racegame/level8.json +5 -0
config/level_config/racegame/level9.json +5 -0
config/level_config/supermariogame/level0.json +4 -0
config/level_config/supermariogame/level1.json +4 -0
config/level_config/supermariogame/level2.json +4 -0
config/level_config/supermariogame/level3.json +4 -0
config/level_config/supermariogame/level4.json +4 -0
config/level_config/supermariogame/level5.json +4 -0
config/level_config/supermariogame/level6.json +4 -0
config/level_config/supermariogame/level7.json +4 -0
config/level_config/supermariogame/level8.json +4 -0
config/level_config/supermariogame/level9.json +4 -0
config/level_config/tempestrungame/level1.json +3 -0
config/level_config/tempestrungame/level2.json +3 -0
config/level_config/tempestrungame/level3.json +3 -0
config/level_config/tempestrungame/level4.json +3 -0
config/level_config/tempestrungame/level5.json +3 -0
config/model_config/claude_sonnet_3_7_config.ini +5 -0
config/model_config/generation_config.ini +6 -0
config/model_config/low_tokens_generation_config.ini +6 -0
config/model_config/openai_service_config.ini +5 -0
config/model_config/random.ini +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,180 @@

+*.jpg
+/*.png
+*.jpeg
+runs/
+.vscode/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+run_gpt4o.py
+.amltconfig
+wandb/
+*.whl
+test/test_qwen.py
+**/private/
+highscore.txt
+settings.json
+config.ini

agent/game_agent.py ADDED Viewed

	@@ -0,0 +1,310 @@

+import logging
+import queue
+import random
+import re
+from time import sleep
+from typing import Dict
+from utils.config import Config
+from utils.encoding_utils import encode_data_to_base64_path, encode_image_path
+from utils.file_utils import assemble_project_path, get_all_files
+from utils.json_utils import parse_semi_formatted_text
+from utils.lmm_utils import assemble_prompt
+from utils.planner_utils import _extract_keys_from_template
+import json
+config = Config()
+class game_agent:
+    def __init__(self, llm_provider=None):
+        """Build the agent's prompt template and history settings from the global config.
+
+        Args:
+            llm_provider: Provider object stored via ``reset_provider`` and later used
+                by ``execute_action`` to create completions.
+        """
+        # if config has attribute level_prompt, use it, otherwise use default prompt
+        if hasattr(config, "level_prompt") and config.level_prompt is not None:
+            self.prompt = config.level_prompt
+            print("Using level prompt from config file: " + self.prompt)
+        else:
+            self.prompt = config.prompt
+            print("Using default prompt: " + self.prompt)
+        # Keep the untouched template: generate_input() resets prompt_template to it on every step.
+        self.prompt_template_origin, _, _ = _extract_keys_from_template(self.prompt)
+        self.prompt_template = self.prompt_template_origin
+        self.use_instruction = config.use_instruction
+        if self.use_instruction:
+            self.instruction_template = config.instruction
+        self.use_history = config.use_history
+        if self.use_history:
+            self.history_template = config.history
+            # history_size = number of history template entries that carry an "image" key.
+            self.history_size = len([image_history for image_history in self.history_template if "image" in image_history])
+            print(f"history_size: {self.history_size}")
+            self.history = []
+            self.use_sample_history = config.use_sample_history
+            if self.use_sample_history:
+                self.sample_size = config.sample_size
+                # NOTE: "histroy" (sic) is the attribute name read from the config object.
+                self.sample_histroy_template = config.sample_histroy_template
+        else:
+            # NOTE(review): use_sample_history is not set on this path; within this class it is
+            # only read after use_history has been checked first.
+            self.history_size = 1
+            self.history = []
+        self.reset_provider(llm_provider)
+        logging.info("prompt: " + self.prompt_template_origin)
+        print("prompt: " + self.prompt_template_origin)
+    def reset_provider(self, llm_provider):
+        """Replace the LLM provider and discard the most recent history entry, if any.
+
+        Args:
+            llm_provider: Provider object to use for subsequent completions.
+        """
+        print("Resetting provider...")
+        self.llm_provider = llm_provider
+        if self.history is not None and len(self.history) > 0:
+            # pop the last history
+            self.history.pop(-1)
+    def produce_instruction(self):
+        """
+        Build the instruction section and substitute it for "<$instruction$>" in the prompt template.
+
+        The first entry of `instruction_template` contributes its "text". Every following entry
+        contributes, in order, an image placeholder (if it has an "image" key) and its "text"
+        (if it has a "text" key). Entries are separated by blank lines.
+
+        Raises:
+            AssertionError: If the placeholder "<$instruction$>" is not found in `prompt_template`.
+
+        Side Effects:
+            Replaces "<$instruction$>" in `self.prompt_template` with the generated string.
+            Stores `encode_image_path(item["image"])` in `self.input` under keys of the form
+            "image_instruction_{counter}". `self.input` must already exist; `generate_input`
+            creates it before calling this method.
+        """
+        assert "<$instruction$>" in self.prompt_template
+        instruction_str = ""
+        instruction_str += "\n\n" + self.instruction_template[0]["text"]
+        counter = 1
+        for item in self.instruction_template[1:]:
+            instruction_str += "\n\n"
+            if "image" in item:
+                placeholder_token = f"image_instruction_{counter}"
+                self.input[placeholder_token] = encode_image_path(item["image"])
+                instruction_str += f"<${placeholder_token}$>"
+            if "text" in item:
+                instruction_str += item["text"]
+            # counter advances for every entry, so it is the entry's position, not an image count.
+            counter += 1
+        self.prompt_template = self.prompt_template.replace("<$instruction$>", instruction_str + "\n\n")
+    def produce_history(self):
+        """
+        Generates a history string based on the provided history template and updates the prompt template with this history.
+        The method processes the `history_template` in reverse order (excluding the first element) and constructs a history string by replacing placeholders with corresponding values from the `history` list. It also updates the `input` dictionary with image history placeholders.
+        The constructed history string is then inserted into the `prompt_template` at the placeholder "<$history$>".
+        Raises:
+            AssertionError: If the placeholder "<$history$>" is not found in `prompt_template`.
+        """
+        assert "<$history$>" in self.prompt_template
+        history_str = ""
+        # Note: The history is stored in reverse order, with the most recent step at the end of the list.
+        ########################################################################################
+        # produce recent history
+        # skip current step
+        counter = 2
+        for item in reversed(self.history_template[1:]):
+            if counter > len(self.history):
+                break
+            # reversed
+            if "text" in item:
+                history_text_template = item["text"]
+                for history_variable in self.history[-counter]:
+                    if history_variable == "image":
+                        continue
+                    # history_variable_X is the X step in the past (X == 1 means the previous step)
+                    history_variable_X = f"{history_variable}_{counter-1}"
+                    placeholder_token = f"<${history_variable_X}$>"
+                    if placeholder_token in history_text_template:
+                        print(f"history_variable: {history_variable_X}")
+                        history_text_template = \
+                            history_text_template.replace(placeholder_token, self.history[-counter][history_variable])
+                history_str = history_text_template + history_str
+            if "image" in item:
+                placeholder_token = f"image_history_{counter}"
+                self.input[placeholder_token] = self.history[-counter]["image"]
+                history_str = f"<${placeholder_token}$>" + history_str
+            history_str = "\n\n" + history_str
+            counter += 1
+        ########################################################################################
+        # produce sample history
+        # a naive implementation
+        # just randomly select a sample from the history before self.history_size steps of the current step
+        if self.use_sample_history:
+            sample_size = min(self.sample_size, max(len(self.history) - self.history_size,  0))
+            sample_index = random.sample(range(0, len(self.history)- self.history_size), sample_size)
+            sample_index.sort(reverse=True)
+            sample_history_str = ""
+            for index in sample_index:
+                '''
+                This screenshot is <$sample_step$> steps before the current step of the game. After this frame, your reasoning message was \"<$sample_history_reasoning$>\". After the action was excuted, the game info was \"<$sample_history_action_info$>\"
+                '''
+                sample_history_template = self.sample_histroy_template["text"]
+                # 0 1 2  [3 4]  5 (cur)        (index)
+                for history_variable in self.history[index]:
+                    if history_variable == "image":
+                        continue
+                    sample_history_variable = f"sample_{history_variable}"
+                    placeholder_token = f"<${sample_history_variable}$>"
+                    if placeholder_token in sample_history_template:
+                        sample_history_template = \
+                            sample_history_template.replace(placeholder_token, self.history[index][history_variable])
+                sample_history_template = sample_history_template.replace("<$sample_step$>", str(len(self.history) - index))
+                placeholder_token_image = f"image_sample_{index}"
+                sample_history_image = self.history[index]["image"]
+                self.input[placeholder_token_image] = sample_history_image
+                history_str = "\n\n" + f"<${placeholder_token_image}$>" + sample_history_template + history_str
+        ########################################################################################
+        if len(self.history) != 0:
+            history_str = "\n\n" + self.history_template[0]["text"] + history_str
+        self.prompt_template = self.prompt_template.replace("<$history$>", history_str + "\n\n")
+        # print("history_str: ", history_str)
+        if len(self.history) == 10:
+            sleep(100)
+    def update_recent_history(self, info: Dict):
+        # Update the last step with the action taken
+        if len(self.history) == 0:
+            return
+        # for key in ["history_action", "history_action_info", "history_reasoning"]:
+        for key in info.keys():
+            if info.get(key) is not None:
+                self.history[-1][key] = info[key]
+    def update_new_history(self, info: Dict):
+        """Append a new history entry for the current step.
+
+        Args:
+            info: Must contain "last_frame_base64" and "last_frame_path"; a missing key
+                raises ``KeyError``.
+        """
+        self.history.append({
+            # Current Step
+            'image': info["last_frame_base64"],
+            'image_path': info["last_frame_path"],
+            # Filled in later through update_recent_history().
+            'history_action': None,
+            'history_action_info': None,
+            'history_reasoning': None
+        })
+        # Without sample history only the current step plus history_size past steps are kept,
+        # so the oldest entry is dropped once the list grows beyond that.
+        if self.use_history and not self.use_sample_history and len(self.history) > self.history_size + 1:
+            self.history.pop(0)
+    def update_game_info(self, game_info: Dict):
+        """Record ``game_info`` on the newest history entry, then open a new entry from it.
+
+        The same dict is passed to both ``update_recent_history`` and ``update_new_history``.
+        """
+        # TODO: working memory module
+        # e.g.
+        # self.memory.update(info)
+        self.update_recent_history(game_info)
+        self.update_new_history(game_info)
+    def generate_input(self):
+        """Reset ``self.input`` and ``self.prompt_template`` and fill them for the current step.
+
+        Reads ``self.history[-1]``, so at least one history entry must exist.
+        """
+        self.input = {}
+        # Start from the pristine template; the placeholders are replaced anew on each step.
+        self.prompt_template = self.prompt_template_origin
+        # current step image is at the end of the history
+        self.input['image_current_step'] = self.history[-1]["image"]
+        # Instruction
+        if self.use_instruction:
+            # replace <$instruction$> with images and texts.
+            self.produce_instruction()
+        # History
+        if self.use_history:
+            # replace <$history$> with images and texts.
+            self.produce_history()
+    def generate_action(self, data):
+        if data.get("action") is None:
+            data["action"] = "None"
+        action = data["action"]
+        return action
+    def execute_action(self):
+        print(f"Agent execcuting action...")
+        # Generate self.input
+        self.generate_input()
+        # Generate prompt
+        message_prompts = assemble_prompt(template_str=self.prompt_template, params=self.input, image_prompt_format=self.llm_provider.image_prompt_format)
+        # Replace base64 image data with values from history array
+        readable_message_prompts = json.dumps(message_prompts, indent=2)
+        pattern = re.compile(r"\"data:image/png;base64,[^\"]*\"")
+        print(f"len(self.history): {len(self.history)}")
+        print(f"self.history_size: {self.history_size}")
+        # for i, history_item in enumerate(self.history[-self.history_size:]):
+        #     match = pattern.search(readable_message_prompts)
+        #     if match:
+        #         base64_image = match.group(0)
+        #         expected_image_path = f"\"Image {i+1}: {history_item['image_path']}\""
+        #         assert encode_image_path(history_item['image_path']) in base64_image, f"Base64 encoding does not match for i={i}, {history_item['image_path']}"
+        #         readable_message_prompts = readable_message_prompts[:match.start()] + expected_image_path + readable_message_prompts[match.end():]
+        # print("Base64 image encoding matches history image paths.")
+        logging.info("message_prompts: " + readable_message_prompts)
+        # print the message prompts in JSON format
+        # print("message_prompts: " + readable_message_prompts.encode("utf-8").decode("unicode_escape"))
+        # Call the LLM provider for decision making
+        success, response = self.llm_provider.create_completion(message_prompts)
+        if not success:
+            print("Failed to generate response., error: " + response)
+            error_msg = "Failed to generate response, error: " + response
+            return False, error_msg
+        print("--------------------------------------------------------------------------------------")
+        response = re.sub(r'\n+', '\n', response)
+        # Convert the response to dict
+        response = response.replace(":", ":\n")
+        logging.info("response: " + str(response))
+        print("response: " + response)
+        data = parse_semi_formatted_text(response)
+        self.update_recent_history({"history_reasoning": str(data)})
+        action = self.generate_action(data)
+        return True, action

app.py ADDED Viewed

	@@ -0,0 +1,292 @@

+import pickle
+import subprocess
+import threading
+from filelock import FileLock
+import gradio as gr
+import multiprocessing
+import time
+import os
+from PIL import Image
+import base64
+from io import BytesIO
+import numpy as np
+from datetime import datetime
+from threading import Timer
+Game_to_Levels = {
+    "RaceGame": list(range(1, 10)),
+    "SuperMario": list(range(0, 10)),
+    "FlappyBird": list(range(1, 8)),
+    "TempestRun": list(range(1, 5)),
+    "PongGame": list(range(1, 4))
+}
+# Action buttons shown for each game (see update_action_buttons).
+valid_actions_dict = {
+    "RaceGame": ["LEFT", "RIGHT", "UP", "DOWN", "FORWARD", "BACKWARD"],
+    "PongGame": ["LEFTUP", "LEFTDOWN", "RIGHTUP", "RIGHTDOWN", "NONE"],
+    "FlappyBird": ["UP", "DOWN", "KEEP", "NONE"],
+    "SuperMario": ["UP", "LEFT", "RIGHT", "UP+LEFT", "UP+RIGHT", "NONE"],
+    "TempestRun": ["JUMP", "LEFT", "RIGHT", "SLIDE", "RISE", "NONE"]
+}
+# Every action a button is created for; a button is visible only for the selected game's actions.
+all_actions = [
+    "LEFT", "LEFTUP", "UP+LEFT", "LEFTDOWN", "RIGHT", "RIGHTUP", "UP+RIGHT", "RIGHTDOWN",
+    "UP", "RISE", "JUMP", "SLIDE", "DOWN", "KEEP", "NONE", "FORWARD", "BACKWARD"
+]
+# session hash -> PID of the game process started for that session.
+game_pids = {}
+# session hash -> start time formatted as '%Y-%m-%d-%H:%M:%S'.
+alive_game_ids = {}
+# Number of past frames/actions shown in the history column.
+MAX_HISTORY = 3
+def remove_old_game_dirs():
+    if not os.path.exists("./runs"):
+        os.makedirs("./runs")
+    output_dirs = [d for d in os.listdir("./runs") if os.path.isdir(os.path.join("./runs", d))]
+    for game_id in output_dirs:
+        if game_id not in alive_game_ids:
+            run_lock = FileLock(os.path.join(".", "runs", "run.lock"))
+            with run_lock:
+                os.system(f"rm -rf {os.path.join('.', 'runs', game_id)}")
+        elif (datetime.now() - datetime.strptime(alive_game_ids[game_id], '%Y-%m-%d-%H:%M:%S')).total_seconds() > 0.1 * 24 * 3600:
+            run_lock = FileLock(os.path.join(".", "runs", "run.lock"))
+            with run_lock:
+                os.system(f"rm -rf {os.path.join('.', 'runs', game_id)}")
+            alive_game_ids.pop(game_id, None)
+    for game_id in list(alive_game_ids):
+        if not os.path.exists(os.path.join(".", "runs", game_id)):
+            alive_game_ids.pop(game_id, None)
+    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), " - Cleaned up old game directories.")
+    print("Current alive game IDs:", alive_game_ids)
+def start_game(game, level, sample_rate, model_name, base_url, api_key, state, req: gr.Request):
+    try:
+        sample_rate = int(sample_rate)
+    except (ValueError, TypeError):
+        gr.Warning("Invalid sample rate. Please enter a valid integer.")
+        return
+    if not (1 <= sample_rate <= 10):
+        gr.Warning("Sample rate must be between 1 and 10.")
+        return
+    print(f"Starting {game} at {level}")
+    game_id = req.session_hash
+    alive_game_ids[game_id] = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+    if game_pids.get(game_id):
+        try:
+            os.kill(game_pids[game_id], 9)
+            print(f"Killed previous game process with PID: {game_pids[game_id]}")
+        except Exception as e:
+            print(f"Error killing previous game process: {e}")
+    output_dir = os.path.join(".", "runs", game_id)
+    if os.path.exists(output_dir):
+        os.system(f"rm -rf {output_dir}")
+    os.makedirs(output_dir)
+    action_file = os.path.join(output_dir, f"action_{game_id}.txt")
+    with open(action_file, "w") as f:
+        f.write("")
+    state["action_file"] = action_file
+    if model_name == "":
+        model_name = "default_model"
+    if base_url == "":
+        base_url = "default_url"
+    if api_key == "":
+        api_key = "default_api_key"
+    command = f'SDL_VIDEODRIVER=dummy nohup python3 -u run_game.py --game {game} --level {level} --action_file "{action_file}" --game_id {game_id} --sample_rate {sample_rate} --model_name {model_name} --base_url "{base_url}" --api_key "{api_key}" > run.log 2>&1 & echo $!'
+    with os.popen(command) as f:
+        pid_str = f.read().strip()
+    try:
+        pid = int(pid_str)
+        print(f"Game started with PID: {pid}")
+        game_pids[game_id] = pid
+    except ValueError:
+        print(f"Failed to parse PID from command output: {pid_str}")
+def write_action(action, state):
+    """Overwrite the session's action file with ``action``.
+
+    Does nothing (apart from a log line) when ``state`` has no "action_file" entry or the
+    file does not exist. Presumably the game process reads this file to pick up the action;
+    the reader is not part of this module.
+
+    Args:
+        action: Text to write, e.g. an action name or "model".
+        state: Per-session dict whose "action_file" entry is set by ``start_game``.
+    """
+    action_file = state.get("action_file")
+    if not action_file:
+        print("Action file not found in state. Cannot write action.")
+        return
+    if not os.path.exists(action_file):
+        print(f"Action file {action_file} does not exist. Skipping write.")
+        return
+    print(f"Writing action: {action} to {action_file}")
+    with open(action_file, "w") as f:
+        f.write(action)
+def cleanup(req: gr.Request):
+    """Session unload handler: forget the session and kill its game process.
+
+    Args:
+        req: Gradio request; its ``session_hash`` identifies the session.
+    """
+    game_id = req.session_hash
+    if game_id in alive_game_ids:
+        alive_game_ids.pop(game_id)
+    pid = game_pids.get(game_id)
+    if pid:
+        try:
+            # Signal 9 (SIGKILL).
+            os.kill(pid, 9)
+            print(f"Killed game process with PID: {pid}")
+        except Exception as e:
+            print(f"Error killing game process: {e}")
+        finally:
+            # The session was removed from alive_game_ids above, so this sweep deletes its
+            # run directory. NOTE: the sweep only runs when a PID was recorded.
+            remove_old_game_dirs()
+def update_all_displays(last_history, req: gr.Request):
+    """Timer callback: refresh the main screenshot and the history column for one session.
+
+    Args:
+        last_history: Dict with "images" and "actions" lists as returned by the previous call.
+        req: Gradio request; its ``session_hash`` selects ./runs/<session hash>.
+
+    Returns:
+        A list of ``1 + 2 * MAX_HISTORY + 1`` items: the main image, MAX_HISTORY image
+        updates, MAX_HISTORY markdown updates, and the new history state dict.
+    """
+    num_outputs = 1 + 2 * MAX_HISTORY + 1
+    if req is None:
+        return [None] * num_outputs
+    display_count = MAX_HISTORY
+    game_id = req.session_hash
+    output_dir = os.path.join(".", "runs", game_id)
+    # Same lock file that remove_old_game_dirs holds while deleting run directories.
+    run_lock = FileLock(os.path.join(".", "runs", "run.lock"))
+    with run_lock:
+        if not os.path.exists(output_dir):
+            # No run directory: show gameover.jpg (or a zero-filled array) and hide the history.
+            try:
+                game_over_image_path = os.path.join("gameover.jpg")
+                img_array = np.array(Image.open(game_over_image_path).convert('RGB'))
+            except FileNotFoundError:
+                img_array = np.zeros((800, 600, 3), dtype=np.uint8)
+            image_updates = [gr.update(visible=False) for _ in range(MAX_HISTORY)]
+            markdown_updates = [gr.update(visible=False) for _ in range(MAX_HISTORY)]
+            new_history_state = {"images": [], "actions": []}
+            return [img_array] + image_updates + markdown_updates + [new_history_state]
+    try:
+        pkl_path = os.path.join(output_dir, f"game_{game_id}.pkl")
+        lock = FileLock(pkl_path + ".lock")
+        with lock:
+            # NOTE(security): pickle.load executes whatever the file encodes; this is only
+            # safe as long as nothing untrusted can write into the run directory.
+            with open(pkl_path, "rb") as f: info = pickle.load(f)
+    except Exception:
+        # Snapshot missing or unreadable: leave every component unchanged.
+        return [gr.update()] * num_outputs
+    # NOTE(review): if "current_image" is missing, the empty default decodes to b"" and
+    # Image.open raises; that exception is not handled here.
+    current_image_b64 = info.get("current_image", "")
+    image_data = base64.b64decode(current_image_b64)
+    image = Image.open(BytesIO(image_data)).convert('RGB')
+    # Scale to a height of 800 pixels, keeping the aspect ratio.
+    main_img_array = np.array(image.resize((int(image.width * 800 / image.height), 800)))
+    # Keep the last display_count entries and reverse them (presumably newest first).
+    new_history_images_b64 = info.get("history_images", [])[-display_count:]
+    new_history_actions = info.get("history_actions", [])[-display_count:]
+    new_history_images_b64.reverse()
+    new_history_actions.reverse()
+    last_history_images_b64 = last_history.get("images", [])
+    last_history_actions = last_history.get("actions", [])
+    image_updates = []
+    markdown_updates = []
+    for i in range(MAX_HISTORY):
+        new_img = new_history_images_b64[i] if i < len(new_history_images_b64) else None
+        last_img = last_history_images_b64[i] if i < len(last_history_images_b64) else None
+        new_action = new_history_actions[i] if i < len(new_history_actions) else None
+        last_action = last_history_actions[i] if i < len(last_history_actions) else None
+        # Send a no-op update when the slot is unchanged since the previous tick.
+        if new_img == last_img:
+            image_updates.append(gr.update())
+        else:
+            if new_img:
+                img_data = base64.b64decode(new_img)
+                img_array = np.array(Image.open(BytesIO(img_data)).convert('RGB'))
+                image_updates.append(gr.update(value=img_array, visible=True))
+            else:
+                image_updates.append(gr.update(visible=False))
+        if new_action == last_action:
+            markdown_updates.append(gr.update())
+        else:
+            if new_action:
+                action_text = f"**Action: {new_action}**"
+                markdown_updates.append(gr.update(value=action_text, visible=True))
+            else:
+                markdown_updates.append(gr.update(visible=False))
+    new_history_state = {"images": new_history_images_b64, "actions": new_history_actions}
+    return [main_img_array] + image_updates + markdown_updates + [new_history_state]
+# UI layout and event wiring: controls on the left, game screen in the middle, history on the right.
+with gr.Blocks(title="Game Control Interface") as demo:
+    # Per-session dict; start_game stores the action file path in it.
+    state = gr.State(value={})
+    # History shown at the previous timer tick, used by update_all_displays to skip unchanged slots.
+    last_history_state = gr.State(value={"images": [], "actions": []})
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Markdown("## Game Control")
+            game_dropdown = gr.Dropdown(choices=["RaceGame", "SuperMario", "FlappyBird", "TempestRun", "PongGame"], label="Select Game", value="RaceGame")
+            level_dropdown = gr.Dropdown(choices=Game_to_Levels["RaceGame"], label="Select Level")
+            sample_rate_textbox = gr.Number(label="Sample Rate (Frames/Action)", interactive=True, value=3)
+            start_button = gr.Button("Start Game", variant="primary")
+            gr.Markdown("---")
+            model_name_textbox = gr.Textbox(label="Model Name", placeholder="Enter your model name here...")
+            base_url_textbox = gr.Textbox(label="Base URL", placeholder="Enter your base URL here...")
+            api_key_textbox = gr.Textbox(label="API Key", type="password", placeholder="Enter your API key here...")
+            gr.Markdown("### Press to let the model execute the next action")
+            model_inference_button = gr.Button("Model Inference")
+            gr.Markdown("## Action Buttons")
+            gr.Markdown("### Press to send actions to the game.")
+            with gr.Row():
+                # One button per known action; only the default game's actions start out visible.
+                action_buttons = {action: gr.Button(action, visible=(action in valid_actions_dict.get("RaceGame", []))) for action in all_actions}
+        with gr.Column(scale=7):
+            gr.Markdown("### Game Screen")
+            screenshot_display = gr.Image(height=1000, interactive=False)
+        with gr.Column(scale=1):
+            gr.Markdown("### History")
+            history_images = []
+            history_actions_md = []
+            for i in range(MAX_HISTORY):
+                action_md = gr.Markdown(f"**Action: NONE**", visible=False)
+                img = gr.Image(interactive=False, label=f"Frame {-i-1}", visible=False)
+                history_images.append(img)
+                history_actions_md.append(action_md)
+    # Changing the game refreshes the level choices ...
+    game_dropdown.change(fn=lambda game: gr.update(choices=Game_to_Levels.get(game, [])), inputs=game_dropdown, outputs=level_dropdown)
+    def update_action_buttons(game):
+        """Return visibility updates so that only the actions valid for ``game`` are shown."""
+        valid_actions = valid_actions_dict.get(game, [])
+        return {btn: gr.update(visible=(action in valid_actions)) for action, btn in action_buttons.items()}
+    # ... and the set of visible action buttons.
+    game_dropdown.change(fn=update_action_buttons, inputs=game_dropdown, outputs=list(action_buttons.values()))
+    for action, btn in action_buttons.items():
+        # A hidden textbox carries the action name as the first input of write_action.
+        btn.click(fn=write_action, inputs=[gr.Textbox(value=action, visible=False), state], outputs=None)
+    # "Model Inference" writes the literal text "model" to the action file.
+    model_inference_button.click(fn=write_action, inputs=[gr.Textbox(value="model", visible=False), state], outputs=None)
+    start_button.click(
+        fn=start_game,
+        inputs=[game_dropdown, level_dropdown, sample_rate_textbox, model_name_textbox, base_url_textbox, api_key_textbox, state],
+        outputs=None
+    )
+    # Output order must match the list returned by update_all_displays.
+    all_outputs = [screenshot_display] + history_images + history_actions_md + [last_history_state]
+    timer_inputs = [last_history_state]
+    # Refresh the displays every 0.5 seconds.
+    timer = gr.Timer(0.5)
+    timer.tick(fn=update_all_displays, inputs=timer_inputs, outputs=all_outputs)
+    demo.unload(cleanup)
+if __name__ == "__main__":
+    if not os.path.exists("./runs"):
+        os.makedirs("./runs")
+    cleanup_thread = threading.Thread(target=lambda: (remove_old_game_dirs(), time.sleep(3600)), daemon=True)
+    cleanup_thread.start()
+    demo.launch()

config/env_config/3steps/env_config_flappybird_reasoning_3steps.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "env_name": "Flappy Bird",
+    "env_short_name": "flappybirdgame",
+    "game_module": "game.flappybird_game",
+    "game_class": "FlappyBirdGame",
+    "prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard.prompt",
+    "use_history": "True",
+    "history": [
+        {
+            "text": "Now, I will give you some history screenshots in the current game for decision making."
+        },
+        {
+            "image": "",
+            "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was excuted, the game info was \"<$history_action_info_2$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was excuted, the game info was \"<$history_action_info_1$>\""
+        }
+    ],
+    "use_instruction": "False",
+    "instruction": [
+        "TODO"
+    ]
+}

config/env_config/3steps/env_config_pong_reasoning_3steps.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "env_name": "Pong Game",
+    "env_short_name": "ponggame",
+    "game_module": "game.pong_game",
+    "game_class": "PongGame",
+    "prompt": "./res/ponggame/prompts/templates/ponggame.prompt",
+    "use_history": "True",
+    "history": [
+        {
+            "text": "Now, I will give you some screenshots in the current game for decision making."
+        },
+        {
+            "image": "",
+            "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
+        }
+    ],
+    "use_instruction": "False",
+    "instruction": [
+        "TODO"
+    ]
+}

config/env_config/3steps/env_config_race_reasoning_3steps.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "env_name": "Race Game",
+    "env_short_name": "racegame",
+    "game_module": "game.race_game",
+    "game_class": "RaceGame",
+    "prompt": "./res/racegame/prompts/templates/racegame.prompt",
+    "use_history": "True",
+    "history": [
+        {
+            "text": "Now, I will give you some screenshots in the current game for decision making."
+        },
+        {
+            "image": "",
+            "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
+        }
+    ],
+    "use_instruction": "False",
+    "instruction": [
+        "TODO"
+    ]
+}

config/env_config/3steps/env_config_supermario_reasoning_3steps.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "env_name": "SuperMario Game",
+    "env_short_name": "supermariogame",
+    "game_module": "game.supermario_game",
+    "game_class": "SuperMarioGame",
+    "prompt": "./res/supermariogame/prompts/templates/supermariogame.prompt",
+    "use_history": "True",
+    "history": [
+        {
+            "text": "Now, I will give you some screenshots in the current game for decision making."
+        },
+        {
+            "image": "",
+            "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
+        }
+    ],
+    "use_instruction": "False",
+    "instruction": [
+        "TODO"
+    ]
+}

config/env_config/3steps/env_config_tempestrun_reasoning_3steps.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "env_name": "TempestRun Game",
+    "env_short_name": "tempestrungame",
+    "game_module": "game.tempestrun_game",
+    "game_class": "TempestRunGame",
+    "prompt": "./res/tempestrungame/prompts/templates/tempestrungame.prompt",
+    "use_history": "True",
+    "history": [
+        {
+            "text": "Now, I will give you some screenshots in the current game for decision making."
+        },
+        {
+            "image": "",
+            "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
+        },
+        {
+            "image": "",
+            "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
+        }
+    ],
+    "use_instruction": "False",
+    "instruction": [
+        "TODO"
+    ]
+}

config/level_config/flappybirdgame/level1.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 1,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
+}

config/level_config/flappybirdgame/level2.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 2,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
+}

config/level_config/flappybirdgame/level3.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 3,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
+}

config/level_config/flappybirdgame/level4.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 4,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
+}

config/level_config/flappybirdgame/level5.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 5,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
+}

config/level_config/flappybirdgame/level6.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 6,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
+}

config/level_config/flappybirdgame/level7.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 7,
+    "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard.prompt"
+}

config/level_config/ponggame/level1.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 1
+}

config/level_config/ponggame/level2.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 2
+}

config/level_config/ponggame/level3.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 3
+}

config/level_config/racegame/level1.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 1,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_no_obstacle_history.prompt"
+}

config/level_config/racegame/level1_no_history.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 1,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_no_obstacle.prompt"
+}

config/level_config/racegame/level2.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 2,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_history.prompt"
+}

config/level_config/racegame/level2_no_history.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 2,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map.prompt"
+}

config/level_config/racegame/level3.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 3,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_history.prompt"
+}

config/level_config/racegame/level3_no_history.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 3,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map.prompt"
+}

config/level_config/racegame/level4.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 4,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/racegame/level5.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 5,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/racegame/level6.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 6,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/racegame/level7.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 7,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/racegame/level8.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 8,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/racegame/level9.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "level": 9,
+    "dynamic": "True",
+    "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
+}

config/level_config/supermariogame/level0.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 0,
+    "max_round": 5000
+}

config/level_config/supermariogame/level1.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 1,
+    "max_round": 400
+}

config/level_config/supermariogame/level2.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 2,
+    "max_round": 1000
+}

config/level_config/supermariogame/level3.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 3,
+    "max_round": 1000
+}

config/level_config/supermariogame/level4.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 4,
+    "max_round": 1000
+}

config/level_config/supermariogame/level5.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 5,
+    "max_round": 300
+}

config/level_config/supermariogame/level6.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 6,
+    "max_round": 300
+}

config/level_config/supermariogame/level7.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 7,
+    "max_round": 300
+}

config/level_config/supermariogame/level8.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 8,
+    "max_round": 1000
+}

config/level_config/supermariogame/level9.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "level": 9,
+    "max_round": 1000
+}

config/level_config/tempestrungame/level1.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 1
+}

config/level_config/tempestrungame/level2.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 2
+}

config/level_config/tempestrungame/level3.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 3
+}

config/level_config/tempestrungame/level4.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 4
+}

config/level_config/tempestrungame/level5.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "level": 5
+}

config/model_config/claude_sonnet_3_7_config.ini ADDED Viewed

	@@ -0,0 +1,5 @@

+[lmm]
+model_name=Claude
+model_path=claude-3-7-sonnet-20250219
+api_key=sk-XXX

config/model_config/generation_config.ini ADDED Viewed

	@@ -0,0 +1,6 @@

+[generation]
+top_p=0.9
+top_k=50
+temperature=0.8
+do_sample=True
+max_new_tokens=2048

config/model_config/low_tokens_generation_config.ini ADDED Viewed

	@@ -0,0 +1,6 @@

+[generation]
+top_p=0.9
+top_k=50
+temperature=0.8
+do_sample=True
+max_new_tokens=512

config/model_config/openai_service_config.ini ADDED Viewed

	@@ -0,0 +1,5 @@

+[lmm]
+model_name = OpenAI
+model_path =  XXX
+openai_api_key = XXX
+openai_api_base = XXX

config/model_config/random.ini ADDED Viewed

	@@ -0,0 +1,3 @@


+[lmm]
+
+model_name = random