Fengx1n commited on
Commit
504b2e4
·
1 Parent(s): 02f0e64

first commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +180 -0
  2. agent/game_agent.py +310 -0
  3. app.py +292 -0
  4. config/env_config/3steps/env_config_flappybird_reasoning_3steps.json +34 -0
  5. config/env_config/3steps/env_config_pong_reasoning_3steps.json +34 -0
  6. config/env_config/3steps/env_config_race_reasoning_3steps.json +34 -0
  7. config/env_config/3steps/env_config_supermario_reasoning_3steps.json +34 -0
  8. config/env_config/3steps/env_config_tempestrun_reasoning_3steps.json +34 -0
  9. config/level_config/flappybirdgame/level1.json +4 -0
  10. config/level_config/flappybirdgame/level2.json +4 -0
  11. config/level_config/flappybirdgame/level3.json +4 -0
  12. config/level_config/flappybirdgame/level4.json +4 -0
  13. config/level_config/flappybirdgame/level5.json +4 -0
  14. config/level_config/flappybirdgame/level6.json +4 -0
  15. config/level_config/flappybirdgame/level7.json +4 -0
  16. config/level_config/ponggame/level1.json +3 -0
  17. config/level_config/ponggame/level2.json +3 -0
  18. config/level_config/ponggame/level3.json +3 -0
  19. config/level_config/racegame/level1.json +5 -0
  20. config/level_config/racegame/level1_no_history.json +5 -0
  21. config/level_config/racegame/level2.json +5 -0
  22. config/level_config/racegame/level2_no_history.json +5 -0
  23. config/level_config/racegame/level3.json +5 -0
  24. config/level_config/racegame/level3_no_history.json +5 -0
  25. config/level_config/racegame/level4.json +5 -0
  26. config/level_config/racegame/level5.json +5 -0
  27. config/level_config/racegame/level6.json +5 -0
  28. config/level_config/racegame/level7.json +5 -0
  29. config/level_config/racegame/level8.json +5 -0
  30. config/level_config/racegame/level9.json +5 -0
  31. config/level_config/supermariogame/level0.json +4 -0
  32. config/level_config/supermariogame/level1.json +4 -0
  33. config/level_config/supermariogame/level2.json +4 -0
  34. config/level_config/supermariogame/level3.json +4 -0
  35. config/level_config/supermariogame/level4.json +4 -0
  36. config/level_config/supermariogame/level5.json +4 -0
  37. config/level_config/supermariogame/level6.json +4 -0
  38. config/level_config/supermariogame/level7.json +4 -0
  39. config/level_config/supermariogame/level8.json +4 -0
  40. config/level_config/supermariogame/level9.json +4 -0
  41. config/level_config/tempestrungame/level1.json +3 -0
  42. config/level_config/tempestrungame/level2.json +3 -0
  43. config/level_config/tempestrungame/level3.json +3 -0
  44. config/level_config/tempestrungame/level4.json +3 -0
  45. config/level_config/tempestrungame/level5.json +3 -0
  46. config/model_config/claude_sonnet_3_7_config.ini +5 -0
  47. config/model_config/generation_config.ini +6 -0
  48. config/model_config/low_tokens_generation_config.ini +6 -0
  49. config/model_config/openai_service_config.ini +5 -0
  50. config/model_config/random.ini +3 -0
.gitignore ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.jpg
2
+ /*.png
3
+ *.jpeg
4
+
5
+ runs/
6
+ .vscode/
7
+
8
+ # Byte-compiled / optimized / DLL files
9
+ __pycache__/
10
+ *.py[cod]
11
+ *$py.class
12
+
13
+ # C extensions
14
+ *.so
15
+
16
+ # Distribution / packaging
17
+ .Python
18
+ build/
19
+ develop-eggs/
20
+ dist/
21
+ downloads/
22
+ eggs/
23
+ .eggs/
24
+ lib/
25
+ lib64/
26
+ parts/
27
+ sdist/
28
+ var/
29
+ wheels/
30
+ share/python-wheels/
31
+ *.egg-info/
32
+ .installed.cfg
33
+ *.egg
34
+ MANIFEST
35
+
36
+ # PyInstaller
37
+ # Usually these files are written by a python script from a template
38
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
39
+ *.manifest
40
+ *.spec
41
+
42
+ # Installer logs
43
+ pip-log.txt
44
+ pip-delete-this-directory.txt
45
+
46
+ # Unit test / coverage reports
47
+ htmlcov/
48
+ .tox/
49
+ .nox/
50
+ .coverage
51
+ .coverage.*
52
+ .cache
53
+ nosetests.xml
54
+ coverage.xml
55
+ *.cover
56
+ *.py,cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+ cover/
60
+
61
+ # Translations
62
+ *.mo
63
+ *.pot
64
+
65
+ # Django stuff:
66
+ *.log
67
+ local_settings.py
68
+ db.sqlite3
69
+ db.sqlite3-journal
70
+
71
+ # Flask stuff:
72
+ instance/
73
+ .webassets-cache
74
+
75
+ # Scrapy stuff:
76
+ .scrapy
77
+
78
+ # Sphinx documentation
79
+ docs/_build/
80
+
81
+ # PyBuilder
82
+ .pybuilder/
83
+ target/
84
+
85
+ # Jupyter Notebook
86
+ .ipynb_checkpoints
87
+
88
+ # IPython
89
+ profile_default/
90
+ ipython_config.py
91
+
92
+ # pyenv
93
+ # For a library or package, you might want to ignore these files since the code is
94
+ # intended to run in multiple environments; otherwise, check them in:
95
+ # .python-version
96
+
97
+ # pipenv
98
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
100
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
101
+ # install all needed dependencies.
102
+ #Pipfile.lock
103
+
104
+ # poetry
105
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
107
+ # commonly ignored for libraries.
108
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109
+ #poetry.lock
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ #pdm.lock
114
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
115
+ # in version control.
116
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
117
+ .pdm.toml
118
+ .pdm-python
119
+ .pdm-build/
120
+
121
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
122
+ __pypackages__/
123
+
124
+ # Celery stuff
125
+ celerybeat-schedule
126
+ celerybeat.pid
127
+
128
+ # SageMath parsed files
129
+ *.sage.py
130
+
131
+ # Environments
132
+ .env
133
+ .venv
134
+ env/
135
+ venv/
136
+ ENV/
137
+ env.bak/
138
+ venv.bak/
139
+
140
+ # Spyder project settings
141
+ .spyderproject
142
+ .spyproject
143
+
144
+ # Rope project settings
145
+ .ropeproject
146
+
147
+ # mkdocs documentation
148
+ /site
149
+
150
+ # mypy
151
+ .mypy_cache/
152
+ .dmypy.json
153
+ dmypy.json
154
+
155
+ # Pyre type checker
156
+ .pyre/
157
+
158
+ # pytype static type analyzer
159
+ .pytype/
160
+
161
+ # Cython debug symbols
162
+ cython_debug/
163
+
164
+ # PyCharm
165
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
166
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
167
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
168
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
169
+ #.idea/
170
+ run_gpt4o.py
171
+ .amltconfig
172
+ wandb/
173
+ *.whl
174
+ test/test_qwen.py
175
+ **/private/
176
+ highscore.txt
177
+ settings.json
178
+
179
+
180
+ config.ini
agent/game_agent.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import queue
3
+ import random
4
+ import re
5
+ from time import sleep
6
+ from typing import Dict
7
+ from utils.config import Config
8
+ from utils.encoding_utils import encode_data_to_base64_path, encode_image_path
9
+ from utils.file_utils import assemble_project_path, get_all_files
10
+ from utils.json_utils import parse_semi_formatted_text
11
+ from utils.lmm_utils import assemble_prompt
12
+ from utils.planner_utils import _extract_keys_from_template
13
+ import json
14
+
15
+
16
+ config = Config()
17
+
18
+
19
class game_agent:
    """Game-playing agent that prompts an LLM provider with frames and history.

    Each step the agent rebuilds a prompt from the original template, fills the
    ``<$instruction$>`` and ``<$history$>`` placeholders (when enabled in the
    module-level ``config``), asks the provider for a completion and returns
    the parsed action.

    ``self.history`` is ordered oldest-first; its last entry is always the
    current step.
    """

    def __init__(self, llm_provider=None):
        """Read the prompt/history settings from ``config`` and attach the provider.

        Args:
            llm_provider: Object exposing ``image_prompt_format`` and
                ``create_completion(message_prompts)``; may be set later via
                ``reset_provider``.
        """
        # Prefer the level-specific prompt when the config provides one.
        if hasattr(config, "level_prompt") and config.level_prompt is not None:
            self.prompt = config.level_prompt
            print("Using level prompt from config file: " + self.prompt)
        else:
            self.prompt = config.prompt
            print("Using default prompt: " + self.prompt)

        self.prompt_template_origin, _, _ = _extract_keys_from_template(self.prompt)
        self.prompt_template = self.prompt_template_origin

        self.use_instruction = config.use_instruction
        if self.use_instruction:
            self.instruction_template = config.instruction

        # Defined on every path so later checks never raise AttributeError.
        self.use_sample_history = False
        self.history = []

        self.use_history = config.use_history
        if self.use_history:
            self.history_template = config.history

            # One template entry with an "image" key per remembered step.
            self.history_size = len(
                [entry for entry in self.history_template if "image" in entry]
            )
            print(f"history_size: {self.history_size}")

            self.use_sample_history = config.use_sample_history
            if self.use_sample_history:
                self.sample_size = config.sample_size
                # NOTE: "histroy" is the attribute name used by the config object.
                self.sample_histroy_template = config.sample_histroy_template
        else:
            self.history_size = 1

        self.reset_provider(llm_provider)

        logging.info("prompt: " + self.prompt_template_origin)
        print("prompt: " + self.prompt_template_origin)

    def reset_provider(self, llm_provider):
        """Swap the LLM provider and drop the most recent history entry, if any."""
        print("Resetting provider...")
        self.llm_provider = llm_provider
        if self.history is not None and len(self.history) > 0:
            # pop the last history
            self.history.pop(-1)

    def produce_instruction(self):
        """Replace ``<$instruction$>`` in the prompt template with the instruction block.

        The first entry of ``instruction_template`` contributes its text; every
        later entry contributes an optional image placeholder
        (``<$image_instruction_N$>``, with the encoded image stored in
        ``self.input``) followed by its optional text.

        Raises:
            AssertionError: If ``<$instruction$>`` is missing from the template.
        """
        assert "<$instruction$>" in self.prompt_template

        instruction_str = "\n\n" + self.instruction_template[0]["text"]
        counter = 1
        for item in self.instruction_template[1:]:
            instruction_str += "\n\n"
            if "image" in item:
                placeholder_token = f"image_instruction_{counter}"
                self.input[placeholder_token] = encode_image_path(item["image"])
                instruction_str += f"<${placeholder_token}$>"
            if "text" in item:
                instruction_str += item["text"]
            counter += 1

        self.prompt_template = self.prompt_template.replace(
            "<$instruction$>", instruction_str + "\n\n"
        )

    def produce_history(self):
        """Replace ``<$history$>`` in the prompt template with the rendered history.

        Recent steps are rendered from ``history_template`` (entry 0 is the
        heading, the last entry describes the previous step). When sample
        history is enabled, randomly chosen older steps are rendered in front
        of them. Image data is stored in ``self.input`` under the placeholder
        names inserted into the text.

        Raises:
            AssertionError: If ``<$history$>`` is missing from the template.
        """
        assert "<$history$>" in self.prompt_template

        history_str = ""

        # ---- recent history -------------------------------------------------
        # history[-1] is the current step, so the previous step is history[-2].
        counter = 2
        for item in reversed(self.history_template[1:]):
            if counter > len(self.history):
                break
            record = self.history[-counter]

            if "text" in item:
                text = item["text"]
                for name, value in record.items():
                    if name == "image":
                        continue
                    # <$name_X$> refers to the step X steps in the past.
                    variable = f"{name}_{counter-1}"
                    placeholder_token = f"<${variable}$>"
                    if placeholder_token in text:
                        print(f"history_variable: {variable}")
                        # str(): fields that were never filled in are None.
                        text = text.replace(placeholder_token, str(value))
                history_str = text + history_str

            if "image" in item:
                placeholder_token = f"image_history_{counter}"
                self.input[placeholder_token] = record["image"]
                history_str = f"<${placeholder_token}$>" + history_str

            history_str = "\n\n" + history_str
            counter += 1

        # ---- sampled older history -------------------------------------------
        # Naive strategy: uniformly sample steps older than the recent window.
        if self.use_sample_history:
            total = len(self.history)
            # Index layout example (history_size == 2):  0 1 2 [3 4] 5(cur)
            # Candidates are 0..2, i.e. everything before the recent window
            # and the current step.
            candidate_count = max(total - self.history_size - 1, 0)
            sample_size = min(self.sample_size, candidate_count)
            sample_index = random.sample(range(candidate_count), sample_size)
            # Newest first: each rendered sample is prepended, so the final
            # text ends up in chronological order.
            sample_index.sort(reverse=True)

            for index in sample_index:
                record = self.history[index]
                text = self.sample_histroy_template["text"]
                for name, value in record.items():
                    if name == "image":
                        continue
                    placeholder_token = f"<$sample_{name}$>"
                    if placeholder_token in text:
                        text = text.replace(placeholder_token, str(value))

                # Distance to the current step (history[-1]).
                text = text.replace("<$sample_step$>", str(total - 1 - index))

                placeholder_token_image = f"image_sample_{index}"
                self.input[placeholder_token_image] = record["image"]

                history_str = (
                    "\n\n" + f"<${placeholder_token_image}$>" + text + history_str
                )

        if len(self.history) != 0:
            history_str = "\n\n" + self.history_template[0]["text"] + history_str

        self.prompt_template = self.prompt_template.replace(
            "<$history$>", history_str + "\n\n"
        )

    def update_recent_history(self, info: Dict):
        """Copy every non-None value of ``info`` into the latest history entry."""
        if len(self.history) == 0:
            return
        for key in info.keys():
            if info.get(key) is not None:
                self.history[-1][key] = info[key]

    def update_new_history(self, info: Dict):
        """Append a fresh entry for the current step and trim old entries.

        Args:
            info: Must contain ``last_frame_base64`` and ``last_frame_path``.
        """
        self.history.append({
            # Current Step
            'image': info["last_frame_base64"],
            'image_path': info["last_frame_path"],
            'history_action': None,
            'history_action_info': None,
            'history_reasoning': None
        })

        # Only sample history needs steps older than the recent window; in
        # every other mode keep the window plus the current step so the list
        # of base64 frames stays bounded.
        if not self.use_sample_history and len(self.history) > self.history_size + 1:
            self.history.pop(0)

    def update_game_info(self, game_info: Dict):
        """Record the outcome of the last step, then open an entry for the new one."""
        # TODO: working memory module
        # e.g.
        # self.memory.update(info)

        self.update_recent_history(game_info)
        self.update_new_history(game_info)

    def generate_input(self):
        """Reset ``self.input`` / ``self.prompt_template`` and fill them for this step."""
        self.input = {}
        self.prompt_template = self.prompt_template_origin

        # current step image is at the end of the history
        self.input['image_current_step'] = self.history[-1]["image"]

        if self.use_instruction:
            # replace <$instruction$> with images and texts.
            self.produce_instruction()

        if self.use_history:
            # replace <$history$> with images and texts.
            self.produce_history()

    def generate_action(self, data):
        """Return ``data["action"]``, storing and returning ``"None"`` when absent."""
        if data.get("action") is None:
            data["action"] = "None"

        return data["action"]

    def execute_action(self):
        """Build the prompt, query the provider and parse the chosen action.

        Returns:
            ``(True, action)`` on success, or ``(False, error_message)`` when
            the provider reports a failure.
        """
        print(f"Agent execcuting action...")

        # Generate self.input
        self.generate_input()

        # Generate prompt
        message_prompts = assemble_prompt(
            template_str=self.prompt_template,
            params=self.input,
            image_prompt_format=self.llm_provider.image_prompt_format,
        )

        readable_message_prompts = json.dumps(message_prompts, indent=2)

        print(f"len(self.history): {len(self.history)}")
        print(f"self.history_size: {self.history_size}")

        logging.info("message_prompts: " + readable_message_prompts)

        # Call the LLM provider for decision making
        success, response = self.llm_provider.create_completion(message_prompts)

        if not success:
            # Format with f-strings: the error payload is not guaranteed to be a str.
            print(f"Failed to generate response., error: {response}")
            error_msg = f"Failed to generate response, error: {response}"
            return False, error_msg

        print("--------------------------------------------------------------------------------------")

        response = re.sub(r'\n+', '\n', response)

        # Put each "key:" on its own line before handing the text to the parser.
        response = response.replace(":", ":\n")
        logging.info("response: " + str(response))
        print("response: " + response)

        data = parse_semi_formatted_text(response)

        self.update_recent_history({"history_reasoning": str(data)})

        action = self.generate_action(data)

        return True, action
app.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import subprocess
3
+ import threading
4
+ from filelock import FileLock
5
+ import gradio as gr
6
+ import multiprocessing
7
+ import time
8
+ import os
9
+ from PIL import Image
10
+ import base64
11
+ from io import BytesIO
12
+ import numpy as np
13
+ from datetime import datetime
14
+ from threading import Timer
15
+
16
# Selectable levels per game. Each range mirrors the level files shipped under
# config/level_config/<game>/ (TempestRun ships level1..level5).
Game_to_Levels = {
    "RaceGame": list(range(1, 10)),
    "SuperMario": list(range(0, 10)),
    "FlappyBird": list(range(1, 8)),
    "TempestRun": list(range(1, 6)),
    "PongGame": list(range(1, 4)),
}

# Actions that get a visible button for each game.
valid_actions_dict = {
    "RaceGame": ["LEFT", "RIGHT", "UP", "DOWN", "FORWARD", "BACKWARD"],
    "PongGame": ["LEFTUP", "LEFTDOWN", "RIGHTUP", "RIGHTDOWN", "NONE"],
    "FlappyBird": ["UP", "DOWN", "KEEP", "NONE"],
    "SuperMario": ["UP", "LEFT", "RIGHT", "UP+LEFT", "UP+RIGHT", "NONE"],
    "TempestRun": ["JUMP", "LEFT", "RIGHT", "SLIDE", "RISE", "NONE"],
}

# Union of all actions; one button is created per entry, in this order.
all_actions = [
    "LEFT", "LEFTUP", "UP+LEFT", "LEFTDOWN", "RIGHT", "RIGHTUP", "UP+RIGHT", "RIGHTDOWN",
    "UP", "RISE", "JUMP", "SLIDE", "DOWN", "KEEP", "NONE", "FORWARD", "BACKWARD",
]

# session hash -> PID of the game process started for that session
game_pids = {}
# session hash -> start timestamp string ('%Y-%m-%d-%H:%M:%S')
alive_game_ids = {}

# Number of past frames/actions shown in the history column.
MAX_HISTORY = 3
41
+
42
def remove_old_game_dirs():
    """Delete run directories that are orphaned or older than the retention window.

    A directory under ``./runs`` is removed when its name is not a known
    session in ``alive_game_ids`` or when the session started more than
    0.1 days (2.4 hours) ago. Entries of ``alive_game_ids`` whose directory no
    longer exists are dropped as well.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import shutil

    runs_root = os.path.join(".", "runs")
    os.makedirs(runs_root, exist_ok=True)

    run_lock = FileLock(os.path.join(runs_root, "run.lock"))
    max_age_seconds = 0.1 * 24 * 3600
    now = datetime.now()

    for game_id in os.listdir(runs_root):
        game_dir = os.path.join(runs_root, game_id)
        if not os.path.isdir(game_dir):
            continue

        started_at = alive_game_ids.get(game_id)
        if started_at is not None:
            age = (now - datetime.strptime(started_at, '%Y-%m-%d-%H:%M:%S')).total_seconds()
            if age <= max_age_seconds:
                continue

        # Same lock the display updater takes, so a directory is never removed
        # while a frame is being read from it.
        with run_lock:
            # rmtree instead of a shell "rm -rf": directory names never reach
            # a shell, and errors are ignored just like "rm -f" did.
            shutil.rmtree(game_dir, ignore_errors=True)
        alive_game_ids.pop(game_id, None)

    for game_id in list(alive_game_ids):
        if not os.path.exists(os.path.join(runs_root, game_id)):
            alive_game_ids.pop(game_id, None)

    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), " - Cleaned up old game directories.")
    print("Current alive game IDs:", alive_game_ids)
63
+
64
+
65
def start_game(game, level, sample_rate, model_name, base_url, api_key, state, req: gr.Request):
    """Start (or restart) the game process for the calling session.

    Validates the sample rate, kills any process previously started for this
    session, recreates ``./runs/<session>`` with an empty action file, stores
    that file's path in ``state["action_file"]`` and launches ``run_game.py``
    headless with its output redirected to ``run.log``.

    Args:
        game, level: Selected game name and level.
        sample_rate: Frames per action; must convert to an int in [1, 10].
        model_name, base_url, api_key: Model endpoint settings; empty values
            are replaced by placeholder defaults.
        state: Per-session dict that receives the action file path.
        req: Gradio request; its ``session_hash`` identifies the game.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import shutil

    try:
        sample_rate = int(sample_rate)
    except (ValueError, TypeError):
        gr.Warning("Invalid sample rate. Please enter a valid integer.")
        return
    if not (1 <= sample_rate <= 10):
        gr.Warning("Sample rate must be between 1 and 10.")
        return

    print(f"Starting {game} at {level}")
    game_id = req.session_hash
    alive_game_ids[game_id] = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')

    # Drop the old PID before killing so a failed launch cannot leave a stale
    # entry behind.
    previous_pid = game_pids.pop(game_id, None)
    if previous_pid:
        try:
            os.kill(previous_pid, 9)
            print(f"Killed previous game process with PID: {previous_pid}")
        except OSError as e:
            print(f"Error killing previous game process: {e}")

    output_dir = os.path.join(".", "runs", game_id)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    action_file = os.path.join(output_dir, f"action_{game_id}.txt")
    with open(action_file, "w") as f:
        f.write("")
    state["action_file"] = action_file

    model_name = model_name or "default_model"
    base_url = base_url or "default_url"
    api_key = api_key or "default_api_key"

    # Argument list + no shell: the user-supplied fields are passed verbatim
    # and can no longer inject shell syntax.
    # NOTE(review): the API key is still visible in the process list because
    # run_game.py takes it as a command-line flag.
    command = [
        "python3", "-u", "run_game.py",
        "--game", str(game),
        "--level", str(level),
        "--action_file", action_file,
        "--game_id", str(game_id),
        "--sample_rate", str(sample_rate),
        "--model_name", str(model_name),
        "--base_url", str(base_url),
        "--api_key", str(api_key),
    ]
    env = dict(os.environ, SDL_VIDEODRIVER="dummy")

    try:
        with open("run.log", "wb") as log_file:
            process = subprocess.Popen(
                command,
                stdin=subprocess.DEVNULL,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                env=env,
                # Own session: detaches from the server's terminal, replacing
                # the former "nohup ... &".
                start_new_session=True,
            )
    except OSError as e:
        print(f"Failed to start game process: {e}")
        return

    print(f"Game started with PID: {process.pid}")
    game_pids[game_id] = process.pid
109
+
110
+
111
def write_action(action, state):
    """Overwrite the session's action file with ``action``.

    Does nothing (beyond printing a note) when the state holds no action file
    path or when that file does not exist on disk.
    """
    target = state.get("action_file")

    if not target:
        print("Action file not found in state. Cannot write action.")
        return

    if os.path.exists(target):
        print(f"Writing action: {action} to {target}")
        with open(target, "w") as handle:
            handle.write(action)
    else:
        print(f"Action file {target} does not exist. Skipping write.")
122
+
123
+
124
def cleanup(req: gr.Request):
    """Tear down everything belonging to a session that has gone away.

    Forgets the session in ``alive_game_ids`` and ``game_pids``, kills its
    game process if one was recorded, and then prunes run directories.
    """
    game_id = req.session_hash
    alive_game_ids.pop(game_id, None)

    # pop(), not get(): a PID left behind could later be killed again after
    # the OS has reused it for an unrelated process.
    pid = game_pids.pop(game_id, None)
    try:
        if pid:
            os.kill(pid, 9)
            print(f"Killed game process with PID: {pid}")
    except OSError as e:
        print(f"Error killing game process: {e}")
    finally:
        # Always prune, even when no process was recorded for the session.
        remove_old_game_dirs()
137
+
138
+
139
def update_all_displays(last_history, req: gr.Request):
    """Produce updates for the main screen, the history slots and the history state.

    Args:
        last_history: Dict with the ``"images"`` / ``"actions"`` lists shown
            on the previous tick; used to skip unchanged slots.
        req: Gradio request; its ``session_hash`` selects the run directory.

    Returns:
        A list of ``1 + 2 * MAX_HISTORY + 1`` values: main image, one update
        per history image, one per history caption, and the new history state.
    """
    num_outputs = 1 + 2 * MAX_HISTORY + 1
    if req is None:
        return [None] * num_outputs

    display_count = MAX_HISTORY
    game_id = req.session_hash
    output_dir = os.path.join(".", "runs", game_id)

    run_lock = FileLock(os.path.join(".", "runs", "run.lock"))
    with run_lock:
        if not os.path.exists(output_dir):
            # No run directory: show the game-over picture (or a black frame
            # when the picture is missing) and hide every history slot.
            try:
                game_over_image_path = os.path.join("gameover.jpg")
                img_array = np.array(Image.open(game_over_image_path).convert('RGB'))
            except FileNotFoundError:
                img_array = np.zeros((800, 600, 3), dtype=np.uint8)
            image_updates = [gr.update(visible=False) for _ in range(MAX_HISTORY)]
            markdown_updates = [gr.update(visible=False) for _ in range(MAX_HISTORY)]
            new_history_state = {"images": [], "actions": []}
            return [img_array] + image_updates + markdown_updates + [new_history_state]

        try:
            pkl_path = os.path.join(output_dir, f"game_{game_id}.pkl")
            lock = FileLock(pkl_path + ".lock")
            with lock:
                # NOTE(review): pickle.load runs arbitrary code if this file
                # is tampered with; it is presumably written only by the game
                # process - confirm.
                with open(pkl_path, "rb") as f:
                    info = pickle.load(f)
        except Exception:
            # Best effort: the snapshot may not exist yet or may be mid-write.
            return [gr.update()] * num_outputs

    import binascii  # local import, used only for the decode guard below

    try:
        current_image_b64 = info.get("current_image", "")
        image_data = base64.b64decode(current_image_b64)
        image = Image.open(BytesIO(image_data)).convert('RGB')
    except (binascii.Error, ValueError, OSError):
        # A snapshot without a decodable frame must not break the timer
        # tick; keep whatever is currently displayed.
        return [gr.update()] * num_outputs
    # Scale to a height of 800 px, preserving the aspect ratio.
    main_img_array = np.array(image.resize((int(image.width * 800 / image.height), 800)))

    # Newest first, at most MAX_HISTORY entries.
    new_history_images_b64 = list(reversed(info.get("history_images", [])[-display_count:]))
    new_history_actions = list(reversed(info.get("history_actions", [])[-display_count:]))

    last_history_images_b64 = last_history.get("images", [])
    last_history_actions = last_history.get("actions", [])

    image_updates = []
    markdown_updates = []

    for i in range(MAX_HISTORY):
        new_img = new_history_images_b64[i] if i < len(new_history_images_b64) else None
        last_img = last_history_images_b64[i] if i < len(last_history_images_b64) else None
        new_action = new_history_actions[i] if i < len(new_history_actions) else None
        last_action = last_history_actions[i] if i < len(last_history_actions) else None

        # An empty gr.update() leaves the component untouched, so unchanged
        # slots are not re-sent to the browser.
        if new_img == last_img:
            image_updates.append(gr.update())
        elif new_img:
            img_data = base64.b64decode(new_img)
            img_array = np.array(Image.open(BytesIO(img_data)).convert('RGB'))
            image_updates.append(gr.update(value=img_array, visible=True))
        else:
            image_updates.append(gr.update(visible=False))

        if new_action == last_action:
            markdown_updates.append(gr.update())
        elif new_action:
            action_text = f"**Action: {new_action}**"
            markdown_updates.append(gr.update(value=action_text, visible=True))
        else:
            markdown_updates.append(gr.update(visible=False))

    new_history_state = {"images": new_history_images_b64, "actions": new_history_actions}

    return [main_img_array] + image_updates + markdown_updates + [new_history_state]
213
+
214
+
215
# UI definition: controls on the left, the live game screen in the middle and
# the most recent frames/actions on the right. A timer polls the run directory
# twice per second to refresh the display.
with gr.Blocks(title="Game Control Interface") as demo:
    state = gr.State(value={})
    last_history_state = gr.State(value={"images": [], "actions": []})

    with gr.Row():
        # ---- left column: game selection, model settings, action buttons ----
        with gr.Column(scale=2):
            gr.Markdown("## Game Control")
            game_dropdown = gr.Dropdown(
                choices=["RaceGame", "SuperMario", "FlappyBird", "TempestRun", "PongGame"],
                label="Select Game",
                value="RaceGame",
            )
            level_dropdown = gr.Dropdown(
                choices=Game_to_Levels["RaceGame"],
                label="Select Level",
            )
            sample_rate_textbox = gr.Number(
                label="Sample Rate (Frames/Action)",
                interactive=True,
                value=3,
            )
            start_button = gr.Button("Start Game", variant="primary")

            gr.Markdown("---")

            model_name_textbox = gr.Textbox(
                label="Model Name",
                placeholder="Enter your model name here...",
            )
            base_url_textbox = gr.Textbox(
                label="Base URL",
                placeholder="Enter your base URL here...",
            )
            api_key_textbox = gr.Textbox(
                label="API Key",
                type="password",
                placeholder="Enter your API key here...",
            )

            gr.Markdown("### Press to let the model execute the next action")
            model_inference_button = gr.Button("Model Inference")

            gr.Markdown("## Action Buttons")
            gr.Markdown("### Press to send actions to the game.")

            with gr.Row():
                # One button per known action; only those valid for the
                # initially selected game start out visible.
                initially_valid = valid_actions_dict.get("RaceGame", [])
                action_buttons = {}
                for action in all_actions:
                    action_buttons[action] = gr.Button(
                        action, visible=(action in initially_valid)
                    )

        # ---- middle column: live game screen ----
        with gr.Column(scale=7):
            gr.Markdown("### Game Screen")
            screenshot_display = gr.Image(height=1000, interactive=False)

        # ---- right column: recent frames with the action taken ----
        with gr.Column(scale=1):
            gr.Markdown("### History")
            history_images = []
            history_actions_md = []
            for i in range(MAX_HISTORY):
                # Caption is created before its image so it renders above it.
                action_md = gr.Markdown("**Action: NONE**", visible=False)
                img = gr.Image(interactive=False, label=f"Frame {-i-1}", visible=False)
                history_images.append(img)
                history_actions_md.append(action_md)

    def _level_choices(game):
        """Return a dropdown update listing the levels of ``game``."""
        return gr.update(choices=Game_to_Levels.get(game, []))

    game_dropdown.change(fn=_level_choices, inputs=game_dropdown, outputs=level_dropdown)

    def update_action_buttons(game):
        """Show exactly the action buttons that are valid for ``game``."""
        valid_actions = valid_actions_dict.get(game, [])
        updates = {}
        for action, btn in action_buttons.items():
            updates[btn] = gr.update(visible=(action in valid_actions))
        return updates

    game_dropdown.change(
        fn=update_action_buttons,
        inputs=game_dropdown,
        outputs=list(action_buttons.values()),
    )

    # Each button passes its action name through a hidden textbox.
    for action, btn in action_buttons.items():
        btn.click(
            fn=write_action,
            inputs=[gr.Textbox(value=action, visible=False), state],
            outputs=None,
        )

    # "model" asks the game process to let the model pick the next action.
    model_inference_button.click(
        fn=write_action,
        inputs=[gr.Textbox(value="model", visible=False), state],
        outputs=None,
    )

    start_button.click(
        fn=start_game,
        inputs=[
            game_dropdown,
            level_dropdown,
            sample_rate_textbox,
            model_name_textbox,
            base_url_textbox,
            api_key_textbox,
            state,
        ],
        outputs=None,
    )

    all_outputs = [screenshot_display] + history_images + history_actions_md + [last_history_state]
    timer_inputs = [last_history_state]
    timer = gr.Timer(0.5)

    timer.tick(fn=update_all_displays, inputs=timer_inputs, outputs=all_outputs)

    demo.unload(cleanup)
284
+
285
if __name__ == "__main__":
    os.makedirs("./runs", exist_ok=True)

    def _cleanup_forever(interval_seconds=3600):
        """Prune old run directories now and then once per interval, forever."""
        while True:
            try:
                remove_old_game_dirs()
            except Exception as e:
                # Top-level boundary of the background thread: report and
                # keep the loop alive for the next round.
                print(f"Periodic cleanup failed: {e}")
            time.sleep(interval_seconds)

    # Daemon thread, so it never blocks interpreter shutdown.
    cleanup_thread = threading.Thread(target=_cleanup_forever, daemon=True)
    cleanup_thread.start()

    demo.launch()
config/env_config/3steps/env_config_flappybird_reasoning_3steps.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_name": "Flappy Bird",
3
+ "env_short_name": "flappybirdgame",
4
+
5
+ "game_module": "game.flappybird_game",
6
+ "game_class": "FlappyBirdGame",
7
+ "prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard.prompt",
8
+
9
+ "use_history": "True",
10
+ "history": [
11
+ {
12
+ "text": "Now, I will give you some history screenshots in the current game for decision making."
13
+ },
14
+ {
15
+ "image": "",
16
+ "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
17
+ },
18
+ {
19
+ "image": "",
20
+ "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
21
+ },
22
+ {
23
+ "image": "",
24
+ "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
25
+ }
26
+ ],
27
+
28
+
29
+ "use_instruction": "False",
30
+ "instruction": [
31
+ "TODO"
32
+ ]
33
+
34
+ }
config/env_config/3steps/env_config_pong_reasoning_3steps.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_name": "Pong Game",
3
+ "env_short_name": "ponggame",
4
+
5
+ "game_module": "game.pong_game",
6
+ "game_class": "PongGame",
7
+ "prompt": "./res/ponggame/prompts/templates/ponggame.prompt",
8
+
9
+ "use_history": "True",
10
+ "history": [
11
+ {
12
+ "text": "Now, I will give you some screenshots in the current game for decision making."
13
+ },
14
+ {
15
+ "image": "",
16
+ "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
17
+ },
18
+ {
19
+ "image": "",
20
+ "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
21
+ },
22
+ {
23
+ "image": "",
24
+ "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
25
+ }
26
+ ],
27
+
28
+
29
+ "use_instruction": "False",
30
+ "instruction": [
31
+ "TODO"
32
+ ]
33
+
34
+ }
config/env_config/3steps/env_config_race_reasoning_3steps.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_name": "Race Game",
3
+ "env_short_name": "racegame",
4
+
5
+ "game_module": "game.race_game",
6
+ "game_class": "RaceGame",
7
+ "prompt": "./res/racegame/prompts/templates/racegame.prompt",
8
+
9
+ "use_history": "True",
10
+ "history": [
11
+ {
12
+ "text": "Now, I will give you some screenshots in the current game for decision making."
13
+ },
14
+ {
15
+ "image": "",
16
+ "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
17
+ },
18
+ {
19
+ "image": "",
20
+ "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
21
+ },
22
+ {
23
+ "image": "",
24
+ "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
25
+ }
26
+ ],
27
+
28
+
29
+ "use_instruction": "False",
30
+ "instruction": [
31
+ "TODO"
32
+ ]
33
+
34
+ }
config/env_config/3steps/env_config_supermario_reasoning_3steps.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_name": "SuperMario Game",
3
+ "env_short_name": "supermariogame",
4
+
5
+ "game_module": "game.supermario_game",
6
+ "game_class": "SuperMarioGame",
7
+ "prompt": "./res/supermariogame/prompts/templates/supermariogame.prompt",
8
+
9
+ "use_history": "True",
10
+ "history": [
11
+ {
12
+ "text": "Now, I will give you some screenshots in the current game for decision making."
13
+ },
14
+ {
15
+ "image": "",
16
+ "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
17
+ },
18
+ {
19
+ "image": "",
20
+ "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
21
+ },
22
+ {
23
+ "image": "",
24
+ "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
25
+ }
26
+ ],
27
+
28
+
29
+ "use_instruction": "False",
30
+ "instruction": [
31
+ "TODO"
32
+ ]
33
+
34
+ }
config/env_config/3steps/env_config_tempestrun_reasoning_3steps.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_name": "TempestRun Game",
3
+ "env_short_name": "tempestrungame",
4
+
5
+ "game_module": "game.tempestrun_game",
6
+ "game_class": "TempestRunGame",
7
+ "prompt": "./res/tempestrungame/prompts/templates/tempestrungame.prompt",
8
+
9
+ "use_history": "True",
10
+ "history": [
11
+ {
12
+ "text": "Now, I will give you some screenshots in the current game for decision making."
13
+ },
14
+ {
15
+ "image": "",
16
+ "text": "This screenshot is three steps before the current step of the game. After this frame, your action was \"<$history_action_3$>\". After this action was executed, the game info was \"<$history_action_info_3$>\""
17
+ },
18
+ {
19
+ "image": "",
20
+ "text": "This screenshot is two steps before the current step of the game. After this frame, your action was \"<$history_action_2$>\". After this action was executed, the game info was \"<$history_action_info_2$>\""
21
+ },
22
+ {
23
+ "image": "",
24
+ "text": "This screenshot is the previous step of the game. After this frame, your action was \"<$history_action_1$>\". After this action was executed, the game info was \"<$history_action_info_1$>\""
25
+ }
26
+ ],
27
+
28
+
29
+ "use_instruction": "False",
30
+ "instruction": [
31
+ "TODO"
32
+ ]
33
+
34
+ }
config/level_config/flappybirdgame/level1.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 1,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
4
+ }
config/level_config/flappybirdgame/level2.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 2,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
4
+ }
config/level_config/flappybirdgame/level3.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 3,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_zero_gravity.prompt"
4
+ }
config/level_config/flappybirdgame/level4.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 4,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
4
+ }
config/level_config/flappybirdgame/level5.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 5,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
4
+ }
config/level_config/flappybirdgame/level6.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 6,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard_with_keep.prompt"
4
+ }
config/level_config/flappybirdgame/level7.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 7,
3
+ "level_prompt": "./res/flappybirdgame/prompts/templates/flappybird_standard.prompt"
4
+ }
config/level_config/ponggame/level1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 1
3
+ }
config/level_config/ponggame/level2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 2
3
+ }
config/level_config/ponggame/level3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 3
3
+ }
config/level_config/racegame/level1.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 1,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_no_obstacle_history.prompt"
5
+ }
config/level_config/racegame/level1_no_history.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 1,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_no_obstacle.prompt"
5
+ }
config/level_config/racegame/level2.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 2,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_history.prompt"
5
+ }
config/level_config/racegame/level2_no_history.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 2,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map.prompt"
5
+ }
config/level_config/racegame/level3.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 3,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map_history.prompt"
5
+ }
config/level_config/racegame/level3_no_history.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 3,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_map.prompt"
5
+ }
config/level_config/racegame/level4.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 4,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/racegame/level5.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 5,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/racegame/level6.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 6,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/racegame/level7.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 7,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/racegame/level8.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 8,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/racegame/level9.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "level": 9,
3
+ "dynamic": "True",
4
+ "level_prompt": "./res/racegame/prompts/templates/racegame_view_of_car_history.prompt"
5
+ }
config/level_config/supermariogame/level0.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 0,
3
+ "max_round": 5000
4
+ }
config/level_config/supermariogame/level1.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 1,
3
+ "max_round": 400
4
+ }
config/level_config/supermariogame/level2.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 2,
3
+ "max_round": 1000
4
+ }
config/level_config/supermariogame/level3.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 3,
3
+ "max_round": 1000
4
+ }
config/level_config/supermariogame/level4.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 4,
3
+ "max_round": 1000
4
+ }
config/level_config/supermariogame/level5.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 5,
3
+ "max_round": 300
4
+ }
config/level_config/supermariogame/level6.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 6,
3
+ "max_round": 300
4
+ }
config/level_config/supermariogame/level7.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 7,
3
+ "max_round": 300
4
+ }
config/level_config/supermariogame/level8.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 8,
3
+ "max_round": 1000
4
+ }
config/level_config/supermariogame/level9.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "level": 9,
3
+ "max_round": 1000
4
+ }
config/level_config/tempestrungame/level1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 1
3
+ }
config/level_config/tempestrungame/level2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 2
3
+ }
config/level_config/tempestrungame/level3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 3
3
+ }
config/level_config/tempestrungame/level4.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 4
3
+ }
config/level_config/tempestrungame/level5.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "level": 5
3
+ }
config/model_config/claude_sonnet_3_7_config.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [lmm]
2
+ model_name=Claude
3
+ model_path=claude-3-7-sonnet-20250219
4
+ api_key=sk-XXX
5
+
config/model_config/generation_config.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [generation]
2
+ top_p=0.9
3
+ top_k=50
4
+ temperature=0.8
5
+ do_sample=True
6
+ max_new_tokens=2048
config/model_config/low_tokens_generation_config.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [generation]
2
+ top_p=0.9
3
+ top_k=50
4
+ temperature=0.8
5
+ do_sample=True
6
+ max_new_tokens=512
config/model_config/openai_service_config.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [lmm]
2
+ model_name = OpenAI
3
+ model_path = XXX
4
+ openai_api_key = XXX
5
+ openai_api_base = XXX
config/model_config/random.ini ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [lmm]
2
+
3
+ model_name = random