Spaces: Runtime error
Pierre Tassel committed · Commit d746b98 · Parent(s): e8861ce
wip
Browse files
- MyDummyVecEnv.py +123 -0
- MyRemoteVectorEnv.py +130 -0
- MyVecEnv.py +47 -0
- Network.py +114 -0
- actor.pt +0 -0
- app.py +155 -0
- checkpoint.pt +0 -0
- dmu01.txt +21 -0
- la01.txt +11 -0
- requirements.txt +4 -0
- ta01 +16 -0
MyDummyVecEnv.py
ADDED
@@ -0,0 +1,123 @@
from collections import OrderedDict
from typing import Any, Callable, List, Optional, Sequence, Type, Union

import gym
import numpy as np

from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
from stable_baselines3.common.vec_env.util import dict_to_obs, obs_space_info


class MyDummyVecEnv(VecEnv):
    """
    Creates a simple vectorized wrapper for multiple environments, calling each environment in sequence on the current
    Python process. This is useful for computationally simple environments such as ``Cartpole-v1``,
    as the overhead of multiprocessing or multithreading outweighs the environment computation time.
    This can also be used for RL methods that
    require a vectorized environment, but that you want a single environment to train with.

    :param env_fns: a list of functions
        that return environments to vectorize
    """

    def __init__(self, env_fns: List[Callable[[], gym.Env]]):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
        obs_space = env.observation_space
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        self.buf_obs = OrderedDict([(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None

    def step_async(self, actions: np.ndarray) -> None:
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        for env_idx in range(self.num_envs):
            obs, self.buf_rews[env_idx], self.buf_dones[env_idx], self.buf_infos[env_idx] = self.envs[env_idx].step(
                self.actions[env_idx]
            )
            if self.buf_dones[env_idx]:
                # save final observation where user can get it, then reset
                self.buf_infos[env_idx]["terminal_observation"] = obs
                obs = self.envs[env_idx].reset()
            self._save_obs(env_idx, obs)
        return (self._obs_from_buf(), self.buf_rews, self.buf_dones, self.buf_infos)

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        seeds = list()
        for idx, env in enumerate(self.envs):
            seeds.append(env.seed(seed + idx))
        return seeds

    def reset(self) -> VecEnvObs:
        for env_idx in range(self.num_envs):
            obs = self.envs[env_idx].reset()
            self._save_obs(env_idx, obs)
        return self._obs_from_buf()

    def close(self) -> None:
        for env in self.envs:
            env.close()

    def get_images(self) -> Sequence[np.ndarray]:
        return [env.render(mode="rgb_array") for env in self.envs]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering. If there are multiple environments then
        they are tiled together in one image via ``BaseVecEnv.render()``.
        Otherwise (if ``self.num_envs == 1``), we pass the render call directly to the
        underlying environment.

        Therefore, some arguments such as ``mode`` will have values that are valid
        only when ``num_envs == 1``.

        :param mode: The rendering type.
        """
        if self.num_envs == 1:
            return self.envs[0].render(mode=mode)
        else:
            return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, self.buf_obs)

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, attr_name) for env_i in target_envs]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        target_envs = self._get_target_envs(indices)
        for env_i in target_envs:
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """Call instance methods of vectorized environments."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper."""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util

        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        indices = self._get_indices(indices)
        return [self.envs[i] for i in indices]
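A minimal usage sketch (not part of the commit) of the class above: MyDummyVecEnv takes a list of zero-argument factories, steps each sub-environment sequentially in the current process, and auto-resets a sub-environment when it reports done. ``CartPole-v1`` is only a stand-in here; in this Space the factories return CompiledJssEnvCP instances (see app.py).

import gym
import numpy as np

from MyDummyVecEnv import MyDummyVecEnv

# two copies of a cheap environment, stepped in lockstep
vec_env = MyDummyVecEnv([lambda: gym.make("CartPole-v1") for _ in range(2)])
obs = vec_env.reset()                                    # batched observations, shape (2, 4)
actions = np.array([vec_env.action_space.sample() for _ in range(2)])
obs, rewards, dones, infos = vec_env.step(actions)       # step() = step_async() + step_wait()
vec_env.close()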
MyRemoteVectorEnv.py
ADDED
@@ -0,0 +1,130 @@
from typing import Tuple, Callable, Optional
from collections import OrderedDict

import gym
import torch
import numpy as np
import ray
from ray.rllib.env.base_env import BaseEnv, ASYNC_RESET_RETURN
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.typing import MultiEnvDict, EnvType, EnvID, MultiAgentDict
from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs
from stable_baselines3.common.vec_env.util import obs_space_info, dict_to_obs

from MyDummyVecEnv import MyDummyVecEnv


@PublicAPI
class MyRemoteVectorEnv(BaseEnv):
    """Vector env that executes envs in remote workers.

    This provides dynamic batching of inference as observations are returned
    from the remote simulator actors. Both single and multi-agent child envs
    are supported, and envs can be stepped synchronously or asynchronously.
    You shouldn't need to instantiate this class directly. It's automatically
    inserted when you use the `remote_worker_envs` option for Trainers.
    """

    @property
    def observation_space(self):
        return self._observation_space

    def __init__(self, make_env: Callable[[int], EnvType], num_workers: int, env_per_worker: int,
                 observation_space: Optional[gym.spaces.Space], device: torch.device):
        self.make_local_env = make_env
        self.num_workers = num_workers
        self.env_per_worker = env_per_worker
        self.num_envs = num_workers * env_per_worker
        self.poll_timeout = None

        self.actors = None  # lazy init
        self.pending = None  # lazy init

        self.observation_space = observation_space
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)

        self.device = device

        self.buf_obs = OrderedDict(
            [(k, torch.zeros((self.num_envs,) + tuple(shapes[k]), dtype=torch.float, device=self.device)) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        for key in self.keys:
            self.buf_obs[key][env_idx * self.env_per_worker: (env_idx + 1) * self.env_per_worker] = \
                torch.from_numpy(obs[key]).to(self.device, non_blocking=True)

    def poll(self) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
        if self.actors is None:

            def make_remote_env(i):
                return _RemoteSingleAgentEnv.remote(self.make_local_env, i, self.env_per_worker)

            self.actors = [make_remote_env(i) for i in range(self.num_workers)]

        if self.pending is None:
            self.pending = {a.reset.remote(): a for a in self.actors}

        # each keyed by env_id in [0, num_remote_envs)
        ready = []

        # Wait for at least 1 env to be ready here
        while not ready:
            ready, _ = ray.wait(
                list(self.pending),
                num_returns=len(self.pending),
                timeout=self.poll_timeout)

        for obj_ref in ready:
            actor = self.pending.pop(obj_ref)
            env_id = self.actors.index(actor)
            ob, rew, done, info = ray.get(obj_ref)

            self._save_obs(env_id, ob)
            self.buf_rews[env_id * self.env_per_worker: (env_id + 1) * self.env_per_worker] = rew
            self.buf_dones[env_id * self.env_per_worker: (env_id + 1) * self.env_per_worker] = done
            self.buf_infos[env_id * self.env_per_worker: (env_id + 1) * self.env_per_worker] = info
        return (self._obs_from_buf(), self.buf_rews, self.buf_dones, self.buf_infos)

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, self.buf_obs)

    @PublicAPI
    def send_actions(self, action_list) -> None:
        for worker_id in range(self.num_workers):
            actions = action_list[worker_id * self.env_per_worker: (worker_id + 1) * self.env_per_worker]
            actor = self.actors[worker_id]
            obj_ref = actor.step.remote(actions)
            self.pending[obj_ref] = actor

    @PublicAPI
    def try_reset(self, env_id: Optional[EnvID] = None) -> Optional[MultiAgentDict]:
        actor = self.actors[env_id]
        obj_ref = actor.reset.remote()
        self.pending[obj_ref] = actor
        return ASYNC_RESET_RETURN

    @PublicAPI
    def stop(self) -> None:
        if self.actors is not None:
            for actor in self.actors:
                actor.__ray_terminate__.remote()

    @observation_space.setter
    def observation_space(self, value):
        self._observation_space = value


@ray.remote(num_cpus=1)
class _RemoteSingleAgentEnv:
    """Wrapper class for making a gym env a remote actor."""

    def __init__(self, make_env, i, env_per_worker):
        # bind k at definition time (k=k) so each sub-env gets its own index,
        # instead of every lambda capturing the final value of the loop variable
        self.env = MyDummyVecEnv([lambda k=k: make_env((i * env_per_worker) + k) for k in range(env_per_worker)])

    def reset(self):
        return self.env.reset(), 0, False, {}

    def step(self, actions):
        return self.env.step(actions)
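A hedged, self-contained sketch (not part of the commit) of the poll/send_actions protocol implemented above: the first poll() lazily spawns the Ray actors and collects their reset observations, after which the caller alternates send_actions() and poll(). ToyDictEnv is a hypothetical stand-in with a Dict observation space, which is what obs_space_info and _save_obs expect; the real workers wrap CompiledJssEnvCP.

import gym
import numpy as np
import ray
import torch
from gym import spaces

from MyRemoteVectorEnv import MyRemoteVectorEnv


class ToyDictEnv(gym.Env):
    """Hypothetical stand-in environment with a Dict observation space."""
    observation_space = spaces.Dict({"obs": spaces.Box(-1.0, 1.0, shape=(3,), dtype=np.float32)})
    action_space = spaces.Discrete(2)

    def reset(self):
        return {"obs": np.zeros(3, dtype=np.float32)}

    def step(self, action):
        return {"obs": np.random.uniform(-1, 1, 3).astype(np.float32)}, 0.0, False, {}


ray.init(ignore_reinit_error=True)
env = MyRemoteVectorEnv(lambda rank: ToyDictEnv(), num_workers=2, env_per_worker=1,
                        observation_space=ToyDictEnv.observation_space,
                        device=torch.device("cpu"))
obs, rewards, dones, infos = env.poll()                  # spawns the actors and gathers their resets
for _ in range(5):
    actions = np.zeros(env.num_envs, dtype=np.int64)     # dummy policy: always action 0
    env.send_actions(actions)                            # one batched step per remote worker
    obs, rewards, dones, infos = env.poll()              # collect whatever has finished
env.stop()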
MyVecEnv.py
ADDED
@@ -0,0 +1,47 @@
from typing import Optional, List, Union, Sequence, Type, Any

import gym
import numpy as np
from ray.rllib import BaseEnv
from stable_baselines3.common.vec_env import VecEnv
from stable_baselines3.common.vec_env.base_vec_env import VecEnvIndices, VecEnvStepReturn, VecEnvObs

from MyRemoteVectorEnv import MyRemoteVectorEnv


class WrapperRay(VecEnv):

    def __init__(self, make_env, num_workers, per_worker_env, device):
        self.one_env = make_env(0)
        self.remote: BaseEnv = MyRemoteVectorEnv(make_env, num_workers, per_worker_env, self.one_env.observation_space, device)
        super(WrapperRay, self).__init__(num_workers * per_worker_env, self.one_env.observation_space, self.one_env.action_space)

    def reset(self) -> VecEnvObs:
        return self.remote.poll()[0]

    def step_async(self, actions: np.ndarray) -> None:
        self.remote.send_actions(actions)

    def step_wait(self) -> VecEnvStepReturn:
        return self.remote.poll()

    def close(self) -> None:
        self.remote.stop()

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        pass

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        pass

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        pass

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        pass

    def get_images(self) -> Sequence[np.ndarray]:
        pass

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        pass
Network.py
ADDED
@@ -0,0 +1,114 @@
import math

import numpy as np
import torch
from torch import nn, Tensor
from torch.distributions import Categorical


class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, max_len: int = 100):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, positions: Tensor) -> Tensor:
        return self.pe[positions]


class Actor(nn.Module):

    def __init__(self, pos_encoder):
        super(Actor, self).__init__()
        self.activation = nn.Tanh()
        self.project = nn.Linear(4, 8)
        nn.init.xavier_uniform_(self.project.weight, gain=1.0)
        nn.init.constant_(self.project.bias, 0)
        self.pos_encoder = pos_encoder

        self.embedding_fixed = nn.Embedding(2, 1)
        self.embedding_legal_op = nn.Embedding(2, 1)

        self.tokens_start_end = nn.Embedding(3, 4)

        # self.conv_transform = nn.Conv1d(5, 1, 1)
        # nn.init.kaiming_normal_(self.conv_transform.weight, mode="fan_out", nonlinearity="relu")
        # nn.init.constant_(self.conv_transform.bias, 0)

        self.enc1 = nn.TransformerEncoderLayer(8, 1, dim_feedforward=8 * 4, dropout=0.0, batch_first=True,
                                               norm_first=True)
        self.enc2 = nn.TransformerEncoderLayer(8, 1, dim_feedforward=8 * 4, dropout=0.0, batch_first=True,
                                               norm_first=True)

        self.final_tmp = nn.Sequential(
            layer_init_tanh(nn.Linear(8, 32)),
            nn.Tanh(),
            layer_init_tanh(nn.Linear(32, 1), std=0.01)
        )
        self.no_op = nn.Sequential(
            layer_init_tanh(nn.Linear(8, 32)),
            nn.Tanh(),
            layer_init_tanh(nn.Linear(32, 1), std=0.01)
        )

    def forward(self, obs, attention_interval_mask, job_resource, mask, indexes_inter, tokens_start_end):
        embedded_obs = torch.cat((self.embedding_fixed(obs[:, :, :, 0].long()), obs[:, :, :, 1:3],
                                  self.embedding_legal_op(obs[:, :, :, 3].long())), dim=3)
        non_zero_tokens = tokens_start_end != 0
        t = tokens_start_end[non_zero_tokens].long()
        embedded_obs[non_zero_tokens] = self.tokens_start_end(t)
        pos_encoder = self.pos_encoder(indexes_inter.long())
        pos_encoder[non_zero_tokens] = 0
        obs = self.project(embedded_obs) + pos_encoder

        transformed_obs = obs.view(-1, obs.shape[2], obs.shape[3])
        attention_interval_mask = attention_interval_mask.view(-1, attention_interval_mask.shape[-1])
        transformed_obs = self.enc1(transformed_obs, src_key_padding_mask=attention_interval_mask == 1)
        transformed_obs = transformed_obs.view(obs.shape)
        obs = transformed_obs.mean(dim=2)

        job_resource = job_resource[:, :-1, :-1] == 0

        obs_action = self.enc2(obs, src_mask=job_resource) + obs

        logits = torch.cat((self.final_tmp(obs_action).squeeze(2), self.no_op(obs_action).mean(dim=1)), dim=1)
        return logits.masked_fill(mask == 0, -3.4028234663852886e+38)


class Agent(nn.Module):
    def __init__(self):
        super(Agent, self).__init__()
        self.pos_encoder = PositionalEncoding(8)
        self.actor = Actor(self.pos_encoder)

    def forward(self, data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end,
                action=None):
        logits = self.actor(data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end)
        probs = Categorical(logits=logits)
        if action is None:
            probabilities = probs.probs
            actions = torch.multinomial(probabilities, probabilities.shape[1])
            return actions, torch.log(probabilities), probs.entropy()
        else:
            return logits, probs.log_prob(action), probs.entropy()

    def get_action_only(self, data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end):
        logits = self.actor(data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end)
        probs = Categorical(logits=logits)
        return probs.sample()

    def get_logits_only(self, data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end):
        logits = self.actor(data, attention_interval_mask, job_resource_masks, mask, indexes_inter, tokens_start_end)
        return logits


def layer_init_tanh(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    if layer.bias is not None:
        torch.nn.init.constant_(layer.bias, bias_const)
    return layer
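A small self-contained illustration (not part of the commit) of the action-masking step at the end of Actor.forward: filling the logits of illegal actions with the most negative float32 value drives their softmax probability to zero, so a Categorical distribution built from the masked logits never samples them.

import torch
from torch.distributions import Categorical

logits = torch.tensor([[1.2, 0.3, -0.5, 0.8]])
mask = torch.tensor([[1, 0, 1, 1]])                      # 0 marks an illegal action
masked = logits.masked_fill(mask == 0, -3.4028234663852886e+38)
dist = Categorical(logits=masked)
print(dist.probs)                                        # action 1 gets probability 0
action = dist.sample()                                   # the masked action is never drawn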
actor.pt
ADDED
Binary file (80.3 kB)
app.py
ADDED
@@ -0,0 +1,155 @@
import collections
import random
import time

import multiprocessing as mp
import json

from PIL import Image
from compiled_jss.CPEnv import CompiledJssEnvCP

from stable_baselines3.common.vec_env import VecEnvWrapper
from torch.distributions import Categorical

import torch
import numpy as np

from MyVecEnv import WrapperRay

import gradio as gr
import docplex.cp.utils_visu as visu
import matplotlib.pyplot as plt


class VecPyTorch(VecEnvWrapper):

    def __init__(self, venv, device):
        super(VecPyTorch, self).__init__(venv)
        self.device = device

    def reset(self):
        return self.venv.reset()

    def step_async(self, actions):
        self.venv.step_async(actions)

    def step_wait(self):
        return self.venv.step_wait()


def make_env(seed, instance):
    def thunk():
        _env = CompiledJssEnvCP(instance)
        return _env

    return thunk


def solve(file):
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    num_workers = min(mp.cpu_count(), 32)
    with torch.inference_mode():
        device = torch.device('cpu')
        actor = torch.jit.load('actor.pt', map_location=device)
        actor.eval()
        start_time = time.time()
        fn_env = [make_env(0, file.name)
                  for _ in range(num_workers)]
        ray_wrapper_env = WrapperRay(lambda n: fn_env[n](),
                                     num_workers, 1, device)
        envs = VecPyTorch(ray_wrapper_env, device)
        current_solution_cost = float('inf')
        current_solution = ''
        obs = envs.reset()
        total_episode = 0
        while total_episode < envs.num_envs:
            logits = actor(obs['interval_rep'], obs['attention_interval_mask'], obs['job_resource_mask'],
                           obs['action_mask'], obs['index_interval'], obs['start_end_tokens'])
            # temperature vector
            if num_workers >= 4:
                temperature = torch.arange(0.5, 2.0, step=(1.5 / num_workers), device=device)
            else:
                temperature = torch.ones(num_workers, device=device)
            logits = logits / temperature[:, None]
            probs = Categorical(logits=logits).probs
            # random sample based on logits
            actions = torch.multinomial(probs, probs.shape[1]).cpu().numpy()
            obs, reward, done, infos = envs.step(actions)
            total_episode += done.sum()
            # total_actions += 1
            # print(f'Episode {total_episode} / {envs.num_envs} - Actions {total_actions}', end='\r')
            for env_idx, info in enumerate(infos):
                if 'makespan' in info and int(info['makespan']) < current_solution_cost:
                    current_solution_cost = int(info['makespan'])
                    current_solution = json.loads(info['solution'])
        total_time = time.time() - start_time
        pretty_output = ""
        for job_id in range(len(current_solution)):
            pretty_output += f"Job {job_id}: {current_solution[job_id]}\n"

        jobs_data = []
        file.seek(0)
        line_str: str = file.readline()
        line_cnt: int = 1
        while line_str:
            data = []
            split_data = line_str.split()
            if line_cnt == 1:
                jobs_count, machines_count = int(split_data[0]), int(
                    split_data[1]
                )
            else:
                i = 0
                this_job_op_count = 0
                while i < len(split_data):
                    machine, op_time = int(split_data[i]), int(split_data[i + 1])
                    data.append((machine, op_time))
                    i += 2
                    this_job_op_count += 1
                jobs_data.append(data)
            line_str = file.readline()
            line_cnt += 1
        visu.timeline(f'Solution for job-shop, solved using ')
        visu.panel('Jobs')
        # convert the current_solution entries to integers
        current_solution = [[int(x) for x in y] for y in current_solution]
        for job_id in range(len(current_solution)):
            visu.sequence(name=f'J{job_id}',
                          intervals=[(current_solution[job_id][task_id],
                                      current_solution[job_id][task_id] + jobs_data[job_id][task_id][1],
                                      jobs_data[job_id][task_id][0],
                                      f'M{jobs_data[job_id][task_id][0]}')
                                     for task_id in range(len(current_solution[job_id]))])
        visu.panel('Machines')
        machine_solution = collections.defaultdict(list)
        for job_id in range(len(current_solution)):
            for task_id in range(len(current_solution[job_id])):
                # jobs_data entries are (machine, processing_time): index 0 is the machine id
                machine = jobs_data[job_id][task_id][0]
                machine_solution[machine].append((current_solution[job_id][task_id],
                                                  current_solution[job_id][task_id] + jobs_data[job_id][task_id][1],
                                                  machine, f'J{job_id}'))
        # sort dictionary keys
        machine_solution = {k: machine_solution[k] for k in sorted(machine_solution.keys())}
        for machine_id in machine_solution:
            visu.sequence(name=f'M{machine_id}',
                          intervals=machine_solution[machine_id])
        plt.rcParams["font.family"] = "Times New Roman"
        plt.rcParams["font.size"] = "30"
        plt.gca().set_aspect('equal')
        plt.rcParams["figure.figsize"] = (45, 50)
        from io import BytesIO
        buffer = BytesIO()

        visu.show(pngfile=buffer)
        reloadedPILImage = Image.open(buffer)
        return pretty_output, reloadedPILImage, str(total_time) + " seconds"


title = "Job-Shop Scheduling CP RL"
description = "A Job-Shop Scheduling Reinforcement Learning based solver, using an underlying CP model as an " \
              "environment. "
article = "<p style='text-align: center'>Article Under Review</p>"
examples = ['ta01', 'dmu01.txt', 'la01.txt']
iface = gr.Interface(fn=solve, inputs=gr.File(label="Instance File"), outputs=[gr.Text(label="Solution"), gr.Image(label="Solution's Gantt Chart"), gr.Text(label="Elapsed Time")], title=title, description=description, article=article, examples=examples)
iface.launch(enable_queue=True)
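A minimal sketch (toy numbers, not part of the commit) of the per-worker temperature trick used in solve() above: every parallel rollout samples from the same policy logits but at its own softmax temperature, so the workers explore differently and compete for the lowest makespan.

import torch
from torch.distributions import Categorical

num_workers = 4
logits = torch.randn(num_workers, 6)                             # 6 candidate actions per worker
temperature = torch.arange(0.5, 2.0, step=1.5 / num_workers)     # one temperature per worker
probs = Categorical(logits=logits / temperature[:, None]).probs
actions = torch.multinomial(probs, probs.shape[1])               # ranked action candidates per worker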
checkpoint.pt
ADDED
Binary file (75.6 kB)
dmu01.txt
ADDED
@@ -0,0 +1,21 @@
20 15
0 160 13 5 6 139 11 99 12 9 5 98 2 28 1 107 3 196 10 165 7 114 4 7 14 34 8 133 9 76
14 105 7 160 3 19 2 189 11 25 1 95 12 15 0 122 4 165 9 2 10 66 13 111 8 51 6 83 5 183
11 61 5 11 9 130 4 147 13 106 12 1 6 141 7 136 10 33 0 13 2 15 8 10 14 62 3 4 1 142
13 117 1 11 4 162 0 192 5 35 8 172 3 4 14 193 2 141 11 139 6 62 9 12 12 1 7 135 10 25
5 53 9 89 10 168 12 41 11 121 1 181 3 43 0 118 4 61 14 193 2 124 6 176 13 28 8 125 7 136
5 152 0 115 2 122 14 5 12 46 13 144 11 29 7 176 1 115 6 18 4 23 9 26 3 175 8 110 10 75
6 50 1 62 3 186 12 57 11 156 10 32 2 134 9 141 4 189 13 118 0 102 7 3 8 177 14 43 5 41
13 35 0 171 14 160 9 32 7 5 11 154 8 195 3 113 12 162 5 152 6 140 2 72 4 16 10 104 1 171
13 68 8 54 6 116 4 9 14 99 12 155 10 22 5 135 0 67 1 165 9 100 11 47 3 46 7 55 2 12
1 135 5 105 9 49 8 4 12 176 3 52 11 128 7 188 6 170 10 170 2 169 4 62 0 120 13 28 14 70
2 93 1 172 13 124 6 72 7 189 14 122 5 38 0 120 12 114 11 51 9 77 8 65 4 176 3 171 10 169
3 122 6 21 4 6 13 189 14 75 5 5 9 180 0 160 1 14 11 73 12 45 2 61 7 148 10 96 8 194
9 94 12 198 8 100 5 194 2 127 10 95 4 43 3 52 6 166 1 31 14 100 13 104 7 166 11 139 0 143
5 4 3 78 11 199 8 119 12 167 0 54 9 38 14 114 13 10 4 115 7 101 1 104 2 61 6 75 10 175
10 18 11 115 6 166 8 41 14 124 12 101 7 38 13 29 0 91 2 118 9 40 5 55 1 82 4 89 3 100
11 2 9 107 14 99 3 152 7 51 4 13 10 112 0 96 1 150 6 97 13 67 5 57 2 45 8 17 12 184
1 176 11 15 3 92 9 9 14 77 12 4 7 83 10 195 4 156 6 102 2 91 13 65 8 19 5 163 0 93
8 38 0 32 14 80 11 109 9 71 1 100 12 139 7 52 3 163 13 40 4 5 6 28 2 105 5 186 10 186
11 1 3 73 0 106 4 80 12 150 13 5 5 71 9 145 1 138 6 148 10 168 7 60 2 107 14 164 8 178
1 14 10 5 4 115 2 70 11 112 5 76 9 20 0 104 7 167 13 58 8 193 12 30 6 132 3 6 14 19
la01.txt
ADDED
@@ -0,0 +1,11 @@
10 5
1 21 0 53 4 95 3 55 2 34
0 21 3 52 4 16 2 26 1 71
3 39 4 98 1 42 2 31 0 12
1 77 0 55 4 79 2 66 3 77
0 83 3 34 2 64 1 19 4 37
1 54 2 43 4 79 0 92 3 62
3 69 4 77 1 87 2 87 0 93
2 38 0 60 1 41 3 24 4 83
3 17 1 49 4 25 0 44 2 98
4 77 3 79 2 43 1 75 0 96
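The three instance files (ta01, dmu01.txt, la01.txt) use the classical job-shop text format that the parser in app.py expects: the first line gives the number of jobs and the number of machines, and each subsequent line describes one job as (machine id, processing time) pairs in processing order. An illustrative decoding of the first job of la01.txt (not part of the commit):

row = "1 21 0 53 4 95 3 55 2 34".split()
ops = [(int(m), int(d)) for m, d in zip(row[0::2], row[1::2])]
# ops == [(1, 21), (0, 53), (4, 95), (3, 55), (2, 34)]
# job 0 runs on machine 1 for 21 time units, then machine 0 for 53, and so on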
requirements.txt
ADDED
@@ -0,0 +1,4 @@
job-shop-cp-env==1.0.0
ray==2.1.0
ray[rllib]==2.1.0
stable-baselines3==1.6.2
ta01
ADDED
@@ -0,0 +1,16 @@
15 15
6 94 12 66 4 10 7 53 3 26 2 15 10 65 11 82 8 10 14 27 9 93 13 92 5 96 0 70 1 83
4 74 5 31 7 88 14 51 13 57 8 78 11 8 9 7 6 91 10 79 0 18 3 51 12 18 1 99 2 33
1 4 8 82 9 40 12 86 6 50 11 54 13 21 5 6 0 54 2 68 7 82 10 20 4 39 3 35 14 68
5 73 2 23 9 30 6 30 10 53 0 94 13 58 4 93 7 32 14 91 11 30 8 56 12 27 1 92 3 9
7 78 8 23 6 21 10 60 4 36 9 29 2 95 14 99 12 79 5 76 1 93 13 42 11 52 0 42 3 96
5 29 3 61 12 88 13 70 11 16 4 31 14 65 7 83 2 78 1 26 10 50 0 87 9 62 6 14 8 30
12 18 3 75 7 20 8 4 14 91 6 68 1 19 11 54 4 85 5 73 2 43 10 24 0 37 13 87 9 66
11 32 5 52 0 9 7 49 12 61 13 35 14 99 1 62 2 6 8 62 4 7 3 80 9 3 6 57 10 7
10 85 11 30 6 96 14 91 0 13 1 87 2 82 5 83 12 78 4 56 8 85 7 8 9 66 13 88 3 15
6 5 11 59 9 30 2 60 8 41 0 17 13 66 3 89 10 78 7 88 1 69 12 45 14 82 4 6 5 13
4 90 7 27 13 1 0 8 5 91 12 80 6 89 8 49 14 32 10 28 3 90 1 93 11 6 9 35 2 73
2 47 14 43 0 75 12 8 6 51 10 3 7 84 5 34 8 28 9 60 13 69 1 45 3 67 11 58 4 87
5 65 8 62 10 97 2 20 3 31 6 33 9 33 0 77 13 50 4 80 1 48 11 90 12 75 7 96 14 44
8 28 14 21 4 51 13 75 5 17 6 89 9 59 1 56 12 63 7 18 11 17 10 30 3 16 2 7 0 35
10 57 8 16 12 42 6 34 4 37 1 26 13 68 14 73 11 5 0 8 7 12 3 87 2 83 9 20 5 97