ositamiles committed on
Commit
be1fe7c
·
verified ·
1 Parent(s): c037455

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -78,30 +78,41 @@ def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
78
  return LayerNormalization(epsilon=1e-6)(x + res)
79
 
80
  # RL Environment
81
class PricingEnv(gym.Env):
    """Legacy-Gym environment for learning a pricing policy from historical data.

    Each step the agent proposes a price in [0, 100]; the reward is the
    negative absolute error against the row's recorded 'price' column, so
    the optimum policy reproduces the historical price exactly.
    """

    def __init__(self, data):
        super(PricingEnv, self).__init__()
        # data: pandas DataFrame with the feature columns read in
        # _get_observation() plus a 'price' column — TODO confirm schema
        # against the caller.
        self.data = data
        self.current_step = 0
        # Continuous scalar price action; 6-dim observation
        # (5 encoded features + the step index).
        self.action_space = spaces.Box(low=0, high=100, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(6,), dtype=np.float32)

    def step(self, action):
        """Apply one pricing action. Returns (obs, reward, done, info) — old Gym 4-tuple API."""
        reward = self._get_reward(action)
        self.current_step += 1
        done = self.current_step >= len(self.data)
        obs = self._get_observation()
        return obs, reward, done, {}

    def reset(self):
        """Rewind to the first row and return the initial observation."""
        self.current_step = 0
        return self._get_observation()

    def _get_observation(self):
        # BUGFIX: step() increments current_step before building the terminal
        # observation, so on the last step current_step == len(data) and the
        # unclamped iloc lookup raised IndexError. Clamp to the final row.
        step = min(self.current_step, len(self.data) - 1)
        obs = self.data.iloc[step][['demand_index', 'competitor_price', 'past_sales', 'genre_encoded', 'region_encoded']].values
        return np.append(obs, step)

    def _get_reward(self, action):
        # Negative absolute deviation from this row's historical price.
        # Called before current_step is incremented, so the index is valid.
        price = action[0]
        actual_price = self.data.iloc[self.current_step]['price']
        return -abs(price - actual_price)
 
78
  return LayerNormalization(epsilon=1e-6)(x + res)
79
 
80
  # RL Environment
81
class PricingEnv(Env):
    """Gymnasium-style environment for imitating historical pricing decisions.

    The observation is five encoded market features plus the current row
    index; the action is a single price in [0, 100]; the reward penalises
    the absolute gap to the row's recorded 'price' value.
    """

    def __init__(self, data):
        super(PricingEnv, self).__init__()
        # Historical rows driving the episode — presumably a pandas
        # DataFrame; verify the expected columns against the caller.
        self.data = data
        self.current_step = 0
        self.max_steps = len(data) - 1  # index of the last usable row
        self.action_space = spaces.Box(low=0, high=100, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(6,), dtype=np.float32)

    def step(self, action):
        """Advance one row; returns the gymnasium 5-tuple (obs, reward, terminated, truncated, info)."""
        reward = self._get_reward(action)
        self.current_step += 1
        terminated = self.current_step >= self.max_steps
        # Truncation is never signalled separately here — the episode only
        # ends by exhausting the data, reported via `terminated`.
        return self._get_observation(), reward, terminated, False, {}

    def reset(self, seed=None, options=None):
        """Re-seed via the base class and rewind to the first row; returns (obs, info)."""
        super().reset(seed=seed)
        self.current_step = 0
        return self._get_observation(), {}

    def _get_observation(self):
        # Never index past the final row: after the episode ends we keep
        # reporting the last valid observation instead of raising.
        row = min(self.current_step, self.max_steps)
        features = self.data.iloc[row][['demand_index', 'competitor_price', 'past_sales', 'genre_encoded', 'region_encoded']].values
        return np.append(features, row)

    def _get_reward(self, action):
        # Out of bounds -> neutral reward rather than an indexing error.
        if self.current_step > self.max_steps:
            return 0
        return -abs(action[0] - self.data.iloc[self.current_step]['price'])