Hi everyone, I am trying to build an inventory management program using proximal policy optimization (PPO). The environment I have created is below. The problem is that the reward graphs show no improvement in the reward: the agents just fill the inventory instantly, without considering transportation, delays, or even the carried-over unsatisfied demand. I would appreciate any assistance with this.

The environment:

import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt
import pandas as pd


class InventoryMgmt(gym.Env):
    def __init__(self, data, weights_cost, weights_emission, initial_inventory, max_transport_capacity, max_inventory_capacity):
        super(InventoryMgmt, self).__init__()
        self.data = data
        self.weights_cost = weights_cost
        self.weights_emission = weights_emission
        self.initial_inventory = initial_inventory
        self.max_transport_capacity = max_transport_capacity
        self.max_inventory_capacity = max_inventory_capacity

        # Continuous action space to control order quantity
        self.action_space = spaces.Box(low=0, high=self.max_transport_capacity, shape=(1,), dtype=np.float32)

        # Observation space: normalized inventory level, demand, and reorder point
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)

        self.results_df = pd.DataFrame()   # DataFrame to store results
        self.pending_orders = []           # Track pending orders (quantity and delivery day)
        self.carry_over_backorder = 0      # Track backordered demand carried over

        self.reset()

    def step(self, action):
        """
        Step through one day in the environment.
        Process the current day's demand, satisfy it using available inventory,
        and carry over unmet demand to the next day if necessary.
        """
        row = self.data.iloc[self.current_day]
        actual_demand = row['Demand']

        # Add any carried-over backorder to today's demand
        total_demand = actual_demand + self.carry_over_backorder

        carrying_cost = row['Carrying Cost']
        item_cost = row['Item Cost']
        order_cost = row['Order Cost']
        transport_emission_rate = row['Transport Emission Rate']
        holding_emission_rate = row['Holding Emission Rate']
        lead_time = row['Lead Time']
        backorder_cost = row['Backorder Cost']

        max_demand = self.data['Demand'].max()
        avg_demand = self.data['Demand'].mean()
        avg_lead_time = self.data['Lead Time'].mean()

        holding_cost = item_cost * carrying_cost
        safety_stock = (max_demand * avg_lead_time) - (avg_demand * avg_lead_time)
        reorder_point = safety_stock + (avg_demand * lead_time)

        # Initialize order_quantity to 0 in case no order is placed
        order_quantity = 0

        # Process pending orders
        for order in self.pending_orders:
            if self.current_day >= order['delivery_day']:
                self.inventory_level += order['quantity']
                print(f"Order delivered: {order['quantity']} units on day {self.current_day}")
        self.pending_orders = [order for order in self.pending_orders if self.current_day < order['delivery_day']]

        # Ensure inventory level does not exceed capacity
        self.inventory_level = min(self.inventory_level, self.max_inventory_capacity)

        # Calculate backorder before placing new orders
        if total_demand > self.inventory_level:
            backorder_quantity = total_demand - self.inventory_level
            backorder_penalty = backorder_quantity * backorder_cost
            self.inventory_level = 0
            self.carry_over_backorder = backorder_quantity
        else:
            self.inventory_level -= total_demand
            backorder_quantity = 0
            backorder_penalty = 0
            self.carry_over_backorder = 0

        # Check if inventory is below the reorder point
        if self.inventory_level <= reorder_point:
            order_quantity = max(0, min(action[0], self.max_inventory_capacity - self.inventory_level))
            if order_quantity > 0:
                self.pending_orders.append({'quantity': order_quantity, 'delivery_day': self.current_day + lead_time})
                print(f"Order placed: {order_quantity} units on day {self.current_day}")

        # Calculate daily costs and emissions
        holding_cost_total = max(0, self.inventory_level * holding_cost)
        order_cost_total = max(0, order_quantity * item_cost + order_cost)
        daily_cost = holding_cost_total + order_cost_total + backorder_penalty

        holding_emissions = max(0, self.inventory_level * holding_emission_rate)
        transport_emissions = max(0, (np.ceil(order_quantity / self.max_transport_capacity)) * transport_emission_rate)
        daily_emissions = holding_emissions + transport_emissions

        # Update totals
        self.total_cost += daily_cost
        self.total_emissions += daily_emissions
        self.daily_costs.append(daily_cost)
        self.daily_emissions.append(daily_emissions)
        self.inventory_levels.append(self.inventory_level)

        # Refined reward calculation
        backorder_penalty_reward = -5.0 * backorder_quantity * backorder_cost
        inventory_penalty = -0.5 * self.inventory_level
        desired_inventory_level = reorder_point
        efficiency_reward = -abs(self.inventory_level - desired_inventory_level) * 0.2
        if self.inventory_level <= reorder_point:
            order_incentive = 2.0
        else:
            order_incentive = -0.5
        reward_cost = -daily_cost + backorder_penalty_reward + inventory_penalty + efficiency_reward + order_incentive
        reward_emission = -daily_emissions

        normalized_inventory = self.inventory_level / self.max_inventory_capacity
        normalized_demand = total_demand / max_demand
        normalized_reorder_point = reorder_point / self.max_inventory_capacity
        normalized_observation = np.array([normalized_inventory, normalized_demand, normalized_reorder_point], dtype=np.float32)

        daily_data = {
            'Day': self.current_day + 1,
            'Actual Demand': actual_demand,
            'Carried Over Demand': self.carry_over_backorder,
            'Inventory Level': self.inventory_level,
            'Order Quantity': order_quantity if self.inventory_level <= reorder_point else 0,
            'Holding Cost': holding_cost_total,
            'Order Cost': order_cost_total,
            'Backorder Penalty': backorder_penalty,
            'Total Daily Cost': daily_cost,
            'Daily Holding Emissions': holding_emissions,
            'Daily Transport Emissions': transport_emissions,
            'Total Daily Emissions': daily_emissions
        }
        self.results_df = pd.concat([self.results_df, pd.DataFrame([daily_data])], ignore_index=True)

        self.current_day += 1
        done = self.current_day >= len(self.data)
        self.rewards.append((reward_cost, reward_emission))

        return normalized_observation, (reward_cost, reward_emission), done, {}

    def reset(self):
        self.current_day = 0
        self.inventory_level = self.initial_inventory
        self.total_cost = 0
        self.total_emissions = 0
        self.daily_costs = []
        self.daily_emissions = []
        self.inventory_levels = []
        self.rewards = []
        self.pending_orders = []
        self.carry_over_backorder = 0

        # Calculate initial values for safety_stock, avg_demand, and lead_time
        max_demand = self.data['Demand'].max()
        avg_demand = self.data['Demand'].mean()
        avg_lead_time = self.data['Lead Time'].mean()
        initial_lead_time = self.data.iloc[self.current_day]['Lead Time']
        holding_cost = self.data.iloc[self.current_day]['Item Cost'] * self.data.iloc[self.current_day]['Carrying Cost']
        safety_stock = (max_demand * avg_lead_time) - (avg_demand * avg_lead_time)
        reorder_point = safety_stock + (avg_demand * initial_lead_time)  # Calculate reorder_point

        # Initial observation
        initial_demand = self.data.iloc[self.current_day]['Demand']
        normalized_inventory = self.inventory_level / self.max_inventory_capacity
        normalized_demand = initial_demand / max_demand
        normalized_reorder_point = reorder_point / self.max_inventory_capacity  # Normalize reorder_point

        return np.array([normalized_inventory, normalized_demand, normalized_reorder_point], dtype=np.float32)

    def render(self):
        print(f"Final Total Cost: {self.total_cost}")
        print(f"Final Total Emissions: {self.total_emissions}")

        days = range(1, len(self.inventory_levels) + 1)
        plt.figure(figsize=(20, 10))

        plt.subplot(5, 1, 1)
        plt.plot(days, self.inventory_levels, marker='o')
        plt.title('Inventory Level Over Time')
        plt.xlabel('Day')
        plt.ylabel('Inventory Level')

        plt.subplot(5, 1, 2)
        plt.plot(days, self.daily_costs, marker='o')
        plt.title('Daily Costs Over Time')
        plt.xlabel('Day')
        plt.ylabel('Costs')

        plt.subplot(5, 1, 3)
        plt.plot(days, self.daily_emissions, marker='o')
        plt.title('Daily Emissions Over Time')
        plt.xlabel('Day')
        plt.ylabel('Emissions')

        plt.subplot(5, 1, 4)
        cost_rewards = [r[0] for r in self.rewards]
        plt.plot(days, cost_rewards, marker='o', label='Cost Rewards')
        plt.title('Cost Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')

        plt.subplot(5, 1, 5)
        emission_rewards = [r[1] for r in self.rewards]
        plt.plot(days, emission_rewards, marker='o', label='Emission Rewards')
        plt.title('Emission Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')

        plt.tight_layout()
        plt.show()

    def save_to_excel(self, file_name='inventory_output.xlsx'):
        """Save the results DataFrame to an Excel file."""
        if not self.results_df.empty:
            self.results_df.to_excel(file_name, index=False)
            print(f"Results saved to {file_name}")
        else:
            print("No results to save.")
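For reference, a minimal way to exercise the environment end to end with random actions (before any training) looks roughly like this. The DataFrame below is only a stand-in with made-up numbers; the real data has the same columns that step() reads.

# Stand-in data with the columns step() expects (values are made up)
data = pd.DataFrame({
    'Demand': np.random.randint(5, 30, size=30),
    'Carrying Cost': 0.02,
    'Item Cost': 10.0,
    'Order Cost': 50.0,
    'Transport Emission Rate': 5.0,
    'Holding Emission Rate': 0.1,
    'Lead Time': 2,
    'Backorder Cost': 4.0,
})

env = InventoryMgmt(data,
                    weights_cost=0.5, weights_emission=0.5,
                    initial_inventory=100,
                    max_transport_capacity=50,
                    max_inventory_capacity=500)

obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random order quantity in [0, max_transport_capacity]
    obs, (reward_cost, reward_emission), done, _ = env.step(action)
env.render()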
Registering the environment:

gym.envs.registration.register(
    id='InventoryMgmt-v1',
    entry_point='InvEnv_multi_v1:InventoryMgmt',
)
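Assuming the class above is saved in a module named InvEnv_multi_v1.py (which is what the entry_point string implies), the registered environment can then be built with gym.make, forwarding the constructor arguments as keyword arguments; this is just a sketch of the intended usage:

env = gym.make('InventoryMgmt-v1',
               data=data,  # the same stand-in DataFrame as above
               weights_cost=0.5, weights_emission=0.5,
               initial_inventory=100,
               max_transport_capacity=50,
               max_inventory_capacity=500)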