Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Assistance with gym environment code#647

Sorour332000 started this conversation in Show and tell
Discussion options

Hi everyone, I am trying to create an inventory management program using proximal policy optimization. This is the environment I have created below. Problem is that from the reward graphs there is no improvement in the reward, the agents just fill the inventory instantly without considering transportation, delays or even the carried over unsatisfied demand. Appreciate any assistance in this.

The environment:
import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt
import pandas as pd

class InventoryMgmt(gym.Env):
    """Single-item inventory management environment for multi-objective RL.

    Each step advances one day: pending orders arrive after their lead time,
    demand (plus any carried-over backorder) is satisfied from inventory, and a
    replenishment order may be placed when inventory falls to the reorder point.
    The step reward is a (cost_reward, emission_reward) tuple — note this is a
    non-standard Gym return; a scalarizing wrapper is needed for stock PPO.
    """

    def __init__(self, data, weights_cost, weights_emission, initial_inventory,
                 max_transport_capacity, max_inventory_capacity):
        # NOTE: the original text had `definit` / `.init()` — markdown stripped
        # the dunder underscores; restored to valid `__init__` here.
        super().__init__()
        self.data = data  # DataFrame with per-day Demand, costs, Lead Time, emission rates
        self.weights_cost = weights_cost
        self.weights_emission = weights_emission
        self.initial_inventory = initial_inventory
        self.max_transport_capacity = max_transport_capacity
        self.max_inventory_capacity = max_inventory_capacity

        # Continuous action: order quantity in [0, max_transport_capacity].
        self.action_space = spaces.Box(low=0, high=self.max_transport_capacity,
                                       shape=(1,), dtype=np.float32)
        # Observation: normalized [inventory level, demand, reorder point].
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)

        self.results_df = pd.DataFrame()   # per-day log, exportable via save_to_excel()
        self.pending_orders = []           # in-flight orders: {'quantity', 'delivery_day'}
        self.carry_over_backorder = 0      # unmet demand rolled into the next day
        self.reset()

    def step(self, action):
        """Simulate one day.

        Delivers due orders, satisfies today's demand (plus carried-over
        backorder), optionally places a new order, and accumulates costs and
        emissions. Returns (observation, (reward_cost, reward_emission),
        done, info).
        """
        row = self.data.iloc[self.current_day]
        actual_demand = row['Demand']
        # Unmet demand from previous days is added to today's requirement.
        total_demand = actual_demand + self.carry_over_backorder
        carrying_cost = row['Carrying Cost']
        item_cost = row['Item Cost']
        order_cost = row['Order Cost']
        transport_emission_rate = row['Transport Emission Rate']
        holding_emission_rate = row['Holding Emission Rate']
        lead_time = row['Lead Time']
        backorder_cost = row['Backorder Cost']

        max_demand = self.data['Demand'].max()
        avg_demand = self.data['Demand'].mean()
        avg_lead_time = self.data['Lead Time'].mean()
        holding_cost = item_cost * carrying_cost
        # Safety stock covers the gap between peak and average demand over the
        # average lead time; reorder point adds expected demand during today's
        # lead time.
        safety_stock = (max_demand * avg_lead_time) - (avg_demand * avg_lead_time)
        reorder_point = safety_stock + (avg_demand * lead_time)

        order_quantity = 0  # default: no order placed this step

        # Deliver any orders whose lead time has elapsed.
        for order in self.pending_orders:
            if self.current_day >= order['delivery_day']:
                self.inventory_level += order['quantity']
                print(f"Order delivered: {order['quantity']} units on day {self.current_day}")
        self.pending_orders = [order for order in self.pending_orders
                               if self.current_day < order['delivery_day']]

        # Cap inventory at warehouse capacity.
        self.inventory_level = min(self.inventory_level, self.max_inventory_capacity)

        # Satisfy demand; any shortfall becomes a backorder carried to tomorrow.
        if total_demand > self.inventory_level:
            backorder_quantity = total_demand - self.inventory_level
            backorder_penalty = backorder_quantity * backorder_cost
            self.inventory_level = 0
            self.carry_over_backorder = backorder_quantity
        else:
            self.inventory_level -= total_demand
            backorder_quantity = 0
            backorder_penalty = 0
            self.carry_over_backorder = 0

        # Place a new order only when at/below the reorder point; the order is
        # clipped so the delivery cannot overflow the warehouse.
        if self.inventory_level <= reorder_point:
            order_quantity = max(0, min(action[0], self.max_inventory_capacity - self.inventory_level))
            if order_quantity > 0:
                self.pending_orders.append({'quantity': order_quantity,
                                            'delivery_day': self.current_day + lead_time})
                print(f"Order placed: {order_quantity} units on day {self.current_day}")

        # Daily costs and emissions. Transport emissions scale with the number
        # of (full or partial) truckloads needed for the order.
        holding_cost_total = max(0, self.inventory_level * holding_cost)
        order_cost_total = max(0, order_quantity * item_cost + order_cost)
        daily_cost = holding_cost_total + order_cost_total + backorder_penalty
        holding_emissions = max(0, self.inventory_level * holding_emission_rate)
        transport_emissions = max(0, np.ceil(order_quantity / self.max_transport_capacity)
                                  * transport_emission_rate)
        daily_emissions = holding_emissions + transport_emissions

        self.total_cost += daily_cost
        self.total_emissions += daily_emissions
        self.daily_costs.append(daily_cost)
        self.daily_emissions.append(daily_emissions)
        self.inventory_levels.append(self.inventory_level)

        # Shaped cost reward: penalize backorders heavily, discourage excess
        # stock, reward tracking the reorder point, and nudge ordering when low.
        backorder_penalty_reward = -5.0 * backorder_quantity * backorder_cost
        inventory_penalty = -0.5 * self.inventory_level
        desired_inventory_level = reorder_point
        efficiency_reward = -abs(self.inventory_level - desired_inventory_level) * 0.2
        if self.inventory_level <= reorder_point:
            order_incentive = 2.0
        else:
            order_incentive = -0.5
        reward_cost = (-daily_cost + backorder_penalty_reward + inventory_penalty
                       + efficiency_reward + order_incentive)
        reward_emission = -daily_emissions

        normalized_inventory = self.inventory_level / self.max_inventory_capacity
        normalized_demand = total_demand / max_demand
        normalized_reorder_point = reorder_point / self.max_inventory_capacity
        normalized_observation = np.array(
            [normalized_inventory, normalized_demand, normalized_reorder_point],
            dtype=np.float32)

        daily_data = {
            'Day': self.current_day + 1,
            'Actual Demand': actual_demand,
            'Carried Over Demand': self.carry_over_backorder,
            'Inventory Level': self.inventory_level,
            'Order Quantity': order_quantity if self.inventory_level <= reorder_point else 0,
            'Holding Cost': holding_cost_total,
            'Order Cost': order_cost_total,
            'Backorder Penalty': backorder_penalty,
            'Total Daily Cost': daily_cost,
            'Daily Holding Emissions': holding_emissions,
            'Daily Transport Emissions': transport_emissions,
            'Total Daily Emissions': daily_emissions
        }
        self.results_df = pd.concat([self.results_df, pd.DataFrame([daily_data])],
                                    ignore_index=True)

        self.current_day += 1
        done = self.current_day >= len(self.data)
        self.rewards.append((reward_cost, reward_emission))
        return normalized_observation, (reward_cost, reward_emission), done, {}

    def reset(self):
        """Reset the episode to day 0 and return the initial observation."""
        self.current_day = 0
        self.inventory_level = self.initial_inventory
        self.total_cost = 0
        self.total_emissions = 0
        self.daily_costs = []
        self.daily_emissions = []
        self.inventory_levels = []
        self.rewards = []
        self.pending_orders = []
        self.carry_over_backorder = 0

        # Same safety-stock / reorder-point formula as step(), using day 0's
        # lead time.
        max_demand = self.data['Demand'].max()
        avg_demand = self.data['Demand'].mean()
        avg_lead_time = self.data['Lead Time'].mean()
        initial_lead_time = self.data.iloc[self.current_day]['Lead Time']
        holding_cost = (self.data.iloc[self.current_day]['Item Cost']
                        * self.data.iloc[self.current_day]['Carrying Cost'])
        safety_stock = (max_demand * avg_lead_time) - (avg_demand * avg_lead_time)
        reorder_point = safety_stock + (avg_demand * initial_lead_time)

        initial_demand = self.data.iloc[self.current_day]['Demand']
        normalized_inventory = self.inventory_level / self.max_inventory_capacity
        normalized_demand = initial_demand / max_demand
        normalized_reorder_point = reorder_point / self.max_inventory_capacity
        return np.array([normalized_inventory, normalized_demand, normalized_reorder_point],
                        dtype=np.float32)

    def render(self):
        """Print episode totals and plot inventory, cost, emission, and reward traces."""
        print(f"Final Total Cost: {self.total_cost}")
        print(f"Final Total Emissions: {self.total_emissions}")
        days = range(1, len(self.inventory_levels) + 1)
        plt.figure(figsize=(20, 10))

        plt.subplot(5, 1, 1)
        plt.plot(days, self.inventory_levels, marker='o')
        plt.title('Inventory Level Over Time')
        plt.xlabel('Day')
        plt.ylabel('Inventory Level')

        plt.subplot(5, 1, 2)
        plt.plot(days, self.daily_costs, marker='o')
        plt.title('Daily Costs Over Time')
        plt.xlabel('Day')
        plt.ylabel('Costs')

        plt.subplot(5, 1, 3)
        plt.plot(days, self.daily_emissions, marker='o')
        plt.title('Daily Emissions Over Time')
        plt.xlabel('Day')
        plt.ylabel('Emissions')

        plt.subplot(5, 1, 4)
        cost_rewards = [r[0] for r in self.rewards]
        plt.plot(days, cost_rewards, marker='o', label='Cost Rewards')
        plt.title('Cost Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')

        plt.subplot(5, 1, 5)
        emission_rewards = [r[1] for r in self.rewards]
        plt.plot(days, emission_rewards, marker='o', label='Emission Rewards')
        plt.title('Emission Rewards Over Time')
        plt.xlabel('Day')
        plt.ylabel('Rewards')

        plt.tight_layout()
        plt.show()

    def save_to_excel(self, file_name='inventory_output.xlsx'):
        """Save the results DataFrame to an Excel file."""
        if not self.results_df.empty:
            self.results_df.to_excel(file_name, index=False)
            print(f"Results saved to {file_name}")
        else:
            print("No results to save.")

Register the environment

# Register the environment with Gym so it can be instantiated via
# gym.make('InventoryMgmt-v1'); the entry point assumes the class lives in a
# module named InvEnv_multi_v1 — TODO confirm the module name matches the file.
gym.envs.registration.register(
id='InventoryMgmt-v1',
entry_point='InvEnv_multi_v1:InventoryMgmt',
)

You must be logged in to vote

Replies: 0 comments

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
1 participant
@Sorour332000

[8]ページ先頭

©2009-2025 Movatter.jp