Pettingzoo wrapper changed to gymnasium interface #6211


Open
alexander-zap wants to merge 11 commits into Unity-Technologies:develop
from alexander-zap:pettingzoo_gymnasium_support
Commits (11)
6bd65f1
Adjusted .pre-commit-config.yaml for Windows usage
May 28, 2025
cd2b649
Fixed mypy issue
May 28, 2025
48a29df
Switched to gymnasium interface
May 28, 2025
98b22d7
Created pyproject.toml to support installation of package via poetry …
May 28, 2025
4e874e9
Merge pull request #1 from alexander-zap/local_poetry_support
alexander-zap May 28, 2025
4ca9d8b
Updated imports to use gymnasium instead of gym
May 28, 2025
428afc1
Merge branch 'gymnasium_interface' of https://github.com/alexander-za…
May 30, 2025
be47ce0
Adjusted pettingzoo environments to support gymnasium interface (rese…
Jun 13, 2025
f301103
Adjusted pettingzoo environments to support gymnasium interface (rese…
Jun 13, 2025
440c5cf
Set continuous action space as dtype float64 instead of int32
Jun 17, 2025
f0d25ea
Fixed interface problems with pettingzoo implementation
Jun 30, 2025
8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
@@ -56,9 +56,13 @@ repos:
(?x)^(
.*cs.meta|
.*.css|
.*.meta
.*.meta|
.*.asset|
.*.prefab|
.*.unity|
.*.json
)$
args: [--fix=lf]
args: [--fix=crlf]

- id: trailing-whitespace
name: trailing-whitespace-markdown
4 changes: 2 additions & 2 deletions colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
@@ -161,8 +161,8 @@
"from pathlib import Path\n",
"from typing import Callable, Any\n",
"\n",
"import gym\n",
"from gym import Env\n",
"import gymnasium as gym\n",
"from gymnasium import Env\n",
"\n",
"from stable_baselines3 import PPO\n",
"from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
19 changes: 12 additions & 7 deletions docs/Python-Gym-API-Documentation.md
@@ -59,18 +59,22 @@ Environment initialization
#### reset

```python
| reset() -> Union[List[np.ndarray], np.ndarray]
| reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict]
```

Resets the state of the environment and returns an initial observation.
Returns: observation (object/list): the initial observation of the
space.
Resets the state of the environment and returns an initial observation and info.

**Returns**:

- `observation` _object/list_ - the initial observation of the
space.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.step"></a>
#### step

```python
| step(action: List[Any]) -> GymStepResult
| step(action: Any) -> GymStepResult
```

Run one timestep of the environment's dynamics. When end of
@@ -86,14 +90,15 @@ Accepts an action and returns a tuple (observation, reward, done, info).

- `observation` _object/list_ - agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
- `done` _boolean/list_ - whether the episode has ended.
- `terminated` _boolean/list_ - whether the episode has ended by termination.
- `truncated` _boolean/list_ - whether the episode has ended by truncation.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.render"></a>
#### render

```python
| render(mode="rgb_array")
| render()
```

Return the latest visual observations.
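
Taken together, the updated signatures give the wrapper the standard gymnasium loop: `reset()` returns `(observation, info)` and `step()` reports `terminated` and `truncated` separately. A minimal usage sketch, assuming the wrapper from this branch and an illustrative single-agent Unity build named "3DBall":

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

# "3DBall" is only an illustrative build name; any single-agent Unity environment works.
unity_env = UnityEnvironment("3DBall")
env = UnityToGymWrapper(unity_env)

observation, info = env.reset(seed=1)  # gymnasium-style reset returns (observation, info)
for _ in range(100):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
```
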
2 changes: 1 addition & 1 deletion docs/Python-Gym-API.md
@@ -93,7 +93,7 @@ observation, a single discrete action and a single Agent in the scene.
Add the following code to the `train_unity.py` file:

```python
import gym
import gymnasium as gym

from baselines import deepq
from baselines import logger
23 changes: 7 additions & 16 deletions docs/Python-PettingZoo-API-Documentation.md
@@ -21,7 +21,6 @@
* [action\_space](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.action_space)
* [side\_channel](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.side_channel)
* [reset](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.reset)
* [seed](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.seed)
* [render](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.render)
* [close](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.close)

@@ -137,7 +136,7 @@ Initializes a Unity Parallel environment wrapper.
#### reset

```python
| reset() -> Dict[str, Any]
| reset(seed: int | None = None, options: dict | None = None) -> Tuple[Dict[str, Any], Dict[str, Dict]]
```

Resets the environment.
@@ -207,32 +206,24 @@ of an environment with `env.side_channel[<name-of-channel>]`.
#### reset

```python
| reset()
| reset(seed: int | None = None, options: dict | None = None) -> Any
```

Resets the environment.

<a name="mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.seed"></a>
#### seed

```python
| seed(seed=None)
```

Reseeds the environment (making the resulting environment deterministic).
`reset()` must be called after `seed()`, and before `step()`.

<a name="mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.render"></a>
#### render

```python
| render(mode="human")
| render()
```

NOT SUPPORTED.

Displays a rendered frame from the environment, if supported.
Alternate render modes in the default environments are `'rgb_array'`
Renders the environment as specified by self.render_mode, if supported.

Render mode can be `human` to display a window.
Other render modes in the default environments are `'rgb_array'`
which returns a numpy array and is supported by all environments outside of classic,
and `'ansi'` which returns the strings printed (specific to classic environments).
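
With the separate `seed()` method removed, seeding now goes through `reset()`, which also returns per-agent info dicts. A minimal sketch, assuming the patched `UnityParallelEnv` wrapper (its module path is assumed to mirror `unity_aec_env`) and the illustrative "StrikersVsGoalie" build used elsewhere in these docs:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv

unity_env = UnityEnvironment("StrikersVsGoalie")
env = UnityParallelEnv(unity_env)

# reset() now accepts seed/options and returns (observations, infos), both keyed by agent.
observations, infos = env.reset(seed=42)
print(list(observations.keys()))
env.close()
```
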

6 changes: 3 additions & 3 deletions docs/Python-PettingZoo-API.md
@@ -25,13 +25,13 @@ Here's an example of interacting with wrapped environment:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs import UnityToPettingZooWrapper
from mlagents_envs.envs.unity_aec_env import UnityAECEnv

unity_env = UnityEnvironment("StrikersVsGoalie")
env = UnityToPettingZooWrapper(unity_env)
env = UnityAECEnv(unity_env)
env.reset()
for agent in env.agent_iter():
observation, reward, done, info = env.last()
observation, reward, terminated, truncated, info = env.last()
action = policy(observation, agent)
env.step(action)
```
37 changes: 29 additions & 8 deletions ml-agents-envs/README.md
@@ -12,14 +12,6 @@ The LLAPI is used by the trainer implementation in `mlagents`.
`mlagents_envs` can be used independently of `mlagents` for Python
communication.

## Installation

Install the `mlagents_envs` package with:

```sh
python -m pip install mlagents_envs==1.1.0
```

## Usage & More Information

See
@@ -42,3 +34,32 @@ scene with the ML-Agents SDK, check out the main
- Communication between Unity and the Python `UnityEnvironment` is not secure.
- On Linux, ports are not released immediately after the communication closes.
As such, you cannot reuse ports right after closing a `UnityEnvironment`.

## Development and publishing (Wargaming artifactory)

Since this package no longer seems to be maintained by the official developers, we have forked it to the Wargaming GitLab and are maintaining it there.
Publishing is done via the [Wargaming artifactory](https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple).

To contribute to the `mlagents_envs` package, please work on a branch and create a merge request to `master` once ready.
Once the merge request is approved and merged into the `master` branch, a GitLab pipeline automatically creates a new git tag and publishes the new version to the Wargaming artifactory.

## Installation (Wargaming artifactory)

Since publishing is done via the Wargaming artifactory, you can use this package as a dependency by adding the following to your `pyproject.toml`:

```toml
[tool.poetry.dependencies]
mlagents-envs = { version = "^0.1", source = "artifactory" }

[[tool.poetry.source]]
name = "artifactory"
url = "https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple"
priority = "explicit"
```


Or you can install the `mlagents_envs` package from the Wargaming artifactory using pip:

```bash
pip install mlagents-envs --extra-index-url https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple
```
46 changes: 46 additions & 0 deletions ml-agents-envs/mlagents_envs/base_env.py
@@ -138,6 +138,30 @@ def __getitem__(self, agent_id: AgentId) -> DecisionStep:
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id

def __add__(self, other: "DecisionSteps") -> "DecisionSteps":
assert isinstance(other, DecisionSteps)

combined_terminal_steps = DecisionSteps(
list(np.hstack([self.obs, other.obs])),
np.hstack([self.reward, other.reward]),
np.hstack([self.agent_id, other.agent_id]),
list(np.hstack([self.action_mask, other.action_mask]))
if self.action_mask or other.action_mask
else None,
np.hstack([self.group_id, other.group_id]),
np.hstack([self.group_reward, other.group_reward]),
)
combined_terminal_steps._agent_id_to_index = {
**self.agent_id_to_index,
# shift index of added termination steps because of appending
**{
agent_id: index + len(self)
for agent_id, index in other.agent_id_to_index.items()
},
}

return combined_terminal_steps

@staticmethod
def empty(spec: "BehaviorSpec") -> "DecisionSteps":
"""
@@ -245,6 +269,28 @@ def __getitem__(self, agent_id: AgentId) -> TerminalStep:
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id

def __add__(self, other: "TerminalSteps") -> "TerminalSteps":
assert isinstance(other, TerminalSteps)

combined_terminal_steps = TerminalSteps(
list(np.hstack([self.obs, other.obs])),
np.hstack([self.reward, other.reward]),
np.hstack([self.interrupted, other.interrupted]),
np.hstack([self.agent_id, other.agent_id]),
np.hstack([self.group_id, other.group_id]),
np.hstack([self.group_reward, other.group_reward]),
)
combined_terminal_steps._agent_id_to_index = {
**self.agent_id_to_index,
# shift index of added termination steps because of appending
**{
agent_id: index + len(self)
for agent_id, index in other.agent_id_to_index.items()
},
}

return combined_terminal_steps

@staticmethod
def empty(spec: "BehaviorSpec") -> "TerminalSteps":
"""
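
The new `__add__` implementations concatenate two step batches and shift the right-hand operand's agent-id-to-index map so lookups by agent id still resolve after the merge. A minimal sketch of that behaviour, assuming the patched package is installed and using single-agent batches with one 4-dimensional vector observation (names, values, and shapes are illustrative, not from the diff):

```python
import numpy as np

from mlagents_envs.base_env import DecisionSteps

# Constructor arguments in the positional order used by the diff above:
# obs, reward, agent_id, action_mask, group_id, group_reward.
a = DecisionSteps(
    [np.zeros((1, 4), dtype=np.float32)],  # obs: one array per sensor, batched over agents
    np.array([1.0], dtype=np.float32),     # reward
    np.array([0], dtype=np.int32),         # agent_id
    None,                                  # action_mask
    np.array([0], dtype=np.int32),         # group_id
    np.array([0.0], dtype=np.float32),     # group_reward
)
b = DecisionSteps(
    [np.ones((1, 4), dtype=np.float32)],
    np.array([2.0], dtype=np.float32),
    np.array([7], dtype=np.int32),
    None,
    np.array([0], dtype=np.int32),
    np.array([0.0], dtype=np.float32),
)

combined = a + b
assert len(combined) == 2         # both agents appear in the merged batch
assert combined[7].reward == 2.0  # agent 7's index was shifted, so lookup still resolves
```
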
36 changes: 28 additions & 8 deletions ml-agents-envs/mlagents_envs/envs/env_helpers.py
@@ -17,7 +17,11 @@ def _unwrap_batch_steps(batch_steps, behavior_name):
termination_id = [
_behavior_to_agent_id(behavior_name, i) for i in termination_batch.agent_id
]
agents = decision_id + termination_id
agents = decision_id
for id in termination_id:
if id not in agents:
agents.append(id)

obs = {
agent_id: [batch_obs[i] for batch_obs in termination_batch.obs]
for i, agent_id in enumerate(termination_id)
@@ -40,30 +44,46 @@
}
)
obs = {k: v if len(v) > 1 else v[0] for k, v in obs.items()}
dones = {agent_id: True for agent_id in termination_id}
dones.update({agent_id: False for agent_id in decision_id})
rewards = {
agent_id: termination_batch.reward[i]
for i, agent_id in enumerate(termination_id)
agent_id: decision_batch.reward[i] for i, agent_id in enumerate(decision_id)
}
rewards.update(
{agent_id: decision_batch.reward[i] for i, agent_id in enumerate(decision_id)}
{
agent_id: termination_batch.reward[i]
for i, agent_id in enumerate(termination_id)
}
)
cumulative_rewards = {k: v for k, v in rewards.items()}
infos = {}
terminations = {}
truncations = {}
for i, agent_id in enumerate(decision_id):
infos[agent_id] = {}
infos[agent_id]["behavior_name"] = behavior_name
infos[agent_id]["group_id"] = decision_batch.group_id[i]
infos[agent_id]["group_reward"] = decision_batch.group_reward[i]
truncations[agent_id] = False
terminations[agent_id] = False
for i, agent_id in enumerate(termination_id):
infos[agent_id] = {}
infos[agent_id]["behavior_name"] = behavior_name
infos[agent_id]["group_id"] = termination_batch.group_id[i]
infos[agent_id]["group_reward"] = termination_batch.group_reward[i]
infos[agent_id]["interrupted"] = termination_batch.interrupted[i]
truncated = bool(termination_batch.interrupted[i])
infos[agent_id]["interrupted"] = truncated
truncations[agent_id] = truncated
terminations[agent_id] = not truncated
id_map = {agent_id: i for i, agent_id in enumerate(decision_id)}
return agents, obs, dones, rewards, cumulative_rewards, infos, id_map
return (
agents,
obs,
terminations,
truncations,
rewards,
cumulative_rewards,
infos,
id_map,
)


def _parse_behavior(full_behavior):
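
In the updated helper, a terminal step flagged as `interrupted` (for example, an agent that hit `max_step`) is reported as truncated, while every other episode end is reported as terminated. A small self-contained illustration of that mapping (the helper name is ours, not from the diff):

```python
def done_flags(is_terminal: bool, interrupted: bool) -> tuple:
    """Map a Unity step to gymnasium-style (terminated, truncated)."""
    if not is_terminal:
        return False, False           # the agent merely requested its next decision
    truncated = bool(interrupted)     # episode cut short, e.g. max_step reached
    return not truncated, truncated   # any other ending counts as termination


assert done_flags(False, False) == (False, False)
assert done_flags(True, True) == (False, True)   # interrupted -> truncated
assert done_flags(True, False) == (True, False)  # natural end -> terminated
```
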
11 changes: 7 additions & 4 deletions ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
@@ -1,5 +1,5 @@
from typing import Any, Optional
from gym import error
from gymnasium import error
from mlagents_envs.base_env import BaseEnv
from pettingzoo import AECEnv

@@ -53,16 +53,19 @@ def observe(self, agent_id):
return (
self._observations[agent_id],
self._cumm_rewards[agent_id],
self._dones[agent_id],
self._terminations[agent_id],
self._truncations[agent_id],
self._infos[agent_id],
)

def last(self, observe=True):
"""
returns observation, cumulative reward, done, info for the current agent (specified by self.agent_selection)
"""
obs, reward, done, info = self.observe(self._agents[self._agent_index])
return obs if observe else None, reward, done, info
obs, cumm_rewards, terminated, truncated, info = self.observe(
self._agents[self._agent_index]
)
return obs if observe else None, cumm_rewards, terminated, truncated, info

@property
def agent_selection(self):