# Source code for momaland.utils.env
"""Overrides PZ types to enforce multi objective rewards."""
import warnings
from typing import Dict
from typing_extensions import override
import gymnasium
import numpy as np
from numpy.typing import NDArray
from pettingzoo import AECEnv
from pettingzoo.utils.env import AgentID, ParallelEnv
[docs]
class MOAECEnv(AECEnv):
    """PettingZoo ``AECEnv`` variant whose per-agent rewards are arrays (multi-objective).

    Environments deriving from this class should override ``reward_space``;
    the default implementation falls back to the ``reward_spaces`` dict.
    """

    # Space describing the reward of each agent.
    reward_spaces: Dict[AgentID, gymnasium.spaces.Space]
    # Multi-objective reward each agent received on the last step.
    rewards: Dict[AgentID, NDArray]
    # Cumulative reward of each agent.
    _cumulative_rewards: Dict[AgentID, NDArray]

    def reward_space(self, agent: AgentID) -> gymnasium.spaces.Space:
        """Return the reward space associated with ``agent``.

        The returned value MUST be the same every time it is requested for a
        given agent name.

        Unless overridden, this emits a warning and looks the agent up in the
        ``reward_spaces`` dict attribute.
        """
        message = "Your environment should override the reward_space function. Attempting to use the reward_spaces dict attribute."
        warnings.warn(message)
        return self.reward_spaces[agent]

    @override
    def _clear_rewards(self) -> None:
        """Reset every entry of ``rewards`` to a zero vector.

        Each agent's entry becomes a float32 array whose length is the first
        dimension of the shape of that agent's reward space.
        """
        for agent_id in self.rewards:
            # Length is taken from the reward space, so it is queried per agent.
            num_objectives = self.reward_space(agent_id).shape[0]  # type: ignore
            self.rewards[agent_id] = np.zeros(num_objectives, dtype=np.float32)
[docs]
class MOParallelEnv(ParallelEnv):
    """PettingZoo ``ParallelEnv`` variant that exposes a reward space per agent (multi-objective).

    Environments deriving from this class should override ``reward_space``;
    the default implementation falls back to the ``reward_spaces`` dict.
    """

    # Space describing the reward of each agent.
    reward_spaces: Dict[AgentID, gymnasium.spaces.Space]

    def reward_space(self, agent: AgentID) -> gymnasium.spaces.Space:
        """Return the reward space associated with ``agent``.

        The returned value MUST be the same every time it is requested for a
        given agent name.

        Unless overridden, this emits a warning and looks the agent up in the
        ``reward_spaces`` dict attribute.
        """
        message = "Your environment should override the reward_space function. Attempting to use the reward_spaces dict attribute."
        warnings.warn(message)
        return self.reward_spaces[agent]