Source code for momaland.utils.env

"""Overrides PZ types to enforce multi objective rewards."""

import warnings
from typing import Dict
from typing_extensions import override

import gymnasium
import numpy as np
from numpy.typing import NDArray
from pettingzoo import AECEnv
from pettingzoo.utils.env import AgentID, ParallelEnv



[docs]
class MOAECEnv(AECEnv):
    """Overrides PZ types to enforce multi objective rewards.

    Extends ``AECEnv`` with a per-agent reward space and types the per-agent
    rewards as arrays instead of scalars.
    """

    # Reward space for each agent
    reward_spaces: Dict[AgentID, gymnasium.spaces.Space]
    # Reward from the last step for each agent (MO)
    rewards: Dict[AgentID, NDArray]
    # Cumulative rewards for each agent
    _cumulative_rewards: Dict[AgentID, NDArray]

    def reward_space(self, agent: AgentID) -> gymnasium.spaces.Space:
        """Takes in agent and returns the reward space for that agent.

        MUST return the same value for the same agent name.

        Default implementation is to return the entry of the ``reward_spaces``
        dict for ``agent``; it emits a warning on every call because
        environments are expected to override this method.

        Args:
            agent: Identifier of the agent whose reward space is requested.

        Returns:
            The reward space stored for ``agent`` in ``reward_spaces``.
        """
        # stacklevel=2 attributes the warning to the caller, which is the code
        # that has to change, rather than to this line of the base class.
        warnings.warn(
            "Your environment should override the reward_space function. Attempting to use the reward_spaces dict attribute.",
            stacklevel=2,
        )
        return self.reward_spaces[agent]

    @override
    def _clear_rewards(self) -> None:
        """Clears all items in .rewards.

        Each agent already present in ``rewards`` gets a fresh zero vector of
        dtype ``float32`` whose length is the first dimension of that agent's
        reward space shape.
        """
        # Only values are reassigned (no keys added or removed), so iterating
        # over the dict while updating it is safe.
        for agent in self.rewards:
            self.rewards[agent] = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32)  # type: ignore




[docs]
class MOParallelEnv(ParallelEnv):
    """Overrides PZ types to enforce multi objective rewards.

    Extends ``ParallelEnv`` with a per-agent reward space.
    """

    # Reward space for each agent
    reward_spaces: Dict[AgentID, gymnasium.spaces.Space]

    def reward_space(self, agent: AgentID) -> gymnasium.spaces.Space:
        """Takes in agent and returns the reward space for that agent.

        MUST return the same value for the same agent name.

        Default implementation is to return the entry of the ``reward_spaces``
        dict for ``agent``; it emits a warning on every call because
        environments are expected to override this method.

        Args:
            agent: Identifier of the agent whose reward space is requested.

        Returns:
            The reward space stored for ``agent`` in ``reward_spaces``.
        """
        # stacklevel=2 attributes the warning to the caller, which is the code
        # that has to change, rather than to this line of the base class.
        warnings.warn(
            "Your environment should override the reward_space function. Attempting to use the reward_spaces dict attribute.",
            stacklevel=2,
        )
        return self.reward_spaces[agent]