Back to Repositories

Testing Episode Statistics Recording Wrapper in OpenAI Gym

This test suite validates the RecordEpisodeStatistics wrapper functionality in OpenAI Gym, focusing on episode tracking and statistics collection across different environment configurations. The tests ensure proper recording of episode returns, lengths, and termination conditions.

Test Coverage Overview

The test suite provides comprehensive coverage of the RecordEpisodeStatistics wrapper functionality.

Key areas tested include:

Single environment episode statistics recording
Vector environment compatibility
Reset information handling
Different deque sizes for statistics storage
Asynchronous and synchronous vector environments
Error handling for incorrect wrapper ordering

Implementation Analysis

The testing approach uses pytest’s parametrize feature to validate multiple environment configurations and scenarios. The implementation verifies episode statistics tracking across CartPole-v1 and Pendulum-v1 environments, with particular attention to vector environment handling and information dictionary structure.

Technical patterns include:

Parametrized test cases for environment types
Assertion-based validation of episode information
Vector environment state verification
Episode termination condition checking

Technical Details

Testing tools and configuration:

pytest framework for test organization
numpy for numerical operations
OpenAI Gym environment wrapper classes
Vector environment utilities
The add_vector_episode_statistics helper, which merges one environment's episode statistics into a vectorized info dictionary
Environment-specific configuration parameters

Best Practices Demonstrated

The test suite exemplifies several testing best practices in Python environment testing.

Notable practices include:

Systematic parameter variation using pytest.mark.parametrize
Fresh environment initialization inside every test, so no state is shared between tests
Comprehensive edge case handling
Clear separation of test scenarios
Explicit assertions on the keys and values of the episode information
Modular test function organization

openai/gym

tests/wrappers/test_record_episode_statistics.py

            
import numpy as np
import pytest

import gym
from gym.wrappers import RecordEpisodeStatistics, VectorListInfo
from gym.wrappers.record_episode_statistics import add_vector_episode_statistics


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
@pytest.mark.parametrize("deque_size", [2, 5])
def test_record_episode_statistics(env_id, deque_size):
    """Check the per-episode bookkeeping of ``RecordEpisodeStatistics`` on one env.

    Runs five episodes and verifies that the running counters are zero right
    after every reset, that the ``info`` of the step ending an episode carries
    an ``"episode"`` entry with the ``"r"``, ``"l"`` and ``"t"`` keys, and that
    both history queues end up holding exactly ``deque_size`` entries.

    Args:
        env_id: Id of the environment to build with ``gym.make``.
        deque_size: Size passed to the wrapper for its history queues.
    """
    env = gym.make(env_id, disable_env_checker=True)
    env = RecordEpisodeStatistics(env, deque_size)

    # Release the environment even when one of the assertions below fails.
    try:
        for _episode in range(5):
            env.reset()
            assert env.episode_returns is not None
            assert env.episode_lengths is not None
            assert env.episode_returns[0] == 0.0
            assert env.episode_lengths[0] == 0
            for _step in range(env.spec.max_episode_steps):
                action = env.action_space.sample()
                _, _, terminated, truncated, info = env.step(action)
                if terminated or truncated:
                    assert "episode" in info
                    assert all(key in info["episode"] for key in ("r", "l", "t"))
                    break
        assert len(env.return_queue) == deque_size
        assert len(env.length_queue) == deque_size
    finally:
        env.close()


def test_record_episode_statistics_reset_info():
    """Ensure ``reset`` on the wrapped env returns an ``(observation, info)`` pair.

    The observation must be contained in the wrapper's observation space and
    the info payload must be a ``dict``.
    """
    wrapped = RecordEpisodeStatistics(
        gym.make("CartPole-v1", disable_env_checker=True)
    )
    expected_space = wrapped.observation_space
    first_obs, reset_info = wrapped.reset()
    assert expected_space.contains(first_obs)
    assert isinstance(reset_info, dict)


@pytest.mark.parametrize(
    ("num_envs", "asynchronous"), [(1, False), (1, True), (4, False), (4, True)]
)
def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
    """Check ``RecordEpisodeStatistics`` on top of a vector environment.

    Steps the vector env until at least one sub-environment finishes. On that
    step ``infos`` must contain ``"episode"`` (with keys ``"r"``, ``"l"``,
    ``"t"``) and a ``"_episode"`` mask equal to ``terminateds | truncateds``.
    On every earlier step neither key may be present.

    Args:
        num_envs: Number of sub-environments in the vector env.
        asynchronous: Forwarded to ``gym.vector.make`` to select the
            asynchronous or synchronous vector env.
    """
    envs = gym.vector.make(
        "CartPole-v1",
        render_mode=None,
        num_envs=num_envs,
        asynchronous=asynchronous,
        disable_env_checker=True,
    )
    envs = RecordEpisodeStatistics(envs)

    # Always shut the vector env down, even when an assertion fails.
    try:
        if asynchronous:
            # The spec is read from a fresh env built by the first factory;
            # close that throwaway instance once the value has been read.
            probe_env = envs.env_fns[0]()
            try:
                max_episode_step = probe_env.spec.max_episode_steps
            finally:
                probe_env.close()
        else:
            max_episode_step = envs.env.envs[0].spec.max_episode_steps

        envs.reset()
        for _ in range(max_episode_step + 1):
            actions = envs.action_space.sample()
            _, _, terminateds, truncateds, infos = envs.step(actions)
            if any(terminateds) or any(truncateds):
                assert "episode" in infos
                assert "_episode" in infos
                assert all(infos["_episode"] == np.bitwise_or(terminateds, truncateds))
                assert all(key in infos["episode"] for key in ("r", "l", "t"))
                break
            else:
                assert "episode" not in infos
                assert "_episode" not in infos
    finally:
        envs.close()


def test_wrong_wrapping_order():
    """Stepping ``RecordEpisodeStatistics`` wrapped around ``VectorListInfo`` must fail.

    The wrappers are deliberately stacked with ``VectorListInfo`` on the
    inside; the test expects ``step`` to raise ``AssertionError`` in that
    configuration.
    """
    envs = gym.vector.make("CartPole-v1", num_envs=3, disable_env_checker=True)
    wrapped_env = RecordEpisodeStatistics(VectorListInfo(envs))

    # Close the vector env whether or not the expected error is raised.
    try:
        wrapped_env.reset()

        with pytest.raises(AssertionError):
            wrapped_env.step(wrapped_env.action_space.sample())
    finally:
        wrapped_env.close()


def test_add_vector_episode_statistics():
    """Check that ``add_vector_episode_statistics`` accumulates per-env stats.

    Episode statistics ``{"r": i, "l": i, "t": i}`` are added for env index
    ``i`` one at a time. After each call the ``"_episode"`` mask must be true
    for every index up to ``i``, each already-added slot ``j`` must hold ``j``,
    and every slot not yet added must still be ``0``.
    """
    num_envs = 5

    info = {}
    for i in range(num_envs):
        episode_stats = {"r": i, "l": i, "t": i}
        info = add_vector_episode_statistics(info, episode_stats, num_envs, i)
        # np.all replaces np.alltrue, which was deprecated in NumPy 1.25 and
        # removed in NumPy 2.0.
        assert np.all(info["_episode"][: i + 1])

        for j in range(num_envs):
            # Slots up to i have been filled with their own index; the rest
            # are expected to still be zero.
            expected = j if j <= i else 0
            for key in ("r", "l", "t"):
                assert info["episode"][key][j] == expected