
Testing ContiguousMemoryAllocator Tensor Management in DeepSpeed

This test suite validates the ContiguousMemoryAllocator implementation in DeepSpeed, focusing on memory allocation and release patterns for PyTorch tensors. It verifies proper memory management, tensor operations, and allocation strategies in a controlled testing environment.

Test Coverage Overview

The test suite provides comprehensive coverage of the ContiguousMemoryAllocator functionality through two main test cases.

  • Tests allocation and release of tensors of varying sizes (64–512 elements; the buffers are float16, so sizes here are element counts, not bytes)
  • Validates memory fragmentation handling and reuse
  • Verifies tensor operations (multiplication and addition)
  • Checks allocation boundaries and memory limits
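The allocate/release/reuse cycle these tests exercise can be illustrated with a tiny first-fit allocator in plain Python. This is a sketch only: `MiniAllocator` and its methods are hypothetical stand-ins, not DeepSpeed's `ContiguousMemoryAllocator` API.

```python
# Illustrative sketch: a minimal first-fit allocator that mimics the
# allocate/release pattern the tests exercise. Hypothetical code, not
# DeepSpeed's implementation.

class MiniAllocator:
    def __init__(self, size):
        self.size = size
        self.free = [(0, size)]  # free blocks as (offset, length), sorted
        self.used = {}           # offset -> length of live allocations

    def allocate(self, n):
        for i, (off, length) in enumerate(self.free):
            if length >= n:      # first fit
                if length == n:
                    self.free.pop(i)
                else:
                    self.free[i] = (off + n, length - n)
                self.used[off] = n
                return off
        raise MemoryError(f"no contiguous block of {n} elements")

    def release(self, off):
        n = self.used.pop(off)
        self.free.append((off, n))
        self.free.sort()
        # Coalesce adjacent free blocks so released space can be reused.
        merged = [self.free[0]]
        for o, l in self.free[1:]:
            po, pl = merged[-1]
            if po + pl == o:
                merged[-1] = (po, pl + l)
            else:
                merged.append((o, l))
        self.free = merged


mem = MiniAllocator(1024)
a1 = mem.allocate(64)
mem.release(a1)         # freed space coalesces back into the pool
a2 = mem.allocate(64)   # reuses the same slot a1 occupied
assert a2 == a1
```

The same release-then-reallocate pattern appears at the top of `test1`, where `a1` is released and `a2` of identical size is allocated immediately after.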

Implementation Analysis

The testing approach uses systematic allocation and deallocation patterns to validate memory management.

Key implementation aspects include:
  • Sequential tensor allocations with different sizes
  • Interleaved release operations to test memory reuse
  • Tensor-value validation using PyTorch's in-place arithmetic (mul_, add_)
  • Assertion checks for tensor norm calculations
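The norm-based assertions rest on a simple identity: a tensor of n identical values v has L2 norm v·√n, so the expected totals can be derived by hand. The sketch below checks this in full precision for the tensors still live at the end of `test1`; the constant actually asserted in the test (474.50) comes from the real float16 run, whose rounding differs from the float64 arithmetic shown here.

```python
import math

# A tensor of n identical values v has L2 norm v * sqrt(n).
def const_norm(v, n):
    return v * math.sqrt(n)

# Live tensors at the end of test1: a4 (128 elements of 4.0),
# a7 (128 of 7.0), a8 (256 of 8.0), a10 (512 of 10.0).
total = (const_norm(4.0, 128) + const_norm(7.0, 128)
         + const_norm(8.0, 256) + const_norm(10.0, 512))
print(round(total, 2))  # full-precision sum, ~478.72
```

The test's asserted value differs because the allocator stores and reduces in float16; the point of the identity is that each term's expected magnitude is predictable from the fill value and element count alone.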

Technical Details

Testing infrastructure utilizes:

  • PyTorch framework for tensor operations
  • CPU-based testing environment
  • Half-precision (float16) tensor format
  • Memory allocator with configurable buffer sizes (1024 and 512 elements in half precision)
  • Allocation-map visualization via print_allocation(resolution=100)
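A resolution-based allocation map can be pictured as dividing the buffer into a fixed number of buckets and marking every bucket that overlaps a live allocation. The `render_map` helper below is a hypothetical sketch of that idea, not the actual `print_allocation` implementation.

```python
# Hypothetical sketch of a resolution-based allocation map: split the
# buffer into `resolution` cells and mark cells overlapped by live
# allocations. Not DeepSpeed's print_allocation implementation.

def render_map(size, used, resolution=100):
    """used: list of (offset, length) live allocations."""
    cells = ['.'] * resolution
    for off, length in used:
        start = off * resolution // size
        end = (off + length - 1) * resolution // size
        for i in range(start, end + 1):
            cells[i] = '#'
    return ''.join(cells)

# A 1024-element buffer holding a 64-element tensor at offset 0 and a
# 256-element tensor at offset 512, rendered at resolution 32:
print(render_map(1024, [(0, 64), (512, 256)], resolution=32))
# → ##..............########........
```

A coarse resolution like this makes fragmentation visible at a glance, which is why the tests call `print_allocation` after each batch of allocations and releases.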

Best Practices Demonstrated

The test suite exemplifies robust testing practices for memory management systems.

  • Systematic test case organization
  • Comprehensive edge case coverage
  • Precise numeric validation
  • Memory leak prevention checks
  • Clear test boundaries and expectations

microsoft/deepspeed

deepspeed/runtime/zero/test.py

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import torch
from deepspeed.runtime.zero.contiguous_memory_allocator import ContiguousMemoryAllocator


def test1():
    mem = ContiguousMemoryAllocator(1024, torch.half, 'cpu')
    mem.print_allocation(resolution=100)
    a1 = mem.allocate_tensor(64).mul_(0.0).add_(1.0)
    mem.print_allocation(resolution=100)
    mem.release_tensor(a1)
    mem.print_allocation(resolution=100)
    a2 = mem.allocate_tensor(64).mul_(0.0).add_(2.0)
    a3 = mem.allocate_tensor(256).mul_(0.0).add_(3.0)
    a4 = mem.allocate_tensor(128).mul_(0.0).add_(4.0)
    mem.print_allocation(resolution=100)
    mem.release_tensor(a3)
    mem.print_allocation(resolution=100)
    a5 = mem.allocate_tensor(64).mul_(0.0).add_(5.0)
    a6 = mem.allocate_tensor(256).mul_(0.0).add_(6.0)
    a7 = mem.allocate_tensor(128).mul_(0.0).add_(7.0)
    mem.print_allocation(resolution=100)
    a8 = mem.allocate_tensor(256).mul_(0.0).add_(8.0)
    a9 = mem.allocate_tensor(128).mul_(0.0).add_(9.0)
    mem.print_allocation(resolution=100)
    mem.release_tensor(a9)
    mem.release_tensor(a6)
    mem.release_tensor(a2)
    mem.release_tensor(a5)

    a10 = mem.allocate_tensor(512).mul_(0.0).add_(10.0)
    mem.print_allocation(resolution=100)
    #print(f"a4:{a4}")
    #print(f"a7:{a7}")
    #print(f"a8:{a8}")
    #print(f"a10:{a10}")
    assert (a4.norm() + a7.norm() + a8.norm() + a10.norm()).item() == 474.50, "Test failed"


def test2():
    mem = ContiguousMemoryAllocator(512, torch.half, 'cpu')
    a1 = mem.allocate_tensor(64).mul_(0.0).add_(1.0)
    a2 = mem.allocate_tensor(64).mul_(0.0).add_(2.0)
    a3 = mem.allocate_tensor(64).mul_(0.0).add_(3.0)
    a4 = mem.allocate_tensor(64).mul_(0.0).add_(4.0)
    a5 = mem.allocate_tensor(64).mul_(0.0).add_(5.0)
    a6 = mem.allocate_tensor(64).mul_(0.0).add_(6.0)
    a7 = mem.allocate_tensor(64).mul_(0.0).add_(7.0)
    a8 = mem.allocate_tensor(64).mul_(0.0).add_(8.0)
    mem.release_tensor(a2)
    mem.release_tensor(a4)
    mem.release_tensor(a6)
    mem.release_tensor(a8)
    mem.print_allocation(resolution=100)

    a9 = mem.allocate_tensor(128).mul_(0.0).add_(9.0)
    a10 = mem.allocate_tensor(64).mul_(0.0).add_(10.0)
    a11 = mem.allocate_tensor(64).mul_(0.0).add_(11.0)
    mem.release_tensor(a1)
    mem.release_tensor(a5)
    mem.print_allocation(resolution=100)
    a12 = mem.allocate_tensor(128).mul_(0.0).add_(12.0)
    mem.print_allocation(resolution=100)
    print(f"a7:{a7}")
    print(f"a9:{a9}")
    print(f"a10:{a10}")
    print(f"a11:{a11}")
    print(f"a12:{a12}")
    assert (a7.norm() + a9.norm() + a10.norm() + a11.norm() + a12.norm()) == 460.75, "TestFailed"


test1()
test2()