
Testing GPT-2 Performance Scaling in Microsoft DeepSpeed

This test suite evaluates the performance of GPT-2 models ranging from 1.5B to 20B parameters when trained with DeepSpeed. It measures elapsed time per iteration across configurations that vary batch size, layer count, hidden size, and model-parallel degree.

Test Coverage Overview

The suite measures GPT-2 performance at four model scales.

  • Tests four model sizes: 1.5B, 4B, 8B, and 20B parameters (compared side by side in the sketch after this list)
  • Evaluates batch sizes of 8, 16, and 32
  • Tests model-parallel degrees of 1, 2, and 4
  • Measures average elapsed time per iteration in milliseconds
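For reference, the four configurations are easiest to compare side by side. The values below are transcribed from the test bodies in run_perf_test.py shown further down; the configs dict itself is illustrative and not part of the test file.

# Transcribed from the test bodies below; all four runs also share
# gpus=16, nodes=4, seq_length=1024, and deepspeed=True.
configs = {
    "1.5B": dict(mp=1, bs=32, layers=48, hidden_size=1600, heads=16, steps=100),
    "4B": dict(mp=1, bs=8, layers=64, hidden_size=2304, heads=16, steps=100),
    "8B": dict(mp=2, bs=16, layers=72, hidden_size=3072, heads=24, steps=100),
    "20B": dict(mp=4, bs=8, layers=111, hidden_size=3808, heads=32, steps=50),
}

for size, cfg in configs.items():
    print(f"{size}: mp={cfg['mp']} bs={cfg['bs']} layers={cfg['layers']} hidden={cfg['hidden_size']}")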

Implementation Analysis

The implementation uses Python’s unittest framework with a structured class-based approach.

Key patterns include:
  • Inheritance from BaseTestCase for common functionality
  • Parameterized test configurations
  • Regular expression parsing of performance metrics from training logs (see the sketch after this list)
  • Centralized test execution through run_test method
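The metric parsing is worth seeing in isolation. Below is a minimal sketch reusing the exact regular expression compiled in grep_latency_from_file; the sample line imitates the Megatron-style log format the pattern targets and is illustrative, not captured output.

import re

# Same pattern the test compiles; the capture group grabs the millisecond
# value, including scientific-notation forms like 1.05E+03.
pattern = re.compile(r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)')

# Illustrative log line, not real output.
sample = "iteration 10/100 | elapsed time per iteration (ms): 1053.6 |"
match = pattern.search(sample)
if match:
    print(float(match.group(1)))  # -> 1053.6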

Technical Details

Testing infrastructure includes:

  • DeepSpeed configuration files (ds_config_perf_*.json)
  • Multi-node setup (4 nodes, 16 GPUs)
  • Performance measurement tools
  • Sequence length of 1024 across all tests
  • Variable hidden sizes (1600 to 3808; checked against the parameter counts in the sketch after this list)
  • Activation checkpointing for the 20B model (ckpt_num_layers)

Best Practices Demonstrated

The test suite exemplifies several testing best practices:

  • Modular test configuration management
  • Systematic performance metric collection
  • Scalability testing across model sizes
  • Proper test isolation and setup
  • Clear output logging and reporting
  • Reusable test utilities

microsoft/deepspeed

tests/model/Megatron_GPT2/run_perf_test.py

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
"""
Note: please copy the webtext data to the "Megatron-LM" folder before running this script.
"""

import unittest
import re
from test_common import BaseTestCase


class GPT2PerfTestCase(BaseTestCase):

    def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
        super(GPT2PerfTestCase, self).__init__(methodName)

    def test_perf_1_5B(self):
        test_config = {
            "mp": 1,
            "gpus": 16,
            "nodes": 4,
            "bs": 32,
            "steps": 100,
            "layers": 48,
            "hidden_size": 1600,
            "seq_length": 1024,
            "heads": 16,
            "deepspeed": True,
            "json": "ds_config_perf_bs32.json",
        }

        self.run_test(test_config)

    def test_perf_4B(self):
        test_config = {
            "mp": 1,
            "gpus": 16,
            "nodes": 4,
            "bs": 8,
            "steps": 100,
            "layers": 64,
            "hidden_size": 2304,
            "seq_length": 1024,
            "heads": 16,
            "deepspeed": True,
            "json": "ds_config_perf_bs8.json",
        }

        self.run_test(test_config)

    def test_perf_8B(self):
        test_config = {
            "mp": 2,
            "gpus": 16,
            "nodes": 4,
            "bs": 16,
            "steps": 100,
            "layers": 72,
            "hidden_size": 3072,
            "seq_length": 1024,
            "heads": 24,
            "deepspeed": True,
            "json": "ds_config_perf_bs16.json",
        }

        self.run_test(test_config)

    def test_perf_20B(self):
        test_config = {
            "mp": 4,
            "gpus": 16,
            "nodes": 4,
            "bs": 8,
            "steps": 50,
            "layers": 111,
            "hidden_size": 3808,
            "seq_length": 1024,
            "heads": 32,
            "ckpt_num_layers": 1,
            "deepspeed": True,
            "json": "ds_config_perf_bs8.json",
        }

        self.run_test(test_config)

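    # Generate a log-file name from the config, launch the GPT-2 run through
    # the BaseTestCase helpers, then parse and report per-iteration latency.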
    def run_test(self, test_config):
        print("
")
        print("{0}: starting......".format(self.id()))
        prefix = "gpt2_perf"

        test_file = self.gen_output_name(test_config, prefix)
        self.run_gpt2_test(test_config, test_file)
        exec_time = self.grep_latency_from_file(test_file)

        if exec_time == 0.0:
            print("{0}: no latency found in file {1}".format(self.id(), test_file))
        else:
            print("{0}: execution time per iteration is {1}ms.".format(self.id(), exec_time))

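    # Average every "elapsed time per iteration (ms)" value found in the log;
    # returns 0.0 if the run produced no matching lines.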
    def grep_latency_from_file(self, file_name):
        latency = 0.0
        count = 0

        with open(file_name, 'r') as f:
            lines = f.readlines()
            line_filter = "elapsed time per iteration"
            match_number = re.compile(r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)')

            for line in lines:
                if line_filter in line:
                    ms_per_iter = re.findall(match_number, line)
                    latency += float(ms_per_iter[0])
                    count += 1

        if count > 0:
            latency /= count

        return latency


def suite():
    suite = unittest.TestSuite()
    suite.addTest(GPT2PerfTestCase('test_perf_1_5B'))
    suite.addTest(GPT2PerfTestCase('test_perf_4B'))
    suite.addTest(GPT2PerfTestCase('test_perf_8B'))
    suite.addTest(GPT2PerfTestCase('test_perf_20B'))
    return suite


if __name__ == '__main__':
    runner = unittest.TextTestRunner(failfast=True)
    runner.run(suite())
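
Running the file directly executes all four cases through the suite() helper, and failfast=True stops the run at the first failure. To run a single case instead, a minimal sketch (assuming the script is launched from tests/model/Megatron_GPT2 with the webtext data in place, per the docstring):

import unittest

from run_perf_test import GPT2PerfTestCase

# One-test suite; the name must match a test method defined in the class.
single = unittest.TestSuite()
single.addTest(GPT2PerfTestCase("test_perf_1_5B"))
unittest.TextTestRunner(failfast=True).run(single)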