Testing GPT-2 Performance Scaling Implementation in Microsoft DeepSpeed
This test suite evaluates performance characteristics of GPT-2 models of varying sizes (1.5B to 20B parameters) using DeepSpeed. It measures execution time per iteration across different model configurations with varying batch sizes, layers, and model parallel configurations.
Test Coverage Overview
Implementation Analysis
Technical Details
Best Practices Demonstrated
microsoft/deepspeed
tests/model/Megatron_GPT2/run_perf_test.py
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
"""
Note: please copy webtext data to "Megatron-LM" folder, before running this script.
"""
import unittest
import re
from test_common import BaseTestCase
class GPT2PerfTestCase(BaseTestCase):
def __init__(self, methodName="DeepSpeed performance test on GPT2 model"):
super(GPT2PerfTestCase, self).__init__(methodName)
def test_perf_1_5B(self):
test_config = {
"mp": 1,
"gpus": 16,
"nodes": 4,
"bs": 32,
"steps": 100,
"layers": 48,
"hidden_size": 1600,
"seq_length": 1024,
"heads": 16,
"deepspeed": True,
"json": "ds_config_perf_bs32.json",
}
self.run_test(test_config)
def test_perf_4B(self):
test_config = {
"mp": 1,
"gpus": 16,
"nodes": 4,
"bs": 8,
"steps": 100,
"layers": 64,
"hidden_size": 2304,
"seq_length": 1024,
"heads": 16,
"deepspeed": True,
"json": "ds_config_perf_bs8.json",
}
self.run_test(test_config)
def test_perf_8B(self):
test_config = {
"mp": 2,
"gpus": 16,
"nodes": 4,
"bs": 16,
"steps": 100,
"layers": 72,
"hidden_size": 3072,
"seq_length": 1024,
"heads": 24,
"deepspeed": True,
"json": "ds_config_perf_bs16.json",
}
self.run_test(test_config)
def test_perf_20B(self):
test_config = {
"mp": 4,
"gpus": 16,
"nodes": 4,
"bs": 8,
"steps": 50,
"layers": 111,
"hidden_size": 3808,
"seq_length": 1024,
"heads": 32,
"ckpt_num_layers": 1,
"deepspeed": True,
"json": "ds_config_perf_bs8.json",
}
self.run_test(test_config)
def run_test(self, test_config):
print("
")
print("{0}: starting......".format(self.id()))
prefix = "gpt2_perf"
test_file = self.gen_output_name(test_config, prefix)
self.run_gpt2_test(test_config, test_file)
exec_time = self.grep_latency_from_file(test_file)
if exec_time == 0.0:
print("{0}: no latency found in file {1}".format(self.id(), test_file))
else:
print("{0}: execution time per iteration is {1}ms.".format(self.id(), exec_time))
def grep_latency_from_file(self, file_name):
latency = 0.0
count = 0
with open(file_name, 'r') as f:
lines = f.readlines()
line_filter = "elapsed time per iteration"
match_number = re.compile(r'elapsed time per iteration \(ms\): ([-+]?[0-9]+\.?[0-9]*(?:[Ee][-+]?[0-9]+)?)')
for line in lines:
if line_filter in line:
ms_per_iter = re.findall(match_number, line)
latency += float(ms_per_iter[0])
count += 1
if count > 0:
latency /= count
return latency
def suite():
suite = unittest.TestSuite()
suite.addTest(GPT2PerfTestCase('test_perf_1_5B'))
suite.addTest(GPT2PerfTestCase('test_perf_4B'))
suite.addTest(GPT2PerfTestCase('test_perf_8B'))
suite.addTest(GPT2PerfTestCase('test_perf_20B'))
return suite
if __name__ == '__main__':
runner = unittest.TextTestRunner(failfast=True)
runner.run(suite())