Back to Repositories

Validating Bash Command Parsing Implementation in OpenHands

This comprehensive test suite validates the bash command parsing functionality in OpenHands, focusing on the split_bash_commands utility. The tests cover various bash command scenarios including heredocs, command continuation, quoting, and error handling.

Test Coverage Overview

The test suite provides extensive coverage of bash command parsing scenarios:

Basic command splitting and validation
Heredoc syntax handling
Backslash continuation parsing
Complex quoting scenarios (single and double quotes, including escaped quotes)
Comment handling and empty line processing
Invalid syntax error cases

Implementation Analysis

The testing approach uses pytest's `parametrize` marker to validate different command formats systematically. It also uses a fixture to supply a reusable set of commands, alongside isolated test cases for specific parsing scenarios.

Tests verify command integrity through string comparison and assertion-based validation.

Technical Details

Testing tools and configuration:

Framework: pytest
Test Types: Unit tests with parametrization
Fixtures: sample_commands for reusable test data
Assertion Methods: Direct comparison and string matching
Error Handling: Invalid syntax validation

Best Practices Demonstrated

The test suite exemplifies several testing best practices:

Comprehensive edge case coverage
Modular test organization
Clear test case isolation
Reusable test fixtures
Detailed assertion messages
Systematic error case handling

all-hands-ai/openhands

tests/unit/test_bash_parsing.py

            
import pytest

from openhands.runtime.utils.bash import split_bash_commands


def test_split_commands_util():
    """Join a mix of tricky commands with newlines and check they split back apart.

    Every entry in ``cmds`` is one logical bash command (multi-line quoted
    strings, escaped continuations, a heredoc, a compound ``for`` loop, ...).
    The entries are joined with a newline, passed to ``split_bash_commands``,
    and each returned piece must equal the entry at the same position once
    surrounding whitespace is stripped.
    """
    cmds = [
        'ls -l',
        'echo -e "hello\nworld"',
        """
echo -e "hello it\\'s me"
""".strip(),
        """
echo \\
    -e 'hello' \\
    -v
""".strip(),
        """
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
        """
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
        """
echo -e 'hello
world "
'
""".strip(),
        """
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: busybox-sleep
spec:
  containers:
  - name: busybox
    image: busybox:1.28
    args:
    - sleep
    - "1000000"
EOF
""".strip(),
        """
mkdir -p _modules && \
for month in {01..04}; do
    for day in {01..05}; do
        touch "_modules/2024-${month}-${day}-sample.md"
    done
done
""".strip(),
    ]
    joined_cmds = '\n'.join(cmds)
    split_cmds = split_bash_commands(joined_cmds)
    # Echo every piece so a failing run shows exactly how the input was split.
    for s in split_cmds:
        print('\nCMD')
        print(s)
    # Check the count first: it gives a readable failure instead of an
    # IndexError and also catches unexpected extra pieces.
    assert len(split_cmds) == len(
        cmds
    ), f'Expected {len(cmds)} commands, got {len(split_cmds)}: {split_cmds}'
    for i, (actual, expected) in enumerate(zip(split_cmds, cmds)):
        assert (
            actual.strip() == expected.strip()
        ), f'At index {i}: {actual} != {expected}.'


@pytest.mark.parametrize(
    'input_command, expected_output',
    [
        ('ls -l', ['ls -l']),
        ("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
        ('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
        # Literal backslash-n sequences inside single quotes (for `echo -e`).
        ("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
        (
            "grep 'pattern' file.txt | sort | uniq",
            ["grep 'pattern' file.txt | sort | uniq"],
        ),
        ('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
        (
            "echo 'Single quotes don\\'t escape'",
            ["echo 'Single quotes don\\'t escape'"],
        ),
        (
            'echo "Double quotes \\"do\\" escape"',
            ['echo "Double quotes \\"do\\" escape"'],
        ),
    ],
)
def test_single_commands(input_command, expected_output):
    """Check that each parametrized input is returned as the expected list.

    Args:
        input_command: The bash text handed to ``split_bash_commands``.
        expected_output: The exact list the call must return; in every case
            here it is a one-element list holding the unchanged input.
    """
    assert split_bash_commands(input_command) == expected_output


def test_heredoc():
    """A heredoc and its body form one command; the following ``echo`` is a second."""
    input_commands = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
    # The heredoc body and terminator stay attached to the `cat` command.
    expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
    assert split_bash_commands(input_commands) == expected_output


def test_backslash_continuation():
    """Two commands are returned when the first is written across continued lines."""
    # Python drops each backslash-newline pair in this (non-raw) literal, so
    # the first echo reaches the splitter as one physical line.
    script = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
    commands = split_bash_commands(script)
    assert commands == [
        'echo "This is a long command that spans multiple lines"',
        'echo "Next command"',
    ]


def test_comments():
    """Comments stay attached to the command that precedes them."""
    input_commands = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
    expected_output = [
        # Both the trailing comment and the standalone comment line are
        # expected to be part of the first command.
        'echo "Hello" # This is a comment\n# This is another comment',
        'ls -l',
    ]
    assert split_bash_commands(input_commands) == expected_output


def test_complex_quoting():
    """Each of three quote-heavy ``echo`` lines comes back as its own command."""
    # In this non-raw literal Python turns \\" into \" and \' into ', so the
    # second line reaches the splitter with three consecutive single quotes.
    script = """
echo "This is a \\"quoted\\" string"
echo 'This is a '\''single-quoted'\'' string'
echo "Mixed 'quotes' in \\"double quotes\\""
"""
    escaped_double = 'echo "This is a \\"quoted\\" string"'
    adjacent_single = "echo 'This is a '''single-quoted''' string'"
    mixed = 'echo "Mixed \'quotes\' in \\"double quotes\\""'
    assert split_bash_commands(script) == [escaped_double, adjacent_single, mixed]


def test_invalid_syntax():
    """Unparseable input is returned unchanged as a single-element list."""
    invalid_inputs = [
        'echo "Unclosed quote',
        "echo 'Unclosed quote",
        'cat <<EOF\nUnclosed heredoc',
    ]
    for input_command in invalid_inputs:
        # The splitter is expected to fall back to returning the original input.
        assert split_bash_commands(input_command) == [input_command]


@pytest.fixture
def sample_commands():
    """Return a list of bash snippets that each represent a single command.

    Returns:
        list[str]: Simple commands, pipelines, a ``for`` loop, a heredoc,
        escaped quotes and a backslash-continued string.
    """
    return [
        'ls -l',
        'echo "Hello, world!"',
        'cd /tmp && touch test.txt',
        # Literal backslash-n sequences inside the quotes (for `echo -e`).
        'echo -e "line1\\nline2\\nline3"',
        'grep "pattern" file.txt | sort | uniq',
        'for i in {1..5}; do echo $i; done',
        'cat <<EOF\nmultiline\ntext\nEOF',
        'echo "Escaped \\"quotes\\""',
        "echo 'Single quotes don\\'t escape'",
        # A literal backslash followed by a real newline: a bash continuation.
        'echo "Command with a trailing backslash \\\n  and continuation"',
    ]


def test_split_single_commands(sample_commands):
    """Every command from the ``sample_commands`` fixture must split into one piece."""
    for command in sample_commands:
        pieces = split_bash_commands(command)
        piece_count = len(pieces)
        assert piece_count == 1, f'Expected single command, got: {pieces}'


def test_split_commands_with_heredoc():
    """A heredoc plus a following command yields exactly two commands."""
    input_commands = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
    # The heredoc body and terminator stay attached to the `cat` command.
    expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
    result = split_bash_commands(input_commands)
    assert result == expected_output, f'Expected {expected_output}, got {result}'


def test_split_commands_with_backslash_continuation():
    """A long echo written over continued lines and a second echo split into two."""
    # Python drops each backslash-newline pair in this (non-raw) literal, so
    # the first echo reaches the splitter as one physical line.
    script = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
    wanted = [
        'echo "This is a long command that spans multiple lines"',
        'echo "Next command"',
    ]
    actual = split_bash_commands(script)
    assert actual == wanted, f'Expected {wanted}, got {actual}'


def test_split_commands_with_empty_lines():
    """Blank lines between commands do not appear in the split result."""
    script = """
ls -l

echo "Hello"

cd /tmp
"""
    wanted = [
        'ls -l',
        'echo "Hello"',
        'cd /tmp',
    ]
    actual = split_bash_commands(script)
    assert actual == wanted, f'Expected {wanted}, got {actual}'


def test_split_commands_with_comments():
    """Comments stay attached to the command that precedes them."""
    input_commands = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
    expected_output = [
        # Both the trailing comment and the standalone comment line are
        # expected to be part of the first command.
        'echo "Hello" # This is a comment\n# This is another comment',
        'ls -l',
    ]
    result = split_bash_commands(input_commands)
    assert result == expected_output, f'Expected {expected_output}, got {result}'


def test_split_commands_with_complex_quoting():
    """Two echo lines with escaped and mixed quotes come back as two commands."""
    # NOTE: a third case using the '\'' single-quote idiom is disabled here:
    #   input:    echo 'This is a '\''single-quoted'\'' string'
    #   expected: "echo 'This is a '\\''single-quoted'\\'' string'"
    script = """
echo "This is a \\"quoted\\" string"
echo "Mixed 'quotes' in \\"double quotes\\""
"""
    wanted = [
        'echo "This is a \\"quoted\\" string"',
        'echo "Mixed \'quotes\' in \\"double quotes\\""',
    ]
    actual = split_bash_commands(script)
    assert actual == wanted, f'Expected {wanted}, got {actual}'


def test_split_commands_with_invalid_input():
    """Unparseable input is returned unchanged as a single-element list."""
    invalid_inputs = [
        'echo "Unclosed quote',
        "echo 'Unclosed quote",
        'cat <<EOF\nUnclosed heredoc',
    ]
    for input_command in invalid_inputs:
        # The splitter is expected to fall back to returning the original input.
        assert split_bash_commands(input_command) == [input_command]