
Testing PR Handler Success Detection Workflow in OpenHands

This test suite validates the PR handler’s success detection functionality in the OpenHands project, specifically the automated assessment of whether pull request feedback has been implemented. It covers scenarios including review threads, thread comments, review comments, and LLM-based success evaluation.

Test Coverage Overview

The test suite provides comprehensive coverage of the PR handler’s success detection mechanism, examining multiple feedback channels and response patterns. Key areas tested include:

  • Review thread validation
  • Thread comment processing
  • Review comment handling
  • LLM response interpretation
  • Success criteria evaluation

Implementation Analysis

The testing approach uses mock objects and patch decorators to simulate LLM interactions, while GitHub data is represented by directly constructed GithubIssue and ReviewThread fixtures rather than live API responses. The implementation follows a structured pattern of arranging test data, mocking external dependencies, and verifying both the success determination logic and prompt construction.

Key technical aspects include mock response generation, context validation, and JSON-based feedback processing.
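
A minimal sketch of that pattern is shown below. It reuses the handler construction, the patch.object(LLM, 'completion') stub, and the '--- success' / '--- explanation' response format from the test file further down; the make_llm_response helper is local to this sketch, not part of OpenHands.

from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.llm.llm import LLM
from openhands.resolver.issue_definitions import PRHandler


def make_llm_response(content):
    # Mimic the litellm response shape: response.choices[0].message.content
    response = MagicMock()
    response.choices = [MagicMock(message=MagicMock(content=content))]
    return response


def example_check():
    handler = PRHandler(
        'test-owner', 'test-repo', 'test-token', LLMConfig(model='test', api_key='test')
    )
    canned = make_llm_response('--- success\ntrue\n--- explanation\nLooks good')

    # Stub out the LLM call so no real completion request is made.
    with patch.object(LLM, 'completion', return_value=canned) as mock_completion:
        success, explanation = handler._check_feedback_with_llm('test prompt')

    # The handler parses the canned response into a (success, explanation) pair.
    assert mock_completion.call_count == 1
    assert (success, explanation) == (True, 'Looks good')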

Technical Details

Testing tools and configuration (combined in the sketch after this list):

  • unittest.mock for dependency isolation
  • JSON-based response validation
  • MagicMock for LLM response simulation
  • Patch decorators for function mocking
  • Structured test data generation
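
The following condensed sketch shows how these pieces fit together for the thread-comment path, mirroring the fixtures and assertions in the test file below. Field values are illustrative; the mocked completion and the JSON-decoded explanation check follow the same shape the tests use.

import json
from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue
from openhands.resolver.issue_definitions import PRHandler

handler = PRHandler(
    'test-owner', 'test-repo', 'test-token', LLMConfig(model='test', api_key='test')
)

# Structured test data: a PR carrying plain thread comments and no review threads.
issue = GithubIssue(
    owner='test-owner',
    repo='test-repo',
    number=1,
    title='Test PR',
    body='Test Body',
    thread_comments=['Please improve error handling'],
    closing_issues=['Issue 1 description'],
    review_comments=None,
    thread_ids=None,
    head_branch='test-branch',
)
history = [MessageAction(content='I added try/except blocks around the parser.')]

# MagicMock reproduces the completion response structure the handler expects.
mock_response = MagicMock()
mock_response.choices = [
    MagicMock(
        message=MagicMock(content='--- success\ntrue\n\n--- explanation\nFeedback addressed')
    )
]

with patch.object(LLM, 'completion', return_value=mock_response):
    success, success_list, explanation = handler.guess_success(issue, history)

# JSON-based response validation: guess_success packs per-channel explanations
# into a JSON list, one entry per feedback channel (a single one here).
assert len(json.loads(explanation)) == 1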

Best Practices Demonstrated

The test suite exemplifies several testing best practices including comprehensive mock setup, explicit assertion checking, and thorough edge case coverage. Notable practices include:

  • Isolated test cases
  • Direct assertions on parsed success and explanation values
  • Mock response validation
  • Clear test case organization
  • Comprehensive prompt verification

all-hands-ai/openhands

tests/unit/resolver/test_pr_handler_guess_success.py

import json
from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler


def mock_llm_response(content):
    """Helper function to create a mock LLM response."""
    mock_response = MagicMock()
    mock_response.choices = [MagicMock(message=MagicMock(content=content))]
    return mock_response


def test_guess_success_review_threads_litellm_call():
    """Test that the completion() call for review threads contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create a mock issue with review threads
    issue = GithubIssue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        review_threads=[
            ReviewThread(
                comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
                files=['/src/file1.py', '/src/file2.py'],
            ),
            ReviewThread(
                comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
                files=['/tests/test_file.py'],
            ),
        ],
        thread_ids=['1', '2'],
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() calls
        assert mock_completion.call_count == 2  # One call per review thread

        # Check first call
        first_call = mock_completion.call_args_list[0]
        first_prompt = first_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in first_prompt
        )
        assert (
            'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
            in first_prompt
        )
        assert (
            'Files locations:\n'
            + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
            in first_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in first_prompt

        # Check second call
        second_call = mock_completion.call_args_list[1]
        second_prompt = second_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in second_prompt
        )
        assert (
            'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
            in second_prompt
        )
        assert (
            'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
            in second_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in second_prompt

        assert len(json.loads(explanation)) == 2


def test_guess_success_thread_comments_litellm_call():
    """Test that the completion() call for thread comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create a mock issue with thread comments
    issue = GithubIssue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=[
            'Please improve error handling',
            'Add input validation',
            'latest feedback:\nHandle edge cases',
        ],
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1


def test_check_feedback_with_llm():
    """Test the _check_feedback_with_llm helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Test cases for different LLM responses
    test_cases = [
        {
            'response': '--- success\ntrue\n--- explanation\nChanges look good',
            'expected': (True, 'Changes look good'),
        },
        {
            'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
            'expected': (False, 'Not all issues fixed'),
        },
        {
            'response': 'Invalid response format',
            'expected': (
                False,
                'Failed to decode answer from LLM response: Invalid response format',
            ),
        },
        {
            'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
            'expected': (True, 'Multiline\nexplanation\nhere'),
        },
    ]

    for case in test_cases:
        # Mock the LLM response
        mock_response = MagicMock()
        mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]

        # Test the function
        with patch.object(LLM, 'completion', return_value=mock_response):
            success, explanation = handler._check_feedback_with_llm('test prompt')
            assert (success, explanation) == case['expected']


def test_check_review_thread():
    """Test the _check_review_thread helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create test data
    review_thread = ReviewThread(
        comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
        files=['/src/file1.py', '/src/file2.py'],
    )
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the formatting and added docstrings'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'Feedback:\n' + review_thread.comment in prompt
        assert (
            'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_thread_comments():
    """Test the _check_thread_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create test data
    thread_comments = [
        'Please improve error handling',
        'Add input validation',
        'latest feedback:\nHandle edge cases',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have added error handling and input validation'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_comments():
    """Test the _check_review_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create test data
    review_comments = [
        'Please improve code readability',
        'Add comments to complex functions',
        'Follow PEP 8 style guide',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have improved code readability and added comments'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_guess_success_review_comments_litellm_call():
    """Test that the completion() call for review comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)

    # Create a mock issue with review comments
    issue = GithubIssue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=[
            'Please improve code readability',
            'Add comments to complex functions',
            'Follow PEP 8 style guide',
        ],
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
        )
    ]

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1