Testing PR Handler Success Detection Workflow in OpenHands
This test suite validates the PR handler's success detection in the OpenHands project: the automated, LLM-based assessment of whether pull request feedback has been implemented. It covers review threads, PR thread comments, review comments, and the helper functions that build the prompts and parse the LLM's verdict.
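Every test in the file follows the same pattern: construct a PRHandler, patch LLM.completion with unittest.mock, and return a canned answer in the two-part "--- success / --- explanation" format that the handler parses. The snippet below distills that shared pattern from the tests that follow; the prompt string is a placeholder, not a value used in the suite.
# Distilled from the tests below: patch LLM.completion and return a canned answer.
from unittest.mock import MagicMock, patch
from openhands.core.config import LLMConfig
from openhands.llm.llm import LLM
from openhands.resolver.issue_definitions import PRHandler
handler = PRHandler(
    'test-owner', 'test-repo', 'test-token', LLMConfig(model='test', api_key='test')
)
mock_response = MagicMock()
mock_response.choices = [
    MagicMock(
        message=MagicMock(
            content='--- success\ntrue\n--- explanation\nChanges look good'
        )
    )
]
with patch.object(LLM, 'completion', return_value=mock_response):
    # 'example prompt' is a placeholder; the real tests build prompts from issue data.
    success, explanation = handler._check_feedback_with_llm('example prompt')
assert success is True
assert explanation == 'Changes look good'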
Repository: all-hands-ai/openhands
File: tests/unit/resolver/test_pr_handler_guess_success.py
import json
from unittest.mock import MagicMock, patch
from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler
def mock_llm_response(content):
"""Helper function to create a mock LLM response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock(message=MagicMock(content=content))]
return mock_response
def test_guess_success_review_threads_litellm_call():
"""Test that the completion() call for review threads contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with review threads
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=None,
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=None,
review_threads=[
ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
),
ReviewThread(
comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
files=['/tests/test_file.py'],
),
],
thread_ids=['1', '2'],
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
)
]
# Create mock LLM config
llm_config = LLMConfig(model='test-model', api_key='test-key')
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() calls
assert mock_completion.call_count == 2 # One call per review thread
# Check first call
first_call = mock_completion.call_args_list[0]
first_prompt = first_call[1]['messages'][0]['content']
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in first_prompt
)
assert (
'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
in first_prompt
)
assert (
'Files locations:\n'
+ json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
in first_prompt
)
assert 'Last message from AI agent:\n' + history[0].content in first_prompt
# Check second call
second_call = mock_completion.call_args_list[1]
second_prompt = second_call[1]['messages'][0]['content']
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in second_prompt
)
assert (
'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
in second_prompt
)
assert (
'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
in second_prompt
)
assert 'Last message from AI agent:\n' + history[0].content in second_prompt
assert len(json.loads(explanation)) == 2
def test_guess_success_thread_comments_litellm_call():
"""Test that the completion() call for thread comments contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with thread comments
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=[
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
],
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=None,
thread_ids=None,
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
)
]
# Create mock LLM config
llm_config = LLMConfig(model='test-model', api_key='test-key')
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in prompt
)
assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
assert 'Last message from AI agent:\n' + history[0].content in prompt
assert len(json.loads(explanation)) == 1
def test_check_feedback_with_llm():
"""Test the _check_feedback_with_llm helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Test cases for different LLM responses
test_cases = [
{
'response': '--- success\ntrue\n--- explanation\nChanges look good',
'expected': (True, 'Changes look good'),
},
{
'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
'expected': (False, 'Not all issues fixed'),
},
{
'response': 'Invalid response format',
'expected': (
False,
'Failed to decode answer from LLM response: Invalid response format',
),
},
{
'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
'expected': (True, 'Multiline\nexplanation\nhere'),
},
]
for case in test_cases:
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
# Test the function
with patch.object(LLM, 'completion', return_value=mock_response):
success, explanation = handler._check_feedback_with_llm('test prompt')
assert (success, explanation) == case['expected']
def test_check_review_thread():
"""Test the _check_review_thread helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_thread = ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
)
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the formatting and added docstrings'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_thread(
review_thread, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'Feedback:\n' + review_thread.comment in prompt
assert (
'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
)
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_thread_comments():
"""Test the _check_thread_comments helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
thread_comments = [
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have added error handling and input validation'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_thread_comments(
thread_comments, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_comments():
"""Test the _check_review_comments helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_comments = [
'Please improve code readability',
'Add comments to complex functions',
'Follow PEP 8 style guide',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have improved code readability and added comments'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_comments(
review_comments, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_guess_success_review_comments_litellm_call():
"""Test that the completion() call for review comments contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with review comments
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=None,
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=[
'Please improve code readability',
'Add comments to complex functions',
'Follow PEP 8 style guide',
],
thread_ids=None,
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
)
]
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in prompt
)
assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
assert 'Last message from AI agent:\n' + history[0].content in prompt
assert len(json.loads(explanation)) == 1
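For context, test_check_feedback_with_llm pins down the contract of the response parser: a well-formed "--- success / --- explanation" answer yields the boolean verdict plus the explanation text, and anything else yields False with a "Failed to decode answer..." message. The following is a minimal, hypothetical sketch of a parser that satisfies those four test cases; it is not the actual implementation in openhands/resolver/issue_definitions.py, which may differ.
import re

def parse_success_answer(answer: str) -> tuple[bool, str]:
    """Hypothetical parser consistent with the cases in test_check_feedback_with_llm."""
    match = re.search(
        r'--- success\s*(true|false)\s*--- explanation\s*(.*)', answer, re.DOTALL
    )
    if match:
        return match.group(1) == 'true', match.group(2).strip()
    return False, f'Failed to decode answer from LLM response: {answer}'

assert parse_success_answer(
    '--- success\ntrue\n--- explanation\nChanges look good'
) == (True, 'Changes look good')
assert parse_success_answer('Invalid response format') == (
    False,
    'Failed to decode answer from LLM response: Invalid response format',
)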