mirror of
https://github.com/shareAI-lab/analysis_claude_code.git
synced 2026-02-04 13:16:37 +08:00
ci: add GitHub Actions test workflow with real agent tests
Tests: - test_bash_echo: Run simple bash command - test_file_creation: Create and verify file - test_directory_listing: List directory contents - test_file_search: Search with grep - test_multi_step_task: Multi-step file manipulation Each test runs a complete agent loop (API call -> tool execution -> continue). Required secrets: - TEST_API_KEY: API key for testing - TEST_BASE_URL: API base URL - TEST_MODEL: Model (default: claude-3-5-sonnet-20241022) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
120cf7ac99
commit
8f4a130371
31
.github/workflows/test.yml
vendored
Normal file
31
.github/workflows/test.yml
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
name: Test
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.11"
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
pip install anthropic python-dotenv openai
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
env:
|
||||||
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
TEST_BASE_URL: ${{ secrets.TEST_BASE_URL }}
|
||||||
|
TEST_MODEL: ${{ secrets.TEST_MODEL }}
|
||||||
|
run: |
|
||||||
|
python tests/test_agent.py
|
||||||
283
tests/test_agent.py
Normal file
283
tests/test_agent.py
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
"""
|
||||||
|
Integration tests for learn-claude-code agents.
|
||||||
|
|
||||||
|
Real agent loop tests that run on GitHub Actions (Linux).
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
def get_client():
|
||||||
|
"""Get OpenAI-compatible client for testing."""
|
||||||
|
from openai import OpenAI
|
||||||
|
api_key = os.getenv("TEST_API_KEY")
|
||||||
|
base_url = os.getenv("TEST_BASE_URL", "https://api.openai-next.com/v1")
|
||||||
|
if not api_key:
|
||||||
|
return None
|
||||||
|
return OpenAI(api_key=api_key, base_url=base_url)
|
||||||
|
|
||||||
|
|
||||||
|
MODEL = os.getenv("TEST_MODEL", "claude-3-5-sonnet-20241022")
|
||||||
|
|
||||||
|
BASH_TOOL = {
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "bash",
|
||||||
|
"description": "Run a shell command",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"command": {"type": "string"}},
|
||||||
|
"required": ["command"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_agent_loop(client, task, tools, max_turns=10):
|
||||||
|
"""
|
||||||
|
Run a complete agent loop until done or max_turns.
|
||||||
|
Returns (final_response, tool_calls_made)
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "You are a coding agent. Use tools to complete tasks. Be concise."},
|
||||||
|
{"role": "user", "content": task}
|
||||||
|
]
|
||||||
|
|
||||||
|
tool_calls_made = []
|
||||||
|
|
||||||
|
for _ in range(max_turns):
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model=MODEL,
|
||||||
|
messages=messages,
|
||||||
|
tools=tools,
|
||||||
|
max_tokens=1000
|
||||||
|
)
|
||||||
|
|
||||||
|
message = response.choices[0].message
|
||||||
|
finish_reason = response.choices[0].finish_reason
|
||||||
|
|
||||||
|
# No tool calls, we're done
|
||||||
|
if finish_reason == "stop" or not message.tool_calls:
|
||||||
|
return message.content, tool_calls_made
|
||||||
|
|
||||||
|
# Process tool calls
|
||||||
|
messages.append({
|
||||||
|
"role": "assistant",
|
||||||
|
"content": message.content,
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
|
||||||
|
for tc in message.tool_calls
|
||||||
|
]
|
||||||
|
})
|
||||||
|
|
||||||
|
for tool_call in message.tool_calls:
|
||||||
|
func_name = tool_call.function.name
|
||||||
|
args = json.loads(tool_call.function.arguments)
|
||||||
|
tool_calls_made.append((func_name, args))
|
||||||
|
|
||||||
|
if func_name == "bash":
|
||||||
|
cmd = args.get("command", "")
|
||||||
|
try:
|
||||||
|
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
|
||||||
|
output = result.stdout + result.stderr
|
||||||
|
except Exception as e:
|
||||||
|
output = f"Error: {e}"
|
||||||
|
else:
|
||||||
|
output = f"Unknown tool: {func_name}"
|
||||||
|
|
||||||
|
messages.append({
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": tool_call.id,
|
||||||
|
"content": output or "(empty)"
|
||||||
|
})
|
||||||
|
|
||||||
|
return None, tool_calls_made
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Test Cases
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def test_bash_echo():
|
||||||
|
"""Test: Agent can run simple bash command."""
|
||||||
|
client = get_client()
|
||||||
|
if not client:
|
||||||
|
print("SKIP: No API key")
|
||||||
|
return True
|
||||||
|
|
||||||
|
response, calls = run_agent_loop(
|
||||||
|
client,
|
||||||
|
"Run 'echo hello world' and tell me what it outputs.",
|
||||||
|
[BASH_TOOL]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(calls) >= 1, "Should have made at least 1 tool call"
|
||||||
|
assert any("echo" in str(c) for c in calls), "Should have run echo command"
|
||||||
|
assert response and "hello" in response.lower(), f"Response should mention hello: {response}"
|
||||||
|
|
||||||
|
print(f"Tool calls: {calls}")
|
||||||
|
print(f"Response: {response}")
|
||||||
|
print("PASS: test_bash_echo")
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_creation():
|
||||||
|
"""Test: Agent can create and verify a file."""
|
||||||
|
client = get_client()
|
||||||
|
if not client:
|
||||||
|
print("SKIP: No API key")
|
||||||
|
return True
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
filepath = os.path.join(tmpdir, "test.txt")
|
||||||
|
|
||||||
|
response, calls = run_agent_loop(
|
||||||
|
client,
|
||||||
|
f"Create a file at {filepath} with content 'agent test' using echo, then verify it exists with cat.",
|
||||||
|
[BASH_TOOL]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(calls) >= 2, f"Should have made at least 2 tool calls: {calls}"
|
||||||
|
assert os.path.exists(filepath), f"File should exist: {filepath}"
|
||||||
|
|
||||||
|
with open(filepath) as f:
|
||||||
|
content = f.read()
|
||||||
|
assert "agent test" in content, f"File content wrong: {content}"
|
||||||
|
|
||||||
|
print(f"Tool calls: {calls}")
|
||||||
|
print(f"File content: {content}")
|
||||||
|
print("PASS: test_file_creation")
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def test_directory_listing():
|
||||||
|
"""Test: Agent can list directory contents."""
|
||||||
|
client = get_client()
|
||||||
|
if not client:
|
||||||
|
print("SKIP: No API key")
|
||||||
|
return True
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
# Create some test files
|
||||||
|
for name in ["foo.txt", "bar.py", "baz.md"]:
|
||||||
|
open(os.path.join(tmpdir, name), "w").close()
|
||||||
|
|
||||||
|
response, calls = run_agent_loop(
|
||||||
|
client,
|
||||||
|
f"List all files in {tmpdir} and tell me how many there are.",
|
||||||
|
[BASH_TOOL]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(calls) >= 1, "Should have made at least 1 tool call"
|
||||||
|
assert response and "3" in response, f"Should find 3 files: {response}"
|
||||||
|
|
||||||
|
print(f"Tool calls: {calls}")
|
||||||
|
print(f"Response: {response}")
|
||||||
|
print("PASS: test_directory_listing")
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_search():
|
||||||
|
"""Test: Agent can search file contents with grep."""
|
||||||
|
client = get_client()
|
||||||
|
if not client:
|
||||||
|
print("SKIP: No API key")
|
||||||
|
return True
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
# Create files with different content
|
||||||
|
with open(os.path.join(tmpdir, "a.txt"), "w") as f:
|
||||||
|
f.write("hello world\nfoo bar\n")
|
||||||
|
with open(os.path.join(tmpdir, "b.txt"), "w") as f:
|
||||||
|
f.write("goodbye world\nbaz qux\n")
|
||||||
|
|
||||||
|
response, calls = run_agent_loop(
|
||||||
|
client,
|
||||||
|
f"Search for the word 'hello' in all .txt files in {tmpdir}. Which file contains it?",
|
||||||
|
[BASH_TOOL]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(calls) >= 1, "Should have made at least 1 tool call"
|
||||||
|
assert response and "a.txt" in response, f"Should find a.txt: {response}"
|
||||||
|
|
||||||
|
print(f"Tool calls: {calls}")
|
||||||
|
print(f"Response: {response}")
|
||||||
|
print("PASS: test_file_search")
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def test_multi_step_task():
|
||||||
|
"""Test: Agent can complete multi-step file manipulation."""
|
||||||
|
client = get_client()
|
||||||
|
if not client:
|
||||||
|
print("SKIP: No API key")
|
||||||
|
return True
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
src = os.path.join(tmpdir, "source.txt")
|
||||||
|
with open(src, "w") as f:
|
||||||
|
f.write("original content")
|
||||||
|
|
||||||
|
response, calls = run_agent_loop(
|
||||||
|
client,
|
||||||
|
f"1. Read {src}, 2. Append ' - modified' to it, 3. Show the final content.",
|
||||||
|
[BASH_TOOL]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(calls) >= 2, f"Should have made multiple tool calls: {calls}"
|
||||||
|
|
||||||
|
with open(src) as f:
|
||||||
|
content = f.read()
|
||||||
|
assert "modified" in content, f"File should be modified: {content}"
|
||||||
|
|
||||||
|
print(f"Tool calls: {calls}")
|
||||||
|
print(f"Final content: {content}")
|
||||||
|
print("PASS: test_multi_step_task")
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Main
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
tests = [
|
||||||
|
test_bash_echo,
|
||||||
|
test_file_creation,
|
||||||
|
test_directory_listing,
|
||||||
|
test_file_search,
|
||||||
|
test_multi_step_task,
|
||||||
|
]
|
||||||
|
|
||||||
|
failed = []
|
||||||
|
for test_fn in tests:
|
||||||
|
name = test_fn.__name__
|
||||||
|
print(f"\n{'='*50}")
|
||||||
|
print(f"Running: {name}")
|
||||||
|
print('='*50)
|
||||||
|
try:
|
||||||
|
if not test_fn():
|
||||||
|
failed.append(name)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"FAILED: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
failed.append(name)
|
||||||
|
|
||||||
|
print(f"\n{'='*50}")
|
||||||
|
print(f"Results: {len(tests) - len(failed)}/{len(tests)} passed")
|
||||||
|
print('='*50)
|
||||||
|
|
||||||
|
if failed:
|
||||||
|
print(f"FAILED: {failed}")
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
print("All tests passed!")
|
||||||
|
sys.exit(0)
|
||||||
Loading…
x
Reference in New Issue
Block a user