ci: add GitHub Actions test workflow with real agent tests

Tests: - test_bash_echo: Run simple bash command - test_file_creation: Create and verify file - test_directory_listing: List directory contents - test_file_search: Search with grep - test_multi_step_task: Multi-step file manipulation Each test runs a complete agent loop (API call -> tool execution -> continue). Required secrets: - TEST_API_KEY: API key for testing - TEST_BASE_URL: API base URL - TEST_MODEL: Model (default: claude-3-5-sonnet-20241022) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 13:16:37 +08:00 · 2026-01-24 23:39:26 +08:00 · 2026-01-24 23:39:26 +08:00 · 8f4a130371
commit 8f4a130371
parent 120cf7ac99
2 changed files with 314 additions and 0 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -0,0 +1,31 @@
 name: Test
 on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
 jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          pip install anthropic python-dotenv openai
      - name: Run tests
        env:
          TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
          TEST_BASE_URL: ${{ secrets.TEST_BASE_URL }}
          TEST_MODEL: ${{ secrets.TEST_MODEL }}
        run: |
          python tests/test_agent.py
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@ -0,0 +1,283 @@
 """
 Integration tests for learn-claude-code agents.
 Real agent loop tests that run on GitHub Actions (Linux).
 """
 import os
 import sys
 import json
 import tempfile
 import shutil
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 def get_client():
    """Get OpenAI-compatible client for testing."""
    from openai import OpenAI
    api_key = os.getenv("TEST_API_KEY")
    base_url = os.getenv("TEST_BASE_URL", "https://api.openai-next.com/v1")
    if not api_key:
        return None
    return OpenAI(api_key=api_key, base_url=base_url)
 MODEL = os.getenv("TEST_MODEL", "claude-3-5-sonnet-20241022")
 BASH_TOOL = {
    "type": "function",
    "function": {
        "name": "bash",
        "description": "Run a shell command",
        "parameters": {
            "type": "object",
            "properties": {"command": {"type": "string"}},
            "required": ["command"]
        }
    }
 }
 def run_agent_loop(client, task, tools, max_turns=10):
    """
    Run a complete agent loop until done or max_turns.
    Returns (final_response, tool_calls_made)
    """
    import subprocess
    messages = [
        {"role": "system", "content": "You are a coding agent. Use tools to complete tasks. Be concise."},
        {"role": "user", "content": task}
    ]
    tool_calls_made = []
    for _ in range(max_turns):
        response = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
            max_tokens=1000
        )
        message = response.choices[0].message
        finish_reason = response.choices[0].finish_reason
        # No tool calls, we're done
        if finish_reason == "stop" or not message.tool_calls:
            return message.content, tool_calls_made
        # Process tool calls
        messages.append({
            "role": "assistant",
            "content": message.content,
            "tool_calls": [
                {"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
                for tc in message.tool_calls
            ]
        })
        for tool_call in message.tool_calls:
            func_name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)
            tool_calls_made.append((func_name, args))
            if func_name == "bash":
                cmd = args.get("command", "")
                try:
                    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
                    output = result.stdout + result.stderr
                except Exception as e:
                    output = f"Error: {e}"
            else:
                output = f"Unknown tool: {func_name}"
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": output or "(empty)"
            })
    return None, tool_calls_made
 # =============================================================================
 # Test Cases
 # =============================================================================
 def test_bash_echo():
    """Test: Agent can run simple bash command."""
    client = get_client()
    if not client:
        print("SKIP: No API key")
        return True
    response, calls = run_agent_loop(
        client,
        "Run 'echo hello world' and tell me what it outputs.",
        [BASH_TOOL]
    )
    assert len(calls) >= 1, "Should have made at least 1 tool call"
    assert any("echo" in str(c) for c in calls), "Should have run echo command"
    assert response and "hello" in response.lower(), f"Response should mention hello: {response}"
    print(f"Tool calls: {calls}")
    print(f"Response: {response}")
    print("PASS: test_bash_echo")
    return True
 def test_file_creation():
    """Test: Agent can create and verify a file."""
    client = get_client()
    if not client:
        print("SKIP: No API key")
        return True
    with tempfile.TemporaryDirectory() as tmpdir:
        filepath = os.path.join(tmpdir, "test.txt")
        response, calls = run_agent_loop(
            client,
            f"Create a file at {filepath} with content 'agent test' using echo, then verify it exists with cat.",
            [BASH_TOOL]
        )
        assert len(calls) >= 2, f"Should have made at least 2 tool calls: {calls}"
        assert os.path.exists(filepath), f"File should exist: {filepath}"
        with open(filepath) as f:
            content = f.read()
        assert "agent test" in content, f"File content wrong: {content}"
        print(f"Tool calls: {calls}")
        print(f"File content: {content}")
        print("PASS: test_file_creation")
        return True
 def test_directory_listing():
    """Test: Agent can list directory contents."""
    client = get_client()
    if not client:
        print("SKIP: No API key")
        return True
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create some test files
        for name in ["foo.txt", "bar.py", "baz.md"]:
            open(os.path.join(tmpdir, name), "w").close()
        response, calls = run_agent_loop(
            client,
            f"List all files in {tmpdir} and tell me how many there are.",
            [BASH_TOOL]
        )
        assert len(calls) >= 1, "Should have made at least 1 tool call"
        assert response and "3" in response, f"Should find 3 files: {response}"
        print(f"Tool calls: {calls}")
        print(f"Response: {response}")
        print("PASS: test_directory_listing")
        return True
 def test_file_search():
    """Test: Agent can search file contents with grep."""
    client = get_client()
    if not client:
        print("SKIP: No API key")
        return True
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create files with different content
        with open(os.path.join(tmpdir, "a.txt"), "w") as f:
            f.write("hello world\nfoo bar\n")
        with open(os.path.join(tmpdir, "b.txt"), "w") as f:
            f.write("goodbye world\nbaz qux\n")
        response, calls = run_agent_loop(
            client,
            f"Search for the word 'hello' in all .txt files in {tmpdir}. Which file contains it?",
            [BASH_TOOL]
        )
        assert len(calls) >= 1, "Should have made at least 1 tool call"
        assert response and "a.txt" in response, f"Should find a.txt: {response}"
        print(f"Tool calls: {calls}")
        print(f"Response: {response}")
        print("PASS: test_file_search")
        return True
 def test_multi_step_task():
    """Test: Agent can complete multi-step file manipulation."""
    client = get_client()
    if not client:
        print("SKIP: No API key")
        return True
    with tempfile.TemporaryDirectory() as tmpdir:
        src = os.path.join(tmpdir, "source.txt")
        with open(src, "w") as f:
            f.write("original content")
        response, calls = run_agent_loop(
            client,
            f"1. Read {src}, 2. Append ' - modified' to it, 3. Show the final content.",
            [BASH_TOOL]
        )
        assert len(calls) >= 2, f"Should have made multiple tool calls: {calls}"
        with open(src) as f:
            content = f.read()
        assert "modified" in content, f"File should be modified: {content}"
        print(f"Tool calls: {calls}")
        print(f"Final content: {content}")
        print("PASS: test_multi_step_task")
        return True
 # =============================================================================
 # Main
 # =============================================================================
 if __name__ == "__main__":
    tests = [
        test_bash_echo,
        test_file_creation,
        test_directory_listing,
        test_file_search,
        test_multi_step_task,
    ]
    failed = []
    for test_fn in tests:
        name = test_fn.__name__
        print(f"\n{'='*50}")
        print(f"Running: {name}")
        print('='*50)
        try:
            if not test_fn():
                failed.append(name)
        except Exception as e:
            print(f"FAILED: {e}")
            import traceback
            traceback.print_exc()
            failed.append(name)
    print(f"\n{'='*50}")
    print(f"Results: {len(tests) - len(failed)}/{len(tests)} passed")
    print('='*50)
    if failed:
        print(f"FAILED: {failed}")
        sys.exit(1)
    else:
        print("All tests passed!")
        sys.exit(0)