From 8f4a130371df64ccb80910fa775ae96b90f6a55a Mon Sep 17 00:00:00 2001
From: CrazyBoyM <ai-lab@foxmail.com>
Date: Sat, 24 Jan 2026 23:39:26 +0800
Subject: [PATCH] ci: add GitHub Actions test workflow with real agent tests

Tests:
- test_bash_echo: Run simple bash command
- test_file_creation: Create and verify file
- test_directory_listing: List directory contents
- test_file_search: Search with grep
- test_multi_step_task: Multi-step file manipulation

Each test runs a complete agent loop (API call -> tool execution -> continue).

Required secrets:
- TEST_API_KEY: API key for testing
- TEST_BASE_URL: API base URL
- TEST_MODEL: Model (default: claude-3-5-sonnet-20241022)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .github/workflows/test.yml |  31 ++++
 tests/test_agent.py        | 283 +++++++++++++++++++++++++++++++++++++
 2 files changed, 314 insertions(+)
 create mode 100644 .github/workflows/test.yml
 create mode 100644 tests/test_agent.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..52142c2
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,31 @@
+name: Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          pip install anthropic python-dotenv openai
+
+      - name: Run tests
+        env:
+          TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+          TEST_BASE_URL: ${{ secrets.TEST_BASE_URL }}
+          TEST_MODEL: ${{ secrets.TEST_MODEL }}
+        run: |
+          python tests/test_agent.py
diff --git a/tests/test_agent.py b/tests/test_agent.py
new file mode 100644
index 0000000..6520231
--- /dev/null
+++ b/tests/test_agent.py
@@ -0,0 +1,283 @@
+"""
+Integration tests for learn-claude-code agents.
+
+Real agent loop tests that run on GitHub Actions (Linux).
+"""
+import os
+import sys
+import json
+import tempfile
+import shutil
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+def get_client():
+    """Get OpenAI-compatible client for testing."""
+    from openai import OpenAI
+    api_key = os.getenv("TEST_API_KEY")
+    base_url = os.getenv("TEST_BASE_URL", "https://api.openai-next.com/v1")
+    if not api_key:
+        return None
+    return OpenAI(api_key=api_key, base_url=base_url)
+
+
+MODEL = os.getenv("TEST_MODEL", "claude-3-5-sonnet-20241022")
+
+BASH_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "bash",
+        "description": "Run a shell command",
+        "parameters": {
+            "type": "object",
+            "properties": {"command": {"type": "string"}},
+            "required": ["command"]
+        }
+    }
+}
+
+
+def run_agent_loop(client, task, tools, max_turns=10):
+    """
+    Run a complete agent loop until done or max_turns.
+    Returns (final_response, tool_calls_made)
+    """
+    import subprocess
+
+    messages = [
+        {"role": "system", "content": "You are a coding agent. Use tools to complete tasks. Be concise."},
+        {"role": "user", "content": task}
+    ]
+
+    tool_calls_made = []
+
+    for _ in range(max_turns):
+        response = client.chat.completions.create(
+            model=MODEL,
+            messages=messages,
+            tools=tools,
+            max_tokens=1000
+        )
+
+        message = response.choices[0].message
+        finish_reason = response.choices[0].finish_reason
+
+        # No tool calls, we're done
+        if finish_reason == "stop" or not message.tool_calls:
+            return message.content, tool_calls_made
+
+        # Process tool calls
+        messages.append({
+            "role": "assistant",
+            "content": message.content,
+            "tool_calls": [
+                {"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
+                for tc in message.tool_calls
+            ]
+        })
+
+        for tool_call in message.tool_calls:
+            func_name = tool_call.function.name
+            args = json.loads(tool_call.function.arguments)
+            tool_calls_made.append((func_name, args))
+
+            if func_name == "bash":
+                cmd = args.get("command", "")
+                try:
+                    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
+                    output = result.stdout + result.stderr
+                except Exception as e:
+                    output = f"Error: {e}"
+            else:
+                output = f"Unknown tool: {func_name}"
+
+            messages.append({
+                "role": "tool",
+                "tool_call_id": tool_call.id,
+                "content": output or "(empty)"
+            })
+
+    return None, tool_calls_made
+
+
+# =============================================================================
+# Test Cases
+# =============================================================================
+
+def test_bash_echo():
+    """Test: Agent can run simple bash command."""
+    client = get_client()
+    if not client:
+        print("SKIP: No API key")
+        return True
+
+    response, calls = run_agent_loop(
+        client,
+        "Run 'echo hello world' and tell me what it outputs.",
+        [BASH_TOOL]
+    )
+
+    assert len(calls) >= 1, "Should have made at least 1 tool call"
+    assert any("echo" in str(c) for c in calls), "Should have run echo command"
+    assert response and "hello" in response.lower(), f"Response should mention hello: {response}"
+
+    print(f"Tool calls: {calls}")
+    print(f"Response: {response}")
+    print("PASS: test_bash_echo")
+    return True
+
+
+def test_file_creation():
+    """Test: Agent can create and verify a file."""
+    client = get_client()
+    if not client:
+        print("SKIP: No API key")
+        return True
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        filepath = os.path.join(tmpdir, "test.txt")
+
+        response, calls = run_agent_loop(
+            client,
+            f"Create a file at {filepath} with content 'agent test' using echo, then verify it exists with cat.",
+            [BASH_TOOL]
+        )
+
+        assert len(calls) >= 2, f"Should have made at least 2 tool calls: {calls}"
+        assert os.path.exists(filepath), f"File should exist: {filepath}"
+
+        with open(filepath) as f:
+            content = f.read()
+        assert "agent test" in content, f"File content wrong: {content}"
+
+        print(f"Tool calls: {calls}")
+        print(f"File content: {content}")
+        print("PASS: test_file_creation")
+        return True
+
+
+def test_directory_listing():
+    """Test: Agent can list directory contents."""
+    client = get_client()
+    if not client:
+        print("SKIP: No API key")
+        return True
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create some test files
+        for name in ["foo.txt", "bar.py", "baz.md"]:
+            open(os.path.join(tmpdir, name), "w").close()
+
+        response, calls = run_agent_loop(
+            client,
+            f"List all files in {tmpdir} and tell me how many there are.",
+            [BASH_TOOL]
+        )
+
+        assert len(calls) >= 1, "Should have made at least 1 tool call"
+        assert response and "3" in response, f"Should find 3 files: {response}"
+
+        print(f"Tool calls: {calls}")
+        print(f"Response: {response}")
+        print("PASS: test_directory_listing")
+        return True
+
+
+def test_file_search():
+    """Test: Agent can search file contents with grep."""
+    client = get_client()
+    if not client:
+        print("SKIP: No API key")
+        return True
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create files with different content
+        with open(os.path.join(tmpdir, "a.txt"), "w") as f:
+            f.write("hello world\nfoo bar\n")
+        with open(os.path.join(tmpdir, "b.txt"), "w") as f:
+            f.write("goodbye world\nbaz qux\n")
+
+        response, calls = run_agent_loop(
+            client,
+            f"Search for the word 'hello' in all .txt files in {tmpdir}. Which file contains it?",
+            [BASH_TOOL]
+        )
+
+        assert len(calls) >= 1, "Should have made at least 1 tool call"
+        assert response and "a.txt" in response, f"Should find a.txt: {response}"
+
+        print(f"Tool calls: {calls}")
+        print(f"Response: {response}")
+        print("PASS: test_file_search")
+        return True
+
+
+def test_multi_step_task():
+    """Test: Agent can complete multi-step file manipulation."""
+    client = get_client()
+    if not client:
+        print("SKIP: No API key")
+        return True
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        src = os.path.join(tmpdir, "source.txt")
+        with open(src, "w") as f:
+            f.write("original content")
+
+        response, calls = run_agent_loop(
+            client,
+            f"1. Read {src}, 2. Append ' - modified' to it, 3. Show the final content.",
+            [BASH_TOOL]
+        )
+
+        assert len(calls) >= 2, f"Should have made multiple tool calls: {calls}"
+
+        with open(src) as f:
+            content = f.read()
+        assert "modified" in content, f"File should be modified: {content}"
+
+        print(f"Tool calls: {calls}")
+        print(f"Final content: {content}")
+        print("PASS: test_multi_step_task")
+        return True
+
+
+# =============================================================================
+# Main
+# =============================================================================
+
+if __name__ == "__main__":
+    tests = [
+        test_bash_echo,
+        test_file_creation,
+        test_directory_listing,
+        test_file_search,
+        test_multi_step_task,
+    ]
+
+    failed = []
+    for test_fn in tests:
+        name = test_fn.__name__
+        print(f"\n{'='*50}")
+        print(f"Running: {name}")
+        print('='*50)
+        try:
+            if not test_fn():
+                failed.append(name)
+        except Exception as e:
+            print(f"FAILED: {e}")
+            import traceback
+            traceback.print_exc()
+            failed.append(name)
+
+    print(f"\n{'='*50}")
+    print(f"Results: {len(tests) - len(failed)}/{len(tests)} passed")
+    print('='*50)
+
+    if failed:
+        print(f"FAILED: {failed}")
+        sys.exit(1)
+    else:
+        print("All tests passed!")
+        sys.exit(0)