# analysis_claude_code/tests/test_agent.py

"""
Integration tests for learn-claude-code agents.

Agent task tests covering the v0-v2 core capabilities (bash, file tools, todo
tracking), plus error handling and multi-step workflows.
Runs on GitHub Actions (Linux).
"""
import os
import sys
import json
import tempfile
import shutil

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


def get_client():
    """Build the OpenAI-compatible client used by the integration tests.

    Configuration is read from the environment:

    * ``TEST_API_KEY``  - API key; when unset or empty no client is created.
    * ``TEST_BASE_URL`` - endpoint base URL (defaults to
      ``https://api.openai-next.com/v1``).

    Returns:
        An ``openai.OpenAI`` instance, or ``None`` when no API key is
        configured so that callers can skip their test.
    """
    api_key = os.getenv("TEST_API_KEY")
    if not api_key:
        # Bail out before importing ``openai``: a machine without credentials
        # must be able to skip cleanly even if the SDK is not installed.
        return None

    from openai import OpenAI

    base_url = os.getenv("TEST_BASE_URL", "https://api.openai-next.com/v1")
    return OpenAI(api_key=api_key, base_url=base_url)


# Model identifier sent with every chat-completion request; set the
# TEST_MODEL environment variable to override the default.
MODEL = os.environ.get("TEST_MODEL", "claude-3-5-sonnet-20241022")


# =============================================================================
# Tool Definitions
# =============================================================================

def _function_tool(name, description, properties, required):
    """Build one function-tool entry in the shape passed as ``tools=`` to the API.

    ``properties`` maps argument names to JSON-schema fragments and
    ``required`` lists the argument names that must always be supplied.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Shell access: the only tool handed to the v0 tests.
BASH_TOOL = _function_tool(
    "bash",
    "Run a shell command",
    {"command": {"type": "string"}},
    ["command"],
)

# File tools: together with bash these make up the v1 tool set.
READ_FILE_TOOL = _function_tool(
    "read_file",
    "Read contents of a file",
    {"path": {"type": "string"}},
    ["path"],
)

WRITE_FILE_TOOL = _function_tool(
    "write_file",
    "Write content to a file (creates or overwrites)",
    {
        "path": {"type": "string"},
        "content": {"type": "string"},
    },
    ["path", "content"],
)

EDIT_FILE_TOOL = _function_tool(
    "edit_file",
    "Replace old_string with new_string in a file",
    {
        "path": {"type": "string"},
        "old_string": {"type": "string"},
        "new_string": {"type": "string"},
    },
    ["path", "old_string", "new_string"],
)

# Todo tracking: takes the full list of items, each with a status drawn
# from a fixed set of three values.
TODO_WRITE_TOOL = _function_tool(
    "TodoWrite",
    "Update the todo list to track task progress",
    {
        "items": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "content": {"type": "string"},
                    "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]},
                    "activeForm": {"type": "string"},
                },
                "required": ["content", "status", "activeForm"],
            },
        }
    },
    ["items"],
)

# Tool sets per agent version: v1 = bash + file tools, v2 = v1 + TodoWrite.
V1_TOOLS = [BASH_TOOL, READ_FILE_TOOL, WRITE_FILE_TOOL, EDIT_FILE_TOOL]
V2_TOOLS = [*V1_TOOLS, TODO_WRITE_TOOL]


# =============================================================================
# Agent Loop Runner
# =============================================================================

def execute_tool(name, args, workdir):
    """Execute one tool call locally and return its textual result.

    Args:
        name: Tool name: ``bash``, ``read_file``, ``write_file``,
            ``edit_file`` or ``TodoWrite``.
        args: Decoded tool arguments, a dict keyed as in the tool schema.
        workdir: Directory used as the working directory for ``bash`` and as
            the base for relative paths given to the file tools. When falsy,
            paths are used exactly as given.

    Returns:
        The tool output as a string. Failures are reported as
        ``"Error: ..."`` strings instead of being raised, so the outcome can
        always be sent back to the model as a tool message.
    """
    import subprocess

    if not isinstance(args, dict):
        # e.g. the model sent ``null`` or a list instead of an object.
        return f"Error: tool arguments must be a JSON object, got {type(args).__name__}"

    def resolve(path):
        # Anchor relative paths at workdir so the file tools agree with bash
        # (which runs with cwd=workdir). os.path.join leaves absolute paths
        # untouched.
        return os.path.join(workdir, path) if workdir else path

    if name == "bash":
        cmd = args.get("command", "")
        try:
            # shell=True is deliberate: the tool's contract is "run a shell
            # command", including pipelines written by the model.
            result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30, cwd=workdir)
            return result.stdout + result.stderr or "(empty)"
        except Exception as e:
            return f"Error: {e}"

    elif name == "read_file":
        path = args.get("path", "")
        try:
            with open(resolve(path), "r", encoding="utf-8") as f:
                return f.read()
        except Exception as e:
            return f"Error: {e}"

    elif name == "write_file":
        path = args.get("path", "")
        content = args.get("content", "")
        try:
            with open(resolve(path), "w", encoding="utf-8") as f:
                f.write(content)
            return f"Written {len(content)} bytes to {path}"
        except Exception as e:
            return f"Error: {e}"

    elif name == "edit_file":
        path = args.get("path", "")
        old = args.get("old_string", "")
        new = args.get("new_string", "")
        try:
            target = resolve(path)
            with open(target, "r", encoding="utf-8") as f:
                content = f.read()
            if old not in content:
                return f"Error: '{old}' not found in file"
            # Only the first occurrence is replaced.
            content = content.replace(old, new, 1)
            with open(target, "w", encoding="utf-8") as f:
                f.write(content)
            return f"Replaced in {path}"
        except Exception as e:
            return f"Error: {e}"

    elif name == "TodoWrite":
        items = args.get("items", [])
        icons = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}
        # Simulate todo tracking: render a checklist plus a completion count.
        try:
            lines = [f"{icons.get(item['status'], '[ ]')} {item['content']}" for item in items]
            done = sum(1 for item in items if item["status"] == "completed")
            return "\n".join(lines) + f"\n({done}/{len(items)} completed)"
        except (KeyError, TypeError) as e:
            # Malformed items are reported like any other tool failure
            # rather than crashing the caller.
            return f"Error: malformed todo item ({e})"

    return f"Unknown tool: {name}"


def run_agent_loop(client, task, tools, workdir=None, max_turns=15, system_prompt=None):
    """Drive a chat-completions tool loop until the model finishes.

    Each turn sends the conversation to the model. If the reply carries tool
    calls, every call is executed with ``execute_tool`` and its output
    (truncated to 5000 characters) is appended as a ``tool`` message before
    the next turn.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        task: User message that starts the conversation.
        tools: Tool schemas offered to the model.
        workdir: Directory passed to ``execute_tool``; defaults to the
            current working directory.
        max_turns: Maximum number of model requests.
        system_prompt: System message; a default mentioning ``workdir`` is
            used when omitted.

    Returns:
        ``(final_response, tool_calls_made, messages)``. ``final_response``
        is the content of the model's last message, or ``None`` when
        ``max_turns`` is exhausted. ``tool_calls_made`` lists
        ``(name, args)`` for every call that was executed; calls whose
        arguments could not be decoded are not executed and not listed.
    """
    if workdir is None:
        workdir = os.getcwd()

    if system_prompt is None:
        system_prompt = f"You are a coding agent at {workdir}. Use tools to complete tasks. Be concise."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": task}
    ]

    tool_calls_made = []

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
            max_tokens=1500
        )

        choice = response.choices[0]
        message = choice.message

        if choice.finish_reason == "stop" or not message.tool_calls:
            return message.content, tool_calls_made, messages

        # Echo the assistant turn back verbatim so each tool result below can
        # reference its tool_call_id.
        messages.append({
            "role": "assistant",
            "content": message.content,
            "tool_calls": [
                {"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}}
                for tc in message.tool_calls
            ]
        })

        for tool_call in message.tool_calls:
            func_name = tool_call.function.name
            try:
                # Treat missing/empty arguments as "no arguments".
                args = json.loads(tool_call.function.arguments or "{}")
            except json.JSONDecodeError as e:
                # Report the problem to the model instead of aborting the
                # whole loop; it can retry with well-formed arguments.
                output = f"Error: invalid JSON arguments for {func_name}: {e}"
            else:
                if isinstance(args, dict):
                    tool_calls_made.append((func_name, args))
                    output = execute_tool(func_name, args, workdir)
                else:
                    output = f"Error: arguments for {func_name} must be a JSON object"

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": output[:5000]
            })

    return None, tool_calls_made, messages


# =============================================================================
# v0 Tests: Bash Only
# =============================================================================

def test_v0_bash_echo():
    """v0: the agent runs a single echo command through the bash tool.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    prompt = "Run 'echo hello world' and tell me the output."
    answer, invoked, _history = run_agent_loop(llm, prompt, tools=[BASH_TOOL])

    # The model must have used the tool, and some call must mention echo.
    assert len(invoked) >= 1, "Should make at least 1 tool call"
    assert any("echo" in str(call) for call in invoked), "Should run echo"
    assert answer and "hello" in answer.lower()

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v0_bash_echo")
    return True


def test_v0_bash_pipeline():
    """v0: the agent counts matching lines in a fixture file via bash.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        # Fixture: exactly two of the four lines start with 'a'.
        fixture = os.path.join(sandbox, "data.txt")
        with open(fixture, "w") as handle:
            handle.write("apple\nbanana\napricot\ncherry\n")

        prompt = f"Count how many lines in {sandbox}/data.txt start with 'a'. Use grep and wc."
        answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            [BASH_TOOL],
            workdir=sandbox,
        )

        assert len(invoked) >= 1
        assert answer and "2" in answer

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v0_bash_pipeline")
    return True


# =============================================================================
# v1 Tests: 4 Core Tools
# =============================================================================

def test_v1_read_file():
    """v1: the agent reads a file with read_file and reports its content.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        secret_path = os.path.join(sandbox, "secret.txt")
        with open(secret_path, "w") as handle:
            handle.write("The secret code is: XYZ123")

        prompt = f"Read {secret_path} and tell me what the secret code is."
        answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        # Each recorded call is a (tool_name, args) pair.
        used = [tool for tool, _args in invoked]
        assert "read_file" in used, "Should use read_file"
        assert answer and "XYZ123" in answer

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v1_read_file")
    return True


def test_v1_write_file():
    """v1: the agent creates a new file through the write_file tool.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        target = os.path.join(sandbox, "greeting.txt")

        prompt = f"Create a file at {target} containing 'Hello, Agent!' using write_file tool."
        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        used = [tool for tool, _args in invoked]
        assert "write_file" in used, "Should use write_file"
        assert os.path.exists(target)
        with open(target) as handle:
            written = handle.read()
        # Only the greeting word is checked, not the exact text.
        assert "Hello" in written

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v1_write_file")
    return True


def test_v1_edit_file():
    """v1: the agent flips a config flag in place using edit_file.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        config_path = os.path.join(sandbox, "config.txt")
        with open(config_path, "w") as handle:
            handle.write("debug=false\nport=8080\n")

        prompt = f"Edit {config_path} to change debug=false to debug=true using edit_file tool."
        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        used = [tool for tool, _args in invoked]
        assert "edit_file" in used, "Should use edit_file"
        with open(config_path) as handle:
            updated = handle.read()
        assert "debug=true" in updated

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v1_edit_file")
    return True


def test_v1_read_edit_verify():
    """v1: the agent chains tools: read a file, change it, read it again.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        version_path = os.path.join(sandbox, "version.txt")
        with open(version_path, "w") as handle:
            handle.write("version=1.0.0")

        prompt = f"1. Read {version_path}, 2. Change version to 2.0.0, 3. Read it again to verify."
        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        used = {tool for tool, _args in invoked}
        assert "read_file" in used, "Should read file"
        # Either modification tool is acceptable for the change step.
        assert used & {"edit_file", "write_file"}, "Should modify file"

        with open(version_path) as handle:
            updated = handle.read()
        assert "2.0.0" in updated

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v1_read_edit_verify")
    return True


# =============================================================================
# v2 Tests: Todo Tracking
# =============================================================================

def test_v2_todo_single_task():
    """v2: a one-file task with TodoWrite offered alongside the file tools.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        system = (
            f"You are a coding agent at {sandbox}.\n"
            "Use TodoWrite to track tasks. Use write_file to create files. Be concise."
        )
        prompt = f"Create a file at {sandbox}/hello.txt with content 'hello'. First use TodoWrite to plan, then use write_file to create the file."

        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V2_TOOLS,
            workdir=sandbox,
            max_turns=10,
            system_prompt=system,
        )

        todo_count = sum(1 for tool, _args in invoked if tool == "TodoWrite")
        write_count = sum(1 for tool, _args in invoked if tool == "write_file")
        created = os.path.exists(os.path.join(sandbox, "hello.txt"))

        print(f"TodoWrite calls: {todo_count}, write_file calls: {write_count}")

        # Creating the file (or at least attempting to) is what matters;
        # TodoWrite usage is reported above but not required.
        assert created or write_count >= 1, "Should attempt to create file"

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v2_todo_single_task")
    return True


def test_v2_todo_multi_step():
    """v2: a three-file task with TodoWrite offered for progress tracking.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        system = (
            f"You are a coding agent at {sandbox}.\n"
            "Use TodoWrite to plan multi-step tasks. Use write_file to create files. Complete all steps."
        )
        prompt = "\n".join([
            f"Create 3 files in {sandbox}:",
            "1. Use write_file to create a.txt with content 'A'",
            "2. Use write_file to create b.txt with content 'B'",
            "3. Use write_file to create c.txt with content 'C'",
            "Use TodoWrite to track progress. Execute all steps.",
        ])

        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V2_TOOLS,
            workdir=sandbox,
            max_turns=25,
            system_prompt=system,
        )

        # Count how many of the requested files actually exist on disk.
        files_created = 0
        for filename in ("a.txt", "b.txt", "c.txt"):
            if os.path.exists(os.path.join(sandbox, filename)):
                files_created += 1

        write_count = sum(1 for tool, _args in invoked if tool == "write_file")
        todo_count = sum(1 for tool, _args in invoked if tool == "TodoWrite")

        print(f"Files created: {files_created}/3, write_file calls: {write_count}, TodoWrite calls: {todo_count}")

        # Lenient on purpose: two files on disk or two write attempts suffice.
        assert files_created >= 2 or write_count >= 2, "Should create/attempt at least 2 files"

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_v2_todo_multi_step")
    return True


# =============================================================================
# Error Handling Tests
# =============================================================================

def test_error_file_not_found():
    """Error: the agent is asked to read a file that does not exist.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        prompt = f"Read the file {sandbox}/nonexistent.txt and tell me if it exists."
        answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        assert answer is not None, "Should return a response"
        # The reply should acknowledge the missing file in some wording.
        lowered = answer.lower()
        markers = ("not", "error", "exist", "found", "cannot")
        assert any(marker in lowered for marker in markers)

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_error_file_not_found")
    return True


def test_error_command_fails():
    """Error: the agent is asked to run a command that does not exist.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    prompt = "Run the command 'nonexistent_command_xyz' and tell me what happens."
    answer, invoked, _history = run_agent_loop(llm, prompt, tools=[BASH_TOOL])

    assert answer is not None
    # Any of these phrases counts as the agent reporting the failure.
    lowered = answer.lower()
    markers = ("not found", "error", "fail", "command")
    assert any(marker in lowered for marker in markers)

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_error_command_fails")
    return True


def test_error_edit_string_not_found():
    """Error: the agent is asked to replace text that is absent from the file.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        target = os.path.join(sandbox, "test.txt")
        with open(target, "w") as handle:
            handle.write("hello world")

        prompt = f"Edit {target} to replace 'xyz123' with 'abc'. Tell me if it worked."
        answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        assert answer is not None
        # The reply should say, in some wording, that the edit did not apply.
        lowered = answer.lower()
        markers = ("not found", "error", "doesn't", "cannot", "couldn't")
        assert any(marker in lowered for marker in markers)

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_error_edit_string_not_found")
    return True


# =============================================================================
# Complex Workflow Tests
# =============================================================================

def test_workflow_create_python_script():
    """Workflow: the agent writes a small Python script and then runs it.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        prompt = f"Create a Python script at {sandbox}/calc.py that prints 2+2, then run it with python3."
        answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
        )

        script = os.path.join(sandbox, "calc.py")
        assert os.path.exists(script), "Script should exist"

        # Both the write step and the run step must show up in the calls.
        used = [tool for tool, _args in invoked]
        assert "write_file" in used, "Should write file"
        assert "bash" in used, "Should run bash"
        assert answer and "4" in answer

    print(f"Tool calls: {len(invoked)}")
    print("PASS: test_workflow_create_python_script")
    return True


def test_workflow_find_and_replace():
    """Workflow: the agent finds files containing a word and rewrites it.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        # Fixtures: two files contain 'old'; the third does not.
        fixtures = {
            "file0.txt": "foo=old",
            "file1.txt": "bar=old",
            "file2.txt": "baz=new",
        }
        for filename, text in fixtures.items():
            with open(os.path.join(sandbox, filename), "w") as handle:
                handle.write(text)

        prompt = f"Find all .txt files in {sandbox} containing 'old' and change 'old' to 'NEW'."
        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
            max_turns=20,
        )

        # Count the files that now contain the upper-case replacement.
        modified = 0
        for filename in fixtures:
            with open(os.path.join(sandbox, filename)) as handle:
                if "NEW" in handle.read():
                    modified += 1

        assert modified >= 2, f"Should modify at least 2 files, got {modified}"

    print(f"Tool calls: {len(invoked)}, Files modified: {modified}")
    print("PASS: test_workflow_find_and_replace")
    return True


def test_workflow_directory_setup():
    """Workflow: the agent builds a small directory tree with three files.

    Returns True on success and also when skipped for lack of an API key.
    """
    llm = get_client()
    if not llm:
        print("SKIP: No API key")
        return True

    with tempfile.TemporaryDirectory() as sandbox:
        prompt = "\n".join([
            f"In {sandbox}, create this structure:",
            "- src/main.py (content: print('main'))",
            "- src/utils.py (content: print('utils'))",
            "- README.md (content: '# Project')",
        ])
        _answer, invoked, _history = run_agent_loop(
            llm,
            prompt,
            V1_TOOLS,
            workdir=sandbox,
            max_turns=20,
        )

        # Expected files, as path components relative to the sandbox.
        expected = [
            ("src", "main.py"),
            ("src", "utils.py"),
            ("README.md",),
        ]
        passed = 0
        for parts in expected:
            if os.path.exists(os.path.join(sandbox, *parts)):
                passed += 1

        # Lenient on purpose: two of the three items are enough.
        assert passed >= 2, f"Should create at least 2/3 items, got {passed}"

    print(f"Tool calls: {len(invoked)}, Items created: {passed}/3")
    print("PASS: test_workflow_directory_setup")
    return True


# =============================================================================
# Main
# =============================================================================

if __name__ == "__main__":
    import traceback

    # Script entry point: run every test in order, print a summary, and exit
    # with status 1 if any test raised or returned a falsy value, else 0.
    tests = [
        # v0: Bash only
        test_v0_bash_echo,
        test_v0_bash_pipeline,
        # v1: 4 core tools
        test_v1_read_file,
        test_v1_write_file,
        test_v1_edit_file,
        test_v1_read_edit_verify,
        # v2: Todo tracking
        test_v2_todo_single_task,
        test_v2_todo_multi_step,
        # Error handling
        test_error_file_not_found,
        test_error_command_fails,
        test_error_edit_string_not_found,
        # Complex workflows
        test_workflow_create_python_script,
        test_workflow_find_and_replace,
        test_workflow_directory_setup,
    ]

    rule = "=" * 60
    failed = []

    for test_fn in tests:
        name = test_fn.__name__
        print(f"\n{rule}")
        print(f"Running: {name}")
        print(rule)
        try:
            succeeded = test_fn()
        except Exception as e:
            # Assertion failures land here too; show the traceback and keep
            # going so the remaining tests still run.
            print(f"FAILED: {e}")
            traceback.print_exc()
            succeeded = False
        if not succeeded:
            failed.append(name)

    total = len(tests)
    print(f"\n{rule}")
    print(f"Results: {total - len(failed)}/{total} passed")
    print(rule)

    if not failed:
        print("All integration tests passed!")
        sys.exit(0)

    print(f"FAILED: {failed}")
    sys.exit(1)