From abe09a9ccb500097ca2d5cd5e6195fdf1df515b0 Mon Sep 17 00:00:00 2001 From: gui-yue Date: Sun, 31 May 2026 17:27:29 +0800 Subject: [PATCH] Sync generated course content --- .../s03_permission/permission-overview.en.svg | 17 ++- .../s03_permission/permission-overview.ja.svg | 17 ++- .../s03_permission/permission-overview.svg | 17 ++- web/src/data/generated/docs.json | 14 +-- web/src/data/generated/versions.json | 100 +++++++++--------- 5 files changed, 93 insertions(+), 72 deletions(-) diff --git a/web/public/course-assets/s03_permission/permission-overview.en.svg b/web/public/course-assets/s03_permission/permission-overview.en.svg index 9c2537d..8255bb2 100644 --- a/web/public/course-assets/s03_permission/permission-overview.en.svg +++ b/web/public/course-assets/s03_permission/permission-overview.en.svg @@ -68,9 +68,9 @@ Gate 3: User Approval - - - Deny + + + Deny @@ -85,8 +85,15 @@ HANDLERS bash/read/write/... - - + + + + + + tool_result + + + diff --git a/web/public/course-assets/s03_permission/permission-overview.ja.svg b/web/public/course-assets/s03_permission/permission-overview.ja.svg index c381d5e..f4fd613 100644 --- a/web/public/course-assets/s03_permission/permission-overview.ja.svg +++ b/web/public/course-assets/s03_permission/permission-overview.ja.svg @@ -68,9 +68,9 @@ ゲート 3: ユーザー承認 - - - 拒否 + + + 拒否 @@ -85,8 +85,15 @@ HANDLERS bash/read/write/... - - + + + + + + tool_result + + + diff --git a/web/public/course-assets/s03_permission/permission-overview.svg b/web/public/course-assets/s03_permission/permission-overview.svg index b5097a3..61567d8 100644 --- a/web/public/course-assets/s03_permission/permission-overview.svg +++ b/web/public/course-assets/s03_permission/permission-overview.svg @@ -68,9 +68,9 @@ 闸门 3: 用户审批 - - - 拒绝 + + + 拒绝 @@ -85,8 +85,15 @@ HANDLERS bash/read/write/... 
- - + + + + + + tool_result + + + diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json index 7db1561..3f6bb41 100644 --- a/web/src/data/generated/docs.json +++ b/web/src/data/generated/docs.json @@ -57,19 +57,19 @@ "version": "s04", "locale": "en", "title": "s04: Hooks — Hang on the Loop, Don't Write into It", - "content": "# s04: Hooks — Hang on the Loop, Don't Write into It\n\ns01 → s02 → s03 → `s04` → [s05](/en/s05) → s06 → ... → s20\n\n> *\"Hang on the loop, don't write into it\"* — Hooks inject extension logic before and after tool execution.\n>\n> **Harness Layer**: Hooks — Extension points that don't invade the loop.\n\n---\n\n## The Problem\n\nThe s03 Agent has permission checks. But every new check, \"log every bash call\", \"auto git add after writes\", requires modifying the `agent_loop` function.\n\nThe loop quickly becomes this:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # added a line\n check_permission(block) # added a line\n notify_slack(block) # added another line\n output = execute(block)\n auto_git_add(block) # yet another line\n # ... the loop is unrecognizable\n```\n\nWhat you want to extend is the Agent's behavior, but what you're modifying is the loop itself. The loop should be a stable core; extensions should hang on the outside.\n\n---\n\n## The Solution\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.en.svg)\n\nThe s03 loop and permission logic are fully preserved. The only change is moving `check_permission()` from inside the loop body onto a hook. The loop no longer directly calls any check function. 
Instead it calls `trigger_hooks(\"PreToolUse\", block)`, and the registry decides what to run.\n\nFour events, covering a complete agent cycle:\n\n| Event | Trigger Timing | Typical Use |\n|-------|---------------|-------------|\n| UserPromptSubmit | After user input, before entering LLM | Input validation, context injection |\n| PreToolUse | Before tool execution | Permission checks, logging |\n| PostToolUse | After tool execution | Side effects (auto git add etc.), output checking |\n| Stop | When the loop is about to exit | Cleanup (CC also supports force continuation) |\n\nExtensions are added via `register_hook()`. The loop only calls `trigger_hooks()`.\n\n---\n\n## How It Works\n\n**Hook registry**: a dict mapping event names to callback lists.\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # return value ≠ None → hook says \"stop\"\n return result\n return None\n```\n\nIn the teaching version, PreToolUse returning non-None means block execution; Stop returning non-None means force continuation. UserPromptSubmit and PostToolUse return values are unused.\n\n**UserPromptSubmit**, triggers after user input, before entering the LLM. 
CC can intercept or modify input; the teaching version only logs:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nIn the main loop, triggered right after user input:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← before entering LLM\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**, hooks before and after tool execution. s03's permission check logic is now wrapped as a PreToolUse hook, plus a logging hook and a large-output reminder:\n\n```python\n# PreToolUse: permission check (s03 logic, moved from loop to hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: logging\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: large output reminder\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**, triggers when the loop is about to exit (`stop_reason != \"tool_use\"`). 
The teaching version prints a cleanup summary:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\nIn agent_loop, triggered before exit:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← before exiting\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**Only one change in the loop**: s03 directly called `check_permission(block)`, s04 replaces it with `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hooks replace hardcoding\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\nFour hooks cover the critical nodes of the agent cycle: input → before execution → after execution → exit. 
The loop only calls trigger_hooks(); all logic lives in hook callbacks.\n\n---\n\n## Changes from s03\n\n| Component | Before (s03) | After (s04) |\n|-----------|-------------|-------------|\n| Extension method | check_permission() hardcoded in the loop | HOOKS registry + trigger_hooks() |\n| New functions | — | register_hook, trigger_hooks |\n| Hook callbacks | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| Loop | Directly calls check_permission() | Calls trigger_hooks(\"PreToolUse\", ...) |\n| Exit control | None | trigger_hooks(\"Stop\", ...) can prevent exit |\n| Input interception | None | trigger_hooks(\"UserPromptSubmit\", ...) can inject context |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md` (should pass directly, observe hook logs)\n2. `Create a file called test.txt` (after creation, observe if PostToolUse fires)\n3. `Delete all temporary files in /tmp` (bash + rm triggers permission hook)\n\nWhat to watch for: Before each tool execution, does the `[HOOK]` log appear? When permission is denied, was it intercepted by a hook or hardcoded in the loop?\n\n---\n\n## What's Next\n\nThe Agent can now safely execute operations. But does it ever stop to think \"what should I do first, and what next?\" Given a complex task, does it jump straight in, or plan first?\n\n→ s05 TodoWrite: Give the Agent a planning tool. Make a list first, then execute.\n\n
\nDive into CC Source Code\n\n> The following is based on a complete analysis of CC source code `toolHooks.ts` (650 lines), `hooks.ts`, `stopHooks.ts`, and `coreTypes.ts`.\n\n### 1. Hook Events: Not Just 4, but 27\n\nThe teaching version covers only PreToolUse and PostToolUse. CC actually has 27 hook events (`coreTypes.ts:25-53`):\n\n| Category | Events |\n|----------|--------|\n| Tool-related | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| Session-related | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| User interaction | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| Sub-agents | `SubagentStart`, `SubagentStop` |\n| Compaction-related | `PreCompact`, `PostCompact` |\n| Team-related | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| Other | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\nThe teaching version covers only 4 core events (UserPromptSubmit, PreToolUse, PostToolUse, Stop) because they cover every critical node of a complete agent cycle. The other 23 follow the same pattern.\n\n### 2. HookResult Common Fields\n\nCC's `HookResult` (`types/hooks.ts:260-275`) has 14 fields. Common ones:\n\n| Field | Type | Purpose |\n|-------|------|---------|\n| `message` | Message | Optional UI message |\n| `blockingError` | HookBlockingError | Blocking error → injected into conversation for model self-correction |\n| `outcome` | success/blocking/non_blocking_error/cancelled | Execution result |\n| `preventContinuation` | boolean | Prevent subsequent execution |\n| `stopReason` | string | Stop reason description |\n| `permissionBehavior` | allow/deny/ask/passthrough | Hook returns permission decision |\n| `updatedInput` | Record | Modify tool input |\n| `additionalContext` | string | Additional context |\n| `updatedMCPToolOutput` | unknown | MCP tool output modification |\n\n### 3. 
Key Invariant: Hook 'allow' Cannot Bypass deny/ask Rules\n\nThis is the most important security design in CC's permission system (`toolHooks.ts:325-331`): **when a hook returns allow, it still checks settings.json deny/ask rules.** Even if the user's hook script says \"allow\", if the tool is disabled in settings.json, the operation is still blocked.\n\nThe teaching version doesn't have this layer; hooks returning non-None directly interrupt. This is sufficient for teaching, but would create a security vulnerability in production.\n\n### 4. stopHookActive Mechanism\n\nCC's Stop hooks have an infinite-loop prevention mechanism (`query.ts:212,1300`): the `stopHookActive` state field. When stop hooks produce a blockingError, the loop re-enters with `stopHookActive: true`. Subsequent iterations see this flag and don't trigger stop hooks again. This prevents a never-stopping bug: model self-corrects → stop hook errors again → model self-corrects again → stop hook errors again...\n\n### 5. hook_stopped_continuation\n\nWhen PostToolUse hooks return `preventContinuation: true`, a `hook_stopped_continuation` attachment is produced (`toolHooks.ts:117-130`). query.ts (L1388-1393) detects it and sets `shouldPreventContinuation = true`, causing the loop to exit. This is the mechanism for \"hooks gracefully shut down the Agent\" — not a crash, but a completion.\n\n### Teaching Version Simplifications Are Intentional\n\n- 27 events → 4 (UserPromptSubmit/PreToolUse/PostToolUse/Stop): covers agent cycle critical nodes\n- 14 fields → simple return values (None = continue, non-None = interrupt/continue): minimal cognitive load\n- Hook allow vs deny/ask invariant → omitted: teaching version has no settings.json layer\n- stopHookActive → omitted: teaching version Stop hook only does simple continuation, no infinite-loop prevention needed\n\n
\n\n\n" + "content": "# s04: Hooks — Hang on the Loop, Don't Write into It\n\ns01 → s02 → s03 → `s04` → [s05](/en/s05) → s06 → ... → s20\n\n> *\"Hang on the loop, don't write into it\"* — Hooks inject extension logic before and after tool execution.\n>\n> **Harness Layer**: Hooks — Extension points that don't invade the loop.\n\n---\n\n## The Problem\n\nThe s03 Agent has permission checks. But every new check, \"log every bash call\", \"auto git add after writes\", requires modifying the `agent_loop` function.\n\nThe loop quickly becomes this:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n log_to_file(block) # added a line\n check_permission(block) # added a line\n notify_slack(block) # added another line\n output = execute(block)\n auto_git_add(block) # yet another line\n # ... the loop is unrecognizable\n```\n\nWhat you want to extend is the Agent's behavior, but what you're modifying is the loop itself. The loop should be a stable core; extensions should hang on the outside.\n\n---\n\n## The Solution\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.en.svg)\n\nThe s03 loop and permission logic are fully preserved. The only change is moving `check_permission()` from inside the loop body onto a hook. The loop no longer directly calls any check function. 
Instead it calls `trigger_hooks(\"PreToolUse\", block)`, and the registry decides what to run.\n\nFour events, covering a complete agent cycle:\n\n| Event | Trigger Timing | Typical Use |\n|-------|---------------|-------------|\n| UserPromptSubmit | After user input, before entering LLM | Input validation, context injection |\n| PreToolUse | Before tool execution | Permission checks, logging |\n| PostToolUse | After tool execution | Side effects (auto git add etc.), output checking |\n| Stop | When the loop is about to exit | Cleanup (CC also supports force continuation) |\n\nExtensions are added via `register_hook()`. The loop only calls `trigger_hooks()`.\n\n---\n\n## How It Works\n\n**Hook registry**: a dict mapping event names to callback lists.\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # return value ≠ None → hook says \"stop\"\n return result\n return None\n```\n\nIn the teaching version, PreToolUse returning non-None means block execution; Stop returning non-None means force continuation. UserPromptSubmit and PostToolUse return values are unused.\n\n**UserPromptSubmit**, triggers after user input, before entering the LLM. 
CC can intercept or modify input; the teaching version only logs:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nIn the main loop, triggered right after user input:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← before entering LLM\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**, hooks before and after tool execution. s03's permission check logic is now wrapped as a PreToolUse hook, plus a logging hook and a large-output reminder:\n\n```python\n# PreToolUse: permission check (s03 logic, moved from loop to hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: logging\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: large output reminder\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**, triggers when the loop is about to exit (`stop_reason != \"tool_use\"`). 
The teaching version prints a cleanup summary:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\nIn agent_loop, triggered before exit:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← before exiting\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**Only one change in the loop**: s03 directly called `check_permission(block)`, s04 replaces it with `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hooks replace hardcoding\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\nFour hooks cover the critical nodes of the agent cycle: input → before execution → after execution → exit. 
The loop only calls trigger_hooks(); all logic lives in hook callbacks.\n\n---\n\n## Changes from s03\n\n| Component | Before (s03) | After (s04) |\n|-----------|-------------|-------------|\n| Extension method | check_permission() hardcoded in the loop | HOOKS registry + trigger_hooks() |\n| New functions | — | register_hook, trigger_hooks |\n| Hook callbacks | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| Loop | Directly calls check_permission() | Calls trigger_hooks(\"PreToolUse\", ...) |\n| Exit control | None | trigger_hooks(\"Stop\", ...) can prevent exit |\n| Input interception | None | trigger_hooks(\"UserPromptSubmit\", ...) can inject context |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md` (should pass directly, observe hook logs)\n2. `Create a file called test.txt` (after creation, observe if PostToolUse fires)\n3. `Delete all temporary files in /tmp` (bash + rm triggers permission hook)\n\nWhat to watch for: Before each tool execution, does the `[HOOK]` log appear? When permission is denied, was it intercepted by a hook or hardcoded in the loop?\n\n---\n\n## What's Next\n\nThe Agent can now safely execute operations. But does it ever stop to think \"what should I do first, and what next?\" Given a complex task, does it jump straight in, or plan first?\n\n→ s05 TodoWrite: Give the Agent a planning tool. Make a list first, then execute.\n\n
\nDive into CC Source Code\n\n> The following is based on a complete analysis of CC source code `toolHooks.ts` (650 lines), `hooks.ts`, `stopHooks.ts`, and `coreTypes.ts`.\n\n### 1. Hook Events: Not Just 4, but 27\n\nThe teaching version covers only PreToolUse and PostToolUse. CC actually has 27 hook events (`coreTypes.ts:25-53`):\n\n| Category | Events |\n|----------|--------|\n| Tool-related | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| Session-related | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| User interaction | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| Sub-agents | `SubagentStart`, `SubagentStop` |\n| Compaction-related | `PreCompact`, `PostCompact` |\n| Team-related | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| Other | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\nThe teaching version covers only 4 core events (UserPromptSubmit, PreToolUse, PostToolUse, Stop) because they cover every critical node of a complete agent cycle. The other 23 follow the same pattern.\n\n### 2. HookResult Common Fields\n\nCC's `HookResult` (`types/hooks.ts:260-275`) has 14 fields. Common ones:\n\n| Field | Type | Purpose |\n|-------|------|---------|\n| `message` | Message | Optional UI message |\n| `blockingError` | HookBlockingError | Blocking error → injected into conversation for model self-correction |\n| `outcome` | success/blocking/non_blocking_error/cancelled | Execution result |\n| `preventContinuation` | boolean | Prevent subsequent execution |\n| `stopReason` | string | Stop reason description |\n| `permissionBehavior` | allow/deny/ask/passthrough | Hook returns permission decision |\n| `updatedInput` | Record | Modify tool input |\n| `additionalContext` | string | Additional context |\n| `updatedMCPToolOutput` | unknown | MCP tool output modification |\n\n### 3. 
Key Invariant: Hook 'allow' Cannot Bypass deny/ask Rules\n\nThis is the most important security design in CC's permission system (`toolHooks.ts:325-331`): **when a hook returns allow, it still checks settings.json deny/ask rules.** Even if the user's hook script says \"allow\", if the tool is disabled in settings.json, the operation is still blocked.\n\nThe teaching version doesn't have this layer; hooks returning non-None directly interrupt. This is sufficient for teaching, but would create a security vulnerability in production.\n\n### 4. stopHookActive Mechanism\n\nCC's Stop hooks have an infinite-loop prevention mechanism (`query.ts:212,1300`): the `stopHookActive` state field. When stop hooks produce a blockingError, the loop re-enters with `stopHookActive: true`. Subsequent iterations see this flag and don't trigger stop hooks again. This prevents a never-stopping bug: model self-corrects → stop hook errors again → model self-corrects again → stop hook errors again...\n\n### 5. hook_stopped_continuation\n\nWhen PostToolUse hooks return `preventContinuation: true`, a `hook_stopped_continuation` attachment is produced (`toolHooks.ts:117-130`). query.ts (L1388-1393) detects it and sets `shouldPreventContinuation = true`, causing the loop to exit. This is the mechanism for \"hooks gracefully shut down the Agent\" — not a crash, but a completion.\n\n### Teaching Version Simplifications Are Intentional\n\n- 27 events → 4 (UserPromptSubmit/PreToolUse/PostToolUse/Stop): covers agent cycle critical nodes\n- 14 fields → simple return values (None = continue, non-None = interrupt/continue): minimal cognitive load\n- Hook allow vs deny/ask invariant → omitted: teaching version has no settings.json layer\n- stopHookActive → omitted: teaching version Stop hook only does simple continuation, no infinite-loop prevention needed\n\n
\n\n\n" }, { "version": "s04", "locale": "zh", "title": "s04: Hooks — 挂在循环上,不写进循环里", - "content": "# s04: Hooks — 挂在循环上,不写进循环里\n\ns01 → s02 → s03 → `s04` → [s05](/zh/s05) → s06 → ... → s20\n\n> *\"挂在循环上, 不写进循环里\"* — hook 在工具执行前后注入扩展逻辑。\n>\n> **Harness 层**: hook — 扩展点不侵入循环。\n\n---\n\n## 问题\n\ns03 的 Agent 有权限检查了。但每次加一个新检查,比如\"记录每次 bash 调用\"、\"操作后自动 git add\",都要修改 `agent_loop` 函数。\n\n循环很快就变成了这样:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # 加一行\n check_permission(block) # 加一行\n notify_slack(block) # 又加一行\n output = execute(block)\n auto_git_add(block) # 再加一行\n # ... 很快循环就认不出来了\n```\n\n你想扩展的是 Agent 的行为,但你改的却是循环本身。循环应该是一个稳定的核心,扩展应该挂在外面。\n\n---\n\n## 解决方案\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.svg)\n\ns03 的循环和权限逻辑完全保留。唯一的变动是把 `check_permission()` 从循环体内移到了 hook 上,循环不再直接调用任何检查函数,改为 `trigger_hooks(\"PreToolUse\", block)`,由注册表决定跑什么。\n\n四个事件,覆盖一个完整的 agent cycle:\n\n| 事件 | 触发时机 | 典型用途 |\n|------|---------|---------|\n| UserPromptSubmit | 用户输入提交后、进入 LLM 前 | 输入验证、注入上下文 |\n| PreToolUse | 工具执行前 | 权限检查、日志记录 |\n| PostToolUse | 工具执行后 | 副作用(自动 git add 等)、输出检查 |\n| Stop | 循环即将退出时 | 收尾清理(CC 还支持强制续跑) |\n\n扩展通过 `register_hook()` 添加,循环只调用 `trigger_hooks()`。\n\n---\n\n## 工作原理\n\n**hook 注册表**:一个字典,事件名映射到回调列表。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 返回值 ≠ None → hook 说\"停\"\n return result\n return None\n```\n\n教学版中,PreToolUse 的非 None 返回值会阻止本次工具执行,Stop 的非 None 返回值会强制续跑。UserPromptSubmit 和 PostToolUse 的返回值未被使用。\n\n**UserPromptSubmit**,用户输入提交后、进入 LLM 前触发。CC 中可以拦截或修改输入,教学版只做日志演示:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory 
info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\n在主循环中,用户输入后立即触发:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← 进入 LLM 之前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**,工具执行前后的 hook。s03 的权限检查逻辑现在包装成 PreToolUse hook,再加一个日志 hook 和一个大输出提醒:\n\n```python\n# PreToolUse: 权限检查(s03 的逻辑,从循环移到 hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: 日志\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大文件提醒\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**,循环即将退出时触发(`stop_reason != \"tool_use\"`)。教学版用于打印收尾统计:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force 
continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\n在 agent_loop 中,退出前触发:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 退出之前\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**循环里只改了一处**:s03 直接调用 `check_permission(block)`,s04 改为 `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hook 替代硬编码\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\n四个 hook 覆盖了 agent cycle 的关键节点:输入→执行前→执行后→退出。循环只负责调用 trigger_hooks(),具体逻辑全在 hook 回调里。\n\n---\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|------|-----------|-----------|\n| 扩展方式 | check_permission() 硬编码在循环里 | HOOKS 注册表 + trigger_hooks() |\n| 新函数 | — | register_hook, trigger_hooks |\n| hook 回调 | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| 循环 | 直接调用 check_permission() | 调用 trigger_hooks(\"PreToolUse\", ...) |\n| 退出控制 | 无 | trigger_hooks(\"Stop\", ...) 可阻止退出 |\n| 输入拦截 | 无 | trigger_hooks(\"UserPromptSubmit\", ...) 可注入上下文 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md`(应该直接通过,观察 hook 日志)\n2. `Create a file called test.txt`(通过后观察 PostToolUse 是否触发)\n3. 
`Delete all temporary files in /tmp`(bash + rm 触发权限 hook)\n\n观察重点:每次工具执行前,是否出现了 `[HOOK]` 日志?权限被拒时,是 hook 拦截的还是循环里硬编码的?\n\n---\n\n## 接下来\n\nAgent 现在能安全执行操作了。但它有没有停下来想过\"我应该先做什么,再做什么\"?给它一个复杂任务,它是一上来就动手,还是先列个计划?\n\ns05 TodoWrite → 给 Agent 一个计划工具。先列清单,再做。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` 的完整分析。\n\n### 一、Hook 事件:不止这 4 个,而是 27 个\n\n教学版只讲了 PreToolUse 和 PostToolUse。CC 实际有 27 个 hook 事件(`coreTypes.ts:25-53`):\n\n| 类别 | 事件 |\n|------|------|\n| 工具相关 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| 会话相关 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| 用户交互 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| 子 Agent | `SubagentStart`, `SubagentStop` |\n| 压缩相关 | `PreCompact`, `PostCompact` |\n| 团队相关 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| 其他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教学版只讲 4 个核心事件(UserPromptSubmit、PreToolUse、PostToolUse、Stop),因为它们覆盖了一个完整 agent cycle 的关键节点。其他 23 个都是同样的模式。\n\n### 二、HookResult 常用字段摘录\n\nCC 的 `HookResult`(`types/hooks.ts:260-275`)有 14 个字段,以下是常用字段:\n\n| 字段 | 类型 | 用途 |\n|------|------|------|\n| `message` | Message | 可选 UI 消息 |\n| `blockingError` | HookBlockingError | 阻塞错误 → 注入对话让模型自纠 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 执行结果 |\n| `preventContinuation` | boolean | 阻止后续执行 |\n| `stopReason` | string | 停止原因描述 |\n| `permissionBehavior` | allow/deny/ask/passthrough | hook 返回权限决策 |\n| `updatedInput` | Record | 修改工具输入 |\n| `additionalContext` | string | 附加上下文 |\n| `updatedMCPToolOutput` | unknown | MCP 工具输出修改 |\n\n### 三、关键不变式:Hook 'allow' 不能绕过 deny/ask 规则\n\n这是 CC 权限系统最重要的安全设计(`toolHooks.ts:325-331`):**hook 返回 allow 时,仍然要检查 settings.json 的 deny/ask 规则**。即使用户的 hook 脚本说\"允许\",如果在 settings.json 中禁用了这个工具,操作仍然会被阻止。\n\n教学版没有这个层次,只把 PreToolUse 的非 None 返回值解释为阻止本次工具执行。这在教学场景中够了,但在生产环境中会形成安全漏洞。\n\n### 四、stopHookActive 机制\n\nCC 的 Stop hooks 有一个防无限循环机制(`query.ts:212,1300`):`stopHookActive` 状态字段。当 stop hooks 产生 blockingError 时,循环带 `stopHookActive: true` 重入下一轮。后续迭代中 stop hooks 看到这个标志就不会再次触发。这防止了一个永不停机的 bug:模型自纠后 stop hook 再次报错 → 模型再自纠 → stop hook 再报错...\n\n### 
五、hook_stopped_continuation\n\nPostToolUse hooks 返回 `preventContinuation: true` 时,会产生一个 `hook_stopped_continuation` 附件(`toolHooks.ts:117-130`)。query.ts(L1388-1393)检测到后设置 `shouldPreventContinuation = true`,循环退出。这是 \"hook 优雅地让 Agent 停机\" 的机制,不是崩溃,是完成。\n\n### 教学版的简化是刻意的\n\n- 27 个事件 → 4 个(UserPromptSubmit/PreToolUse/PostToolUse/Stop):覆盖 agent cycle 关键节点\n- 14 个字段 → 简单的返回值(None = 继续,非 None = 阻止/续跑):心智负担降到最低\n- Hook allow vs deny/ask 不变式 → 省略:教学版没有 settings.json 层\n- stopHookActive → 省略:教学版 Stop hook 只做简单续跑,不涉及防无限循环机制\n\n
\n\n\n" + "content": "# s04: Hooks — 挂在循环上,不写进循环里\n\ns01 → s02 → s03 → `s04` → [s05](/zh/s05) → s06 → ... → s20\n\n> *\"挂在循环上, 不写进循环里\"* — hook 在工具执行前后注入扩展逻辑。\n>\n> **Harness 层**: hook — 扩展点不侵入循环。\n\n---\n\n## 问题\n\ns03 的 Agent 有权限检查了。但每次加一个新检查,比如\"记录每次 bash 调用\"、\"操作后自动 git add\",都要修改 `agent_loop` 函数。\n\n循环很快就变成了这样:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n log_to_file(block) # 加一行\n check_permission(block) # 加一行\n notify_slack(block) # 又加一行\n output = execute(block)\n auto_git_add(block) # 再加一行\n # ... 很快循环就认不出来了\n```\n\n你想扩展的是 Agent 的行为,但你改的却是循环本身。循环应该是一个稳定的核心,扩展应该挂在外面。\n\n---\n\n## 解决方案\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.svg)\n\ns03 的循环和权限逻辑完全保留。唯一的变动是把 `check_permission()` 从循环体内移到了 hook 上,循环不再直接调用任何检查函数,改为 `trigger_hooks(\"PreToolUse\", block)`,由注册表决定跑什么。\n\n四个事件,覆盖一个完整的 agent cycle:\n\n| 事件 | 触发时机 | 典型用途 |\n|------|---------|---------|\n| UserPromptSubmit | 用户输入提交后、进入 LLM 前 | 输入验证、注入上下文 |\n| PreToolUse | 工具执行前 | 权限检查、日志记录 |\n| PostToolUse | 工具执行后 | 副作用(自动 git add 等)、输出检查 |\n| Stop | 循环即将退出时 | 收尾清理(CC 还支持强制续跑) |\n\n扩展通过 `register_hook()` 添加,循环只调用 `trigger_hooks()`。\n\n---\n\n## 工作原理\n\n**hook 注册表**:一个字典,事件名映射到回调列表。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 返回值 ≠ None → hook 说\"停\"\n return result\n return None\n```\n\n教学版中,PreToolUse 的非 None 返回值会阻止本次工具执行,Stop 的非 None 返回值会强制续跑。UserPromptSubmit 和 PostToolUse 的返回值未被使用。\n\n**UserPromptSubmit**,用户输入提交后、进入 LLM 前触发。CC 中可以拦截或修改输入,教学版只做日志演示:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] 
UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\n在主循环中,用户输入后立即触发:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← 进入 LLM 之前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**,工具执行前后的 hook。s03 的权限检查逻辑现在包装成 PreToolUse hook,再加一个日志 hook 和一个大输出提醒:\n\n```python\n# PreToolUse: 权限检查(s03 的逻辑,从循环移到 hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: 日志\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大文件提醒\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**,循环即将退出时触发(`stop_reason != \"tool_use\"`)。教学版用于打印收尾统计:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\n在 agent_loop 
中,退出前触发:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 退出之前\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**循环里只改了一处**:s03 直接调用 `check_permission(block)`,s04 改为 `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hook 替代硬编码\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\n四个 hook 覆盖了 agent cycle 的关键节点:输入→执行前→执行后→退出。循环只负责调用 trigger_hooks(),具体逻辑全在 hook 回调里。\n\n---\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|------|-----------|-----------|\n| 扩展方式 | check_permission() 硬编码在循环里 | HOOKS 注册表 + trigger_hooks() |\n| 新函数 | — | register_hook, trigger_hooks |\n| hook 回调 | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| 循环 | 直接调用 check_permission() | 调用 trigger_hooks(\"PreToolUse\", ...) |\n| 退出控制 | 无 | trigger_hooks(\"Stop\", ...) 可阻止退出 |\n| 输入拦截 | 无 | trigger_hooks(\"UserPromptSubmit\", ...) 可注入上下文 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md`(应该直接通过,观察 hook 日志)\n2. `Create a file called test.txt`(通过后观察 PostToolUse 是否触发)\n3. 
`Delete all temporary files in /tmp`(bash + rm 触发权限 hook)\n\n观察重点:每次工具执行前,是否出现了 `[HOOK]` 日志?权限被拒时,是 hook 拦截的还是循环里硬编码的?\n\n---\n\n## 接下来\n\nAgent 现在能安全执行操作了。但它有没有停下来想过\"我应该先做什么,再做什么\"?给它一个复杂任务,它是一上来就动手,还是先列个计划?\n\ns05 TodoWrite → 给 Agent 一个计划工具。先列清单,再做。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` 的完整分析。\n\n### 一、Hook 事件:不止这 4 个,而是 27 个\n\n教学版只讲了 4 个事件。CC 实际有 27 个 hook 事件(`coreTypes.ts:25-53`):\n\n| 类别 | 事件 |\n|------|------|\n| 工具相关 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| 会话相关 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| 用户交互 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| 子 Agent | `SubagentStart`, `SubagentStop` |\n| 压缩相关 | `PreCompact`, `PostCompact` |\n| 团队相关 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| 其他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教学版只讲 4 个核心事件(UserPromptSubmit、PreToolUse、PostToolUse、Stop),因为它们覆盖了一个完整 agent cycle 的关键节点。其他 23 个都是同样的模式。\n\n### 二、HookResult 常用字段摘录\n\nCC 的 `HookResult`(`types/hooks.ts:260-275`)有 14 个字段,以下是常用字段:\n\n| 字段 | 类型 | 用途 |\n|------|------|------|\n| `message` | Message | 可选 UI 消息 |\n| `blockingError` | HookBlockingError | 阻塞错误 → 注入对话让模型自纠 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 执行结果 |\n| `preventContinuation` | boolean | 阻止后续执行 |\n| `stopReason` | string | 停止原因描述 |\n| `permissionBehavior` | allow/deny/ask/passthrough | hook 返回权限决策 |\n| `updatedInput` | Record | 修改工具输入 |\n| `additionalContext` | string | 附加上下文 |\n| `updatedMCPToolOutput` | unknown | MCP 工具输出修改 |\n\n### 三、关键不变式:Hook 'allow' 不能绕过 deny/ask 规则\n\n这是 CC 权限系统最重要的安全设计(`toolHooks.ts:325-331`):**hook 返回 allow 时,仍然要检查 settings.json 的 deny/ask 规则**。即使用户的 hook 脚本说\"允许\",如果在 settings.json 中禁用了这个工具,操作仍然会被阻止。\n\n教学版没有这个层次,只把 PreToolUse 的非 None 返回值解释为阻止本次工具执行。这在教学场景中够了,但在生产环境中会形成安全漏洞。\n\n### 四、stopHookActive 机制\n\nCC 的 Stop hooks 有一个防无限循环机制(`query.ts:212,1300`):`stopHookActive` 状态字段。当 stop hooks 产生 blockingError 时,循环带 `stopHookActive: true` 重入下一轮。后续迭代中 stop hooks 看到这个标志就不会再次触发。这防止了一个永不停机的 bug:模型自纠后 stop hook 再次报错 → 模型再自纠 → stop hook 再报错...\n\n### 
五、hook_stopped_continuation\n\nPostToolUse hooks 返回 `preventContinuation: true` 时,会产生一个 `hook_stopped_continuation` 附件(`toolHooks.ts:117-130`)。query.ts(L1388-1393)检测到后设置 `shouldPreventContinuation = true`,循环退出。这是 \"hook 优雅地让 Agent 停机\" 的机制,不是崩溃,是完成。\n\n### 教学版的简化是刻意的\n\n- 27 个事件 → 4 个(UserPromptSubmit/PreToolUse/PostToolUse/Stop):覆盖 agent cycle 关键节点\n- 14 个字段 → 简单的返回值(None = 继续,非 None = 阻止/续跑):心智负担降到最低\n- Hook allow vs deny/ask 不变式 → 省略:教学版没有 settings.json 层\n- stopHookActive → 省略:教学版 Stop hook 只做简单续跑,不涉及防无限循环机制\n\n
\n\n\n" }, { "version": "s04", "locale": "ja", "title": "s04: Hooks — ループに掛ける、ループには書き込まない", - "content": "# s04: Hooks — ループに掛ける、ループには書き込まない\n\ns01 → s02 → s03 → `s04` → [s05](/ja/s05) → s06 → ... → s20\n\n> *\"ループに掛ける、ループには書き込まない\"* — フックがツール実行の前後に拡張ロジックを注入する。\n>\n> **Harness レイヤー**: フック — ループを侵襲しない拡張ポイント。\n\n---\n\n## 課題\n\ns03 の Agent には権限チェックがある。しかし新しいチェックを追加するたび、「bash 呼び出しを毎回ログに記録」「操作後に自動 git add」、`agent_loop` 関数を修正する必要がある。\n\nループはすぐにこうなる:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # 一行追加\n check_permission(block) # 一行追加\n notify_slack(block) # さらに一行追加\n output = execute(block)\n auto_git_add(block) # さらに一行追加\n # ... もうループが見えない\n```\n\n拡張したいのは Agent の振る舞いなのに、変更しているのはループそのもの。ループは安定した核心であるべき。拡張は外側に掛ける。\n\n---\n\n## ソリューション\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.ja.svg)\n\ns03 のループと権限ロジックは完全に保持される。唯一の変更点は `check_permission()` をループ本体内からフックに移動したこと。ループはもうチェック関数を直接呼び出さず、代わりに `trigger_hooks(\"PreToolUse\", block)` を呼び、登録済みのフックが何を実行するかを決める。\n\n4 つのイベントで、完全な agent cycle をカバー:\n\n| イベント | 発火タイミング | 典型的な用途 |\n|----------|--------------|-------------|\n| UserPromptSubmit | ユーザー入力後、LLM に入る前 | 入力バリデーション、コンテキスト注入 |\n| PreToolUse | ツール実行前 | 権限チェック、ログ記録 |\n| PostToolUse | ツール実行後 | 副作用(自動 git add など)、出力チェック |\n| Stop | ループが終了する直前 | クリーンアップ(CC は強制続行もサポート) |\n\n拡張は `register_hook()` で追加する。ループは `trigger_hooks()` を呼ぶだけ。\n\n---\n\n## 仕組み\n\n**フック登録簿**:イベント名をコールバックリストにマッピングする辞書。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 戻り値 ≠ None → フックが「止め」と指示\n return result\n return None\n```\n\n教学版では、PreToolUse の非 None 戻り値は実行阻止を意味し、Stop の非 None 戻り値は強制続行を意味する。UserPromptSubmit と 
PostToolUse の戻り値は未使用。\n\n**UserPromptSubmit**、ユーザー入力後、LLM に入る前に発火。CC では入力の横取りや変更が可能、教学版はログ出力のみ:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = 変更なし、プロンプトを通す\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nメインループでは、ユーザー入力直後に発火:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← LLM に入る前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**、ツール実行の前後のフック。s03 の権限チェックロジックは PreToolUse フックに包まれ、さらにログフックと大出力リマインダーが追加される:\n\n```python\n# PreToolUse: 権限チェック(s03 のロジック、ループからフックに移動)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: ログ\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大ファイルリマインダー\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**、ループが終了する直前に発火(`stop_reason != \"tool_use\"`)。教学版ではクリーンアップ統計を印刷:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = 終了を許可、return 文字列 = 強制続行\n\nregister_hook(\"Stop\", summary_hook)\n```\n\nagent_loop 内では、終了前に発火:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 終了する前に\n if force:\n # フックがメッセージを返した → 注入して続行\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**ループ内で変更されたのは一箇所だけ**:s03 は直接 `check_permission(block)` を呼び出していたが、s04 は `trigger_hooks(\"PreToolUse\", block)` に置き換えた:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: フックがハードコードを代替\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", 
\"tool_use_id\": block.id,\n \"content\": output})\n```\n\n4 つのフックが agent cycle の重要ノードをカバー:入力→実行前→実行後→終了。ループは trigger_hooks() を呼ぶだけで、具体的なロジックは全てフックコールバックにある。\n\n---\n\n## s03 からの変更\n\n| コンポーネント | 変更前 (s03) | 変更後 (s04) |\n|--------------|-------------|-------------|\n| 拡張方式 | check_permission() をループ内にハードコード | HOOKS 登録簿 + trigger_hooks() |\n| 新規関数 | — | register_hook, trigger_hooks |\n| フックコールバック | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| ループ | check_permission() を直接呼び出し | trigger_hooks(\"PreToolUse\", ...) を呼び出し |\n| 終了制御 | なし | trigger_hooks(\"Stop\", ...) が終了を阻止可能 |\n| 入力横取り | なし | trigger_hooks(\"UserPromptSubmit\", ...) がコンテキスト注入可能 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Read the file README.md`(そのまま通過するはず、フックログを観察)\n2. `Create a file called test.txt`(作成後、PostToolUse が発火するか観察)\n3. `Delete all temporary files in /tmp`(bash + rm で権限フックが発動)\n\n観察のポイント:各ツール実行前に `[HOOK]` ログが表示されるか? 権限が拒否されたとき、フックが拦截したのか、ループ内のハードコードが拦截したのか?\n\n---\n\n## 次へ\n\nAgent は安全に操作を実行できるようになった。しかし「まず何をして、次に何をすべきか」を立ち止まって考えたことはあるか? 複雑なタスクを与えたとき、すぐに取り掛かるのか、まず計画を立てるのか?\n\n→ s05 TodoWrite:Agent に計画ツールを与える。まずリストを作り、それから実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` の完全分析に基づく。\n\n### 一、Hook イベント:4 つではなく 27 個\n\n教育版は PreToolUse と PostToolUse のみを取り上げる。CC には実際に 27 のフックイベントがある(`coreTypes.ts:25-53`):\n\n| カテゴリ | イベント |\n|----------|---------|\n| ツール関連 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| セッション関連 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| ユーザー対話 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| サブエージェント | `SubagentStart`, `SubagentStop` |\n| 圧縮関連 | `PreCompact`, `PostCompact` |\n| チーム関連 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| その他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教育版は 4 つのコアイベント(UserPromptSubmit、PreToolUse、PostToolUse、Stop)のみを取り上げる。これらで agent cycle の重要ノードを全てカバーできる。残り 23 個は同じパターン。\n\n### 二、HookResult よく使うフィールド抜粋\n\nCC の `HookResult`(`types/hooks.ts:260-275`)には 14 のフィールドがある。よく使うもの:\n\n| フィールド | 型 | 用途 |\n|-----------|-----|------|\n| `message` | Message | オプションの UI メッセージ |\n| `blockingError` | HookBlockingError | ブロッキングエラー → 会話に注入してモデルが自己修正 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 実行結果 |\n| `preventContinuation` | boolean | 後続実行を阻止 |\n| `stopReason` | string | 停止理由の説明 |\n| `permissionBehavior` | allow/deny/ask/passthrough | フックが権限決定を返す |\n| `updatedInput` | Record | ツール入力の変更 |\n| `additionalContext` | string | 追加コンテキスト |\n| `updatedMCPToolOutput` | unknown | MCP ツール出力の変更 |\n\n### 三、重要な不変条件:Hook 'allow' は deny/ask ルールをバイパスできない\n\nこれは CC 権限システムで最も重要なセキュリティ設計(`toolHooks.ts:325-331`):**フックが allow を返しても、settings.json の deny/ask ルールをチェックする。** ユーザーのフックスクリプトが「許可」と言っても、settings.json でそのツールが無効になっていれば、操作は阻止される。\n\n教育版にはこの階層がない。フックが非 None を返せば直接中断。教育目的では十分だが、本番環境ではセキュリティホールになる。\n\n### 四、stopHookActive 機構\n\nCC の Stop フックには無限ループ防止機構がある(`query.ts:212,1300`):`stopHookActive` 状態フィールド。Stop フックが blockingError を発生させると、ループは 
`stopHookActive: true` で次のラウンドに再入する。後続のイテレーションではこのフラグを見て Stop フックを再トリガーしない。これで「永久に止まらない」バグを防ぐ:モデルが自己修正 → Stop フックが再度エラー → モデルが再修正 → Stop フックが再度エラー... を防止。\n\n### 五、hook_stopped_continuation\n\nPostToolUse フックが `preventContinuation: true` を返すと、`hook_stopped_continuation` アタッチメントが生成される(`toolHooks.ts:117-130`)。query.ts(L1388-1393)はそれを検出して `shouldPreventContinuation = true` を設定し、ループが終了する。これは「フックが Agent を優雅に停止させる」機構 — クラッシュではなく、完了。\n\n### 教育版の簡略化は意図的\n\n- 27 イベント → 4(UserPromptSubmit/PreToolUse/PostToolUse/Stop):agent cycle の重要ノードをカバー\n- 14 フィールド → 単純な戻り値(None = 続行、非 None = 中断/続行):認知負荷を最小限に\n- Hook allow vs deny/ask の不変条件 → 省略:教育版に settings.json 層はない\n- stopHookActive → 省略:教育版の Stop フックは単純な続行のみ、無限ループ防止は不要\n\n
\n\n\n" + "content": "# s04: Hooks — ループに掛ける、ループには書き込まない\n\ns01 → s02 → s03 → `s04` → [s05](/ja/s05) → s06 → ... → s20\n\n> *\"ループに掛ける、ループには書き込まない\"* — フックがツール実行の前後に拡張ロジックを注入する。\n>\n> **Harness レイヤー**: フック — ループを侵襲しない拡張ポイント。\n\n---\n\n## 課題\n\ns03 の Agent には権限チェックがある。しかし新しいチェックを追加するたび、「bash 呼び出しを毎回ログに記録」「操作後に自動 git add」、`agent_loop` 関数を修正する必要がある。\n\nループはすぐにこうなる:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n log_to_file(block) # 一行追加\n check_permission(block) # 一行追加\n notify_slack(block) # さらに一行追加\n output = execute(block)\n auto_git_add(block) # さらに一行追加\n # ... もうループが見えない\n```\n\n拡張したいのは Agent の振る舞いなのに、変更しているのはループそのもの。ループは安定した核心であるべき。拡張は外側に掛ける。\n\n---\n\n## ソリューション\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.ja.svg)\n\ns03 のループと権限ロジックは完全に保持される。唯一の変更点は `check_permission()` をループ本体内からフックに移動したこと。ループはもうチェック関数を直接呼び出さず、代わりに `trigger_hooks(\"PreToolUse\", block)` を呼び、登録済みのフックが何を実行するかを決める。\n\n4 つのイベントで、完全な agent cycle をカバー:\n\n| イベント | 発火タイミング | 典型的な用途 |\n|----------|--------------|-------------|\n| UserPromptSubmit | ユーザー入力後、LLM に入る前 | 入力バリデーション、コンテキスト注入 |\n| PreToolUse | ツール実行前 | 権限チェック、ログ記録 |\n| PostToolUse | ツール実行後 | 副作用(自動 git add など)、出力チェック |\n| Stop | ループが終了する直前 | クリーンアップ(CC は強制続行もサポート) |\n\n拡張は `register_hook()` で追加する。ループは `trigger_hooks()` を呼ぶだけ。\n\n---\n\n## 仕組み\n\n**フック登録簿**:イベント名をコールバックリストにマッピングする辞書。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 戻り値 ≠ None → フックが「止め」と指示\n return result\n return None\n```\n\n教学版では、PreToolUse の非 None 戻り値は実行阻止を意味し、Stop の非 None 戻り値は強制続行を意味する。UserPromptSubmit と PostToolUse の戻り値は未使用。\n\n**UserPromptSubmit**、ユーザー入力後、LLM に入る前に発火。CC 
では入力の横取りや変更が可能、教学版はログ出力のみ:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = 変更なし、プロンプトを通す\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nメインループでは、ユーザー入力直後に発火:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← LLM に入る前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**、ツール実行の前後のフック。s03 の権限チェックロジックは PreToolUse フックに包まれ、さらにログフックと大出力リマインダーが追加される:\n\n```python\n# PreToolUse: 権限チェック(s03 のロジック、ループからフックに移動)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: ログ\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大ファイルリマインダー\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**、ループが終了する直前に発火(`stop_reason != \"tool_use\"`)。教学版ではクリーンアップ統計を印刷:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = 終了を許可、return 文字列 = 強制続行\n\nregister_hook(\"Stop\", summary_hook)\n```\n\nagent_loop 内では、終了前に発火:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 終了する前に\n if force:\n # フックがメッセージを返した → 注入して続行\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**ループ内で変更されたのは一箇所だけ**:s03 は直接 `check_permission(block)` を呼び出していたが、s04 は `trigger_hooks(\"PreToolUse\", block)` に置き換えた:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: フックがハードコードを代替\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", 
\"tool_use_id\": block.id,\n \"content\": output})\n```\n\n4 つのフックが agent cycle の重要ノードをカバー:入力→実行前→実行後→終了。ループは trigger_hooks() を呼ぶだけで、具体的なロジックは全てフックコールバックにある。\n\n---\n\n## s03 からの変更\n\n| コンポーネント | 変更前 (s03) | 変更後 (s04) |\n|--------------|-------------|-------------|\n| 拡張方式 | check_permission() をループ内にハードコード | HOOKS 登録簿 + trigger_hooks() |\n| 新規関数 | — | register_hook, trigger_hooks |\n| フックコールバック | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| ループ | check_permission() を直接呼び出し | trigger_hooks(\"PreToolUse\", ...) を呼び出し |\n| 終了制御 | なし | trigger_hooks(\"Stop\", ...) が終了を阻止可能 |\n| 入力横取り | なし | trigger_hooks(\"UserPromptSubmit\", ...) がコンテキスト注入可能 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Read the file README.md`(そのまま通過するはず、フックログを観察)\n2. `Create a file called test.txt`(作成後、PostToolUse が発火するか観察)\n3. `Delete all temporary files in /tmp`(bash + rm で権限フックが発動)\n\n観察のポイント:各ツール実行前に `[HOOK]` ログが表示されるか? 権限が拒否されたとき、フックがブロックしたのか、ループ内のハードコードがブロックしたのか?\n\n---\n\n## 次へ\n\nAgent は安全に操作を実行できるようになった。しかし「まず何をして、次に何をすべきか」を立ち止まって考えたことはあるか? 複雑なタスクを与えたとき、すぐに取り掛かるのか、まず計画を立てるのか?\n\n→ s05 TodoWrite:Agent に計画ツールを与える。まずリストを作り、それから実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` の完全分析に基づく。\n\n### 一、Hook イベント:4 つではなく 27 個\n\n教育版は 4 つのイベントのみを取り上げる。CC には実際に 27 のフックイベントがある(`coreTypes.ts:25-53`):\n\n| カテゴリ | イベント |\n|----------|---------|\n| ツール関連 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| セッション関連 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| ユーザー対話 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| サブエージェント | `SubagentStart`, `SubagentStop` |\n| 圧縮関連 | `PreCompact`, `PostCompact` |\n| チーム関連 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| その他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教育版は 4 つのコアイベント(UserPromptSubmit、PreToolUse、PostToolUse、Stop)のみを取り上げる。これらで agent cycle の重要ノードを全てカバーできる。残り 23 個は同じパターン。\n\n### 二、HookResult よく使うフィールド抜粋\n\nCC の `HookResult`(`types/hooks.ts:260-275`)には 14 のフィールドがある。よく使うもの:\n\n| フィールド | 型 | 用途 |\n|-----------|-----|------|\n| `message` | Message | オプションの UI メッセージ |\n| `blockingError` | HookBlockingError | ブロッキングエラー → 会話に注入してモデルが自己修正 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 実行結果 |\n| `preventContinuation` | boolean | 後続実行を阻止 |\n| `stopReason` | string | 停止理由の説明 |\n| `permissionBehavior` | allow/deny/ask/passthrough | フックが権限決定を返す |\n| `updatedInput` | Record | ツール入力の変更 |\n| `additionalContext` | string | 追加コンテキスト |\n| `updatedMCPToolOutput` | unknown | MCP ツール出力の変更 |\n\n### 三、重要な不変条件:Hook 'allow' は deny/ask ルールをバイパスできない\n\nこれは CC 権限システムで最も重要なセキュリティ設計(`toolHooks.ts:325-331`):**フックが allow を返しても、settings.json の deny/ask ルールをチェックする。** ユーザーのフックスクリプトが「許可」と言っても、settings.json でそのツールが無効になっていれば、操作は阻止される。\n\n教育版にはこの階層がない。フックが非 None を返せば直接中断。教育目的では十分だが、本番環境ではセキュリティホールになる。\n\n### 四、stopHookActive 機構\n\nCC の Stop フックには無限ループ防止機構がある(`query.ts:212,1300`):`stopHookActive` 状態フィールド。Stop フックが blockingError を発生させると、ループは 
`stopHookActive: true` で次のラウンドに再入する。後続のイテレーションではこのフラグを見て Stop フックを再トリガーしない。これで「永久に止まらない」バグを防ぐ:モデルが自己修正 → Stop フックが再度エラー → モデルが再修正 → Stop フックが再度エラー... という連鎖を断ち切る。\n\n### 五、hook_stopped_continuation\n\nPostToolUse フックが `preventContinuation: true` を返すと、`hook_stopped_continuation` アタッチメントが生成される(`toolHooks.ts:117-130`)。query.ts(L1388-1393)はそれを検出して `shouldPreventContinuation = true` を設定し、ループが終了する。これは「フックが Agent を優雅に停止させる」機構 — クラッシュではなく、完了。\n\n### 教育版の簡略化は意図的\n\n- 27 イベント → 4(UserPromptSubmit/PreToolUse/PostToolUse/Stop):agent cycle の重要ノードをカバー\n- 14 フィールド → 単純な戻り値(None = 続行、非 None = 中断/続行):認知負荷を最小限に\n- Hook allow vs deny/ask の不変条件 → 省略:教育版に settings.json 層はない\n- stopHookActive → 省略:教育版の Stop フックは単純な続行のみ、無限ループ防止は不要\n\n
\n\n\n" }, { "version": "s05", @@ -111,19 +111,19 @@ "version": "s07", "locale": "en", "title": "s07: Skill Loading — Load Only When Needed", - "content": "# s07: Skill Loading — Load Only When Needed\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/en/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — Inject via tool_result, not system prompt.\n>\n> **Harness Layer**: Knowledge — load on demand, don't fill the context.\n\n---\n\n## The Problem\n\nYour project has a React component spec, a SQL style guide, and an API design doc. You want the Agent to follow these specs automatically. The most straightforward idea — stuff them all into the system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 lines\n + open(\"docs/sql-style.md\").read() # 1500 lines\n + open(\"docs/api-design.md\").read() # 3000 lines\n)\n```\n\n6500 lines of system prompt. The Agent carries these docs on every LLM call — whether it's changing a CSS color or fixing a SQL query. 99% of the content is irrelevant to the current task, burning tokens for nothing.\n\n---\n\n## The Solution\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.en.svg)\n\nThe minimal hook structure, `todo_write`, and sub-Agent from the previous chapter are preserved. This chapter focuses on the new `load_skill` tool. At startup, inject the skill catalog into the SYSTEM prompt; at runtime, register one more tool to load full content, spending tokens only when used.\n\nTwo-level design:\n\n| Level | Location | Timing | Cost |\n|-------|----------|--------|------|\n| 1. Catalog | system prompt | Injected at startup (harness scans skills/) | ~100 tokens/skill, carried every turn |\n| 2. 
Content | tool_result | When Agent calls load_skill | ~2000 tokens/skill, on demand |\n\nThe dispatch mechanism is unchanged, `load_skill` auto-dispatches via `TOOL_HANDLERS[block.name]`.\n\n---\n\n## How It Works\n\n**skills/ directory**, one subdirectory per skill, each containing a `SKILL.md` file:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**Level 1: Inject catalog at startup**: the harness calls `_scan_skills()` at startup to scan the skills/ directory, parsing each SKILL.md's YAML frontmatter (`name`, `description`) into a `SKILL_REGISTRY` dictionary. `list_skills()` generates the catalog from the registry, injected into the SYSTEM prompt. The Agent sees \"which skills I have available\" every turn, with no extra API calls:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**Level 2: load_skill**: the Agent decides \"I need the SQL style guide\" and calls `load_skill(\"sql-style\")`. Lookup goes through the registry, not file paths, eliminating path traversal risk. 
The content is injected via `tool_result`:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\nThe key distinction: skill content is not part of the system prompt. It enters the current messages as a tool result. Subsequent calls carry it along with the history until context compaction, truncation, or session end. This naturally connects to s08's compact: on-demand loading solves \"don't carry what you shouldn't\", compact solves \"how to drop what you should.\"\n\n---\n\n## Changes from s06\n\n| Component | Before (s06) | After (s07) |\n|-----------|-------------|-------------|\n| Tool count | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| Knowledge loading | None | Two-level: startup catalog in SYSTEM + runtime load_skill |\n| SYSTEM prompt | Static string | Startup scan of skills/ injects catalog |\n| Skill registry | None | SKILL_REGISTRY (populated at startup, prevents path traversal) |\n| Loop | Unchanged | Unchanged (skill tool auto-dispatches) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\nTry these prompts:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\nWhat to watch for: Does the Agent know available skills from the SYSTEM catalog? Does `[HOOK] load_skill` appear when full instructions are needed? Does the answer use the loaded skill's instructions?\n\n---\n\n## What's Next\n\nOn-demand loading solved \"don't carry what you shouldn't.\" But another problem looms: after the Agent works for 30 minutes, the messages list fills up with intermediate process. Old tool_results, stale file contents, occupying context but adding no value.\n\n→ s08 Context Compact: A four-layer compaction strategy. Cheap layers run first, expensive layers run last.\n\n
\nDive into CC Source Code\n\n> The following is based on analysis of CC source code `loadSkillsDir.ts`, `SkillTool.ts`, `bundledSkills.ts`, `commands.ts`.\n\n### 1. Skill Sources: Not Just One skills/ Directory\n\nThe teaching version assumes all skills live in a `skills/` directory. CC loads from multiple sources spread across multiple files: `loadSkillsDir.ts` handles user/project/`--add-dir` directories and legacy commands (`.claude/commands/`); `bundledSkills.ts` handles built-in skills; `SkillTool.ts` handles MCP remote skills; `commands.ts` handles command aggregation. Types include managed/policy skills, user skills (`~/.claude/skills/`), project skills (`.claude/skills/`), `--add-dir` skills, legacy commands, dynamic skills, conditional skills (with `paths` frontmatter, activated by file path), bundled skills, plugin skills, MCP skills.\n\n### 2. SKILL.md Frontmatter — Common Fields\n\nCC's SKILL.md YAML frontmatter is parsed by `parseSkillFrontmatterFields()` in `loadSkillsDir.ts`. Common fields include:\n\n| Field | Purpose |\n|-------|---------|\n| `name` / `description` | Display name and description |\n| `when_to_use` | Guides the model on when to invoke |\n| `allowed-tools` | Auto-allow list of tools available to the skill |\n| `context` | `inline` (default) or `fork` (run as sub-Agent) |\n| `model` | Model override (haiku/sonnet/opus/inherit) |\n| `hooks` | Skill-level hook configuration |\n| `paths` | Glob patterns for conditional activation |\n| `user-invocable` | Users can invoke via `/name` |\n\nThe complete field list changes across versions; above are the core fields relevant to the teaching version.\n\n### 3. Precise Implementation of Two-Level Loading\n\n1. **Catalog (at startup)**: `getSkillDirCommands()` scans directory → registers as `Command` objects containing only metadata. `getSkillListingAttachments()` formats the skill list as attachments, budgeted at ~1% of the context window (cap 8000 characters).\n2. 
**Load (on invocation)**: Model calls `Skill` tool (input fields are `skill` + optional `args`; teaching version uses `name`) → `getPromptForCommand()` expands full SKILL.md content → `SkillTool` returns a tool_result with display text `\"Launching skill: {name}\"`, while the actual skill content is injected via `newMessages`. The teaching version merges both into \"injected via tool_result\" as a simplification.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multiple files and sources → 1 `skills/` directory: sufficient to demonstrate the core concept of two-level loading\n- Multiple frontmatter fields → only parse name/description: reduces parsing complexity\n- Forked skills (`context: 'fork'`) → omitted: the teaching version only expands inline skill loading\n- `Skill` tool input `skill`+`args` → teaching version uses `name`: avoids extra argument parsing complexity\n\n
\n\n\n" + "content": "# s07: Skill Loading — Load Only When Needed\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/en/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — Inject via tool_result, not system prompt.\n>\n> **Harness Layer**: Knowledge — load on demand, don't fill the context.\n\n---\n\n## The Problem\n\nYour project has a React component spec, a SQL style guide, and an API design doc. You want the Agent to follow these specs automatically. The most straightforward idea — stuff them all into the system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 lines\n + open(\"docs/sql-style.md\").read() # 1500 lines\n + open(\"docs/api-design.md\").read() # 3000 lines\n)\n```\n\n6500 lines of system prompt. The Agent carries these docs on every LLM call — whether it's changing a CSS color or fixing a SQL query. 99% of the content is irrelevant to the current task, burning tokens for nothing.\n\n---\n\n## The Solution\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.en.svg)\n\nThe minimal hook structure, `todo_write`, and sub-Agent from the previous chapter are preserved. This chapter focuses on the new `load_skill` tool. At startup, inject the skill catalog into the SYSTEM prompt; at runtime, register one more tool to load full content, spending tokens only when used.\n\nTwo-level design:\n\n| Level | Location | Timing | Cost |\n|-------|----------|--------|------|\n| 1. Catalog | system prompt | Injected at startup (harness scans skills/) | ~100 tokens/skill, carried every turn |\n| 2. 
Content | tool_result | When Agent calls load_skill; SKILL.md can guide later read_file/bash access to extra resources | ~2000 tokens/skill, on demand |\n\nThe dispatch mechanism is unchanged, `load_skill` auto-dispatches via `TOOL_HANDLERS[block.name]`.\n\n---\n\n## How It Works\n\n**skills/ directory**, one subdirectory per skill, each containing a `SKILL.md` file:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**Level 1: Inject catalog at startup**: the harness calls `_scan_skills()` at startup to scan the skills/ directory, parsing each SKILL.md's YAML frontmatter (`name`, `description`) into a `SKILL_REGISTRY` dictionary. `list_skills()` generates the catalog from the registry, injected into the SYSTEM prompt. The Agent sees \"which skills I have available\" every turn, with no extra API calls:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**Level 2: load_skill**: the Agent decides \"I need the SQL style guide\" and calls `load_skill(\"sql-style\")`. Lookup goes through the registry, not file paths, eliminating path traversal risk. 
The SKILL.md content is injected via `tool_result`, and it can guide later access to referenced `references/`, `scripts/`, or `assets/` through the existing file and bash tools.\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\nThe key distinction: skill content is not part of the system prompt. It enters the current messages as a tool result. Subsequent calls carry it along with the history until context compaction, truncation, or session end. This naturally connects to s08's compact: on-demand loading solves \"don't carry what you shouldn't\", compact solves \"how to drop what you should.\"\n\n---\n\n## Changes from s06\n\n| Component | Before (s06) | After (s07) |\n|-----------|-------------|-------------|\n| Tool count | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| Knowledge loading | None | Two-level: startup catalog in SYSTEM + runtime load_skill; SKILL.md may guide later resource access |\n| SYSTEM prompt | Static string | Startup scan of skills/ injects catalog |\n| Skill registry | None | SKILL_REGISTRY (populated at startup, prevents path traversal) |\n| Loop | Unchanged | Unchanged (skill tool auto-dispatches) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\nTry these prompts:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\nWhat to watch for: Does the Agent know available skills from the SYSTEM catalog? Does `[HOOK] load_skill` appear when full instructions are needed? Does the answer use the loaded skill's instructions?\n\n---\n\n## What's Next\n\nOn-demand loading solved \"don't carry what you shouldn't.\" But another problem looms: after the Agent works for 30 minutes, the messages list fills up with intermediate process. 
Old tool_results, stale file contents, occupying context but adding no value.\n\n→ s08 Context Compact: A four-layer compaction strategy. Cheap layers run first, expensive layers run last.\n\n
\nDive into CC Source Code\n\n> The following is based on analysis of CC source code `loadSkillsDir.ts`, `SkillTool.ts`, `bundledSkills.ts`, `commands.ts`.\n\n### 1. Skill Sources: Not Just One skills/ Directory\n\nThe teaching version assumes all skills live in a `skills/` directory. CC loads from multiple sources spread across multiple files: `loadSkillsDir.ts` handles user/project/`--add-dir` directories and legacy commands (`.claude/commands/`); `bundledSkills.ts` handles built-in skills; `SkillTool.ts` handles MCP remote skills; `commands.ts` handles command aggregation. Types include managed/policy skills, user skills (`~/.claude/skills/`), project skills (`.claude/skills/`), `--add-dir` skills, legacy commands, dynamic skills, conditional skills (with `paths` frontmatter, activated by file path), bundled skills, plugin skills, MCP skills.\n\n### 2. SKILL.md Frontmatter — Common Fields\n\nCC's SKILL.md YAML frontmatter is parsed by `parseSkillFrontmatterFields()` in `loadSkillsDir.ts`. Common fields include:\n\n| Field | Purpose |\n|-------|---------|\n| `name` / `description` | Display name and description |\n| `when_to_use` | Guides the model on when to invoke |\n| `allowed-tools` | Auto-allow list of tools available to the skill |\n| `context` | `inline` (default) or `fork` (run as sub-Agent) |\n| `model` | Model override (haiku/sonnet/opus/inherit) |\n| `hooks` | Skill-level hook configuration |\n| `paths` | Glob patterns for conditional activation |\n| `user-invocable` | Users can invoke via `/name` |\n\nThe complete field list changes across versions; above are the core fields relevant to the teaching version.\n\n### 3. Precise Implementation of Two-Level Loading\n\n1. **Catalog (at startup)**: `getSkillDirCommands()` scans directory → registers as `Command` objects containing only metadata. `getSkillListingAttachments()` formats the skill list as attachments, budgeted at ~1% of the context window (cap 8000 characters).\n2. 
**Load (on invocation)**: Model calls `Skill` tool (input fields are `skill` + optional `args`; teaching version uses `name`) → `getPromptForCommand()` expands full SKILL.md content → `SkillTool` returns a tool_result with display text `\"Launching skill: {name}\"`, while the actual skill content is injected via `newMessages`. The teaching version merges both into \"injected via tool_result\" as a simplification; the loaded SKILL.md can still guide later access to referenced resources through existing file/bash tools.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multiple files and sources → 1 `skills/` directory: sufficient to demonstrate the core concept of two-level loading\n- Multiple frontmatter fields → only parse name/description: reduces parsing complexity\n- Forked skills (`context: 'fork'`) → omitted: the teaching version only expands inline skill loading\n- `Skill` tool input `skill`+`args` → teaching version uses `name`: avoids extra argument parsing complexity\n\n
\n\n\n" }, { "version": "s07", "locale": "zh", "title": "s07: Skill Loading — 用到的时候才加载", - "content": "# s07: Skill Loading — 用到的时候才加载\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/zh/s08) → s09 → ... → s20\n> *\"用到时再加载, 别全塞 prompt 里\"* — 通过 tool_result 注入, 不塞 system prompt。\n>\n> **Harness 层**: 知识 — 按需加载, 不堆满上下文。\n\n---\n\n## 问题\n\n你的项目有一套 React 组件规范、一份 SQL 风格指南、一份 API 设计文档。你希望 Agent 自动遵守这些规范。最直接的想法,全塞进 system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行 system prompt。Agent 每次调用 LLM 都带着这些文档——不管是在改 CSS 颜色还是修 SQL 查询。99% 的内容和当前任务无关,白白消耗 token。\n\n---\n\n## 解决方案\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.svg)\n\n保留上一章的最小 hook 结构、`todo_write` 和子 Agent,本章重点转向新增的 `load_skill` 工具。启动时把技能目录注入 SYSTEM prompt,运行时多注册一个工具加载完整内容,用到才花 token。\n\n两层设计:\n\n| 层 | 位置 | 时机 | 代价 |\n|---|------|------|------|\n| 1. 目录 | system prompt | 启动时注入(harness 扫描 skills/) | ~100 tokens/skill,每轮都带 |\n| 2. 
内容 | tool_result | Agent 调用 load_skill 时 | ~2000 tokens/skill,按需 |\n\ndispatch 机制不变,load_skill 通过 `TOOL_HANDLERS[block.name]` 分发。\n\n---\n\n## 工作原理\n\n**skills/ 目录**,每个技能一个子目录,包含 `SKILL.md` 文件:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第一级:启动时注入目录**:harness 启动时调用 `_scan_skills()` 扫描 skills/ 目录,解析每个 SKILL.md 的 YAML frontmatter(`name`、`description`),存入 `SKILL_REGISTRY` 字典。`list_skills()` 从注册表生成目录,注入 SYSTEM prompt。Agent 每轮都能看到\"我有哪些技能可用\",不花额外 API 调用:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第二级:load_skill**:Agent 决定\"我需要 SQL 风格指南\",调用 `load_skill(\"sql-style\")`。通过注册表查找,不走文件路径,没有路径遍历风险。内容通过 `tool_result` 注入:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n关键区别:技能内容不是 system prompt 的一部分,它作为一次工具结果进入当前 messages。后续调用会随历史一起携带,直到上下文压缩、截断或会话结束。这和 s08 的 compact 自然衔接:按需加载解决了\"不该提前带的不要带\",compact 解决\"该丢的怎么丢\"。\n\n---\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|------|-----------|-----------|\n| 工具数量 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知识加载 | 无 | 两级:启动时目录注入 SYSTEM + 运行时 load_skill |\n| SYSTEM 提示 | 静态字符串 | 启动时扫描 skills/ 注入目录 |\n| 技能注册表 | 无 | SKILL_REGISTRY(启动时填充,防路径遍历) |\n| 循环 | 不变 | 不变(skill 工具自动分发) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n试试这些 prompt:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n观察重点:Agent 是否直接从 SYSTEM 里的目录知道有哪些技能?需要完整规范时是否出现 `[HOOK] load_skill`?加载后回答是否使用了对应 skill 的说明?\n\n---\n\n## 接下来\n\n按需加载解决了\"不该带的不要带\"。但另一个问题来了:Agent 连续工作 30 分钟后,messages 列表塞满了中间过程。旧的 tool_result、过时的文件内容,占着上下文但不产生价值。\n\ns08 Context Compact → 四层压缩策略。便宜的先跑,贵的后跑。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` 的分析。\n\n### 一、技能来源:不是只有一个 skills/ 目录\n\n教学版假设所有技能在 `skills/` 目录下。CC 实际从多个来源加载,分布在多个文件中:`loadSkillsDir.ts` 负责从 user/project/`--add-dir` 目录和 legacy commands(`.claude/commands/`)加载;`bundledSkills.ts` 负责内置技能;`SkillTool.ts` 处理 MCP 远程技能;`commands.ts` 负责命令聚合。类型包括 managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(带 `paths` frontmatter,按文件路径激活)、bundled skills、plugin skills、MCP skills。\n\n### 二、SKILL.md Frontmatter 常见字段\n\nCC 的 SKILL.md YAML frontmatter 由 `parseSkillFrontmatterFields()` 解析(`loadSkillsDir.ts`),常见字段包括:\n\n| 字段 | 用途 |\n|------|------|\n| `name` / `description` | 显示名称和描述 |\n| `when_to_use` | 指导模型何时调用 |\n| `allowed-tools` | 技能可用工具的自动允许列表 |\n| `context` | `inline`(默认)或 `fork`(作为子 Agent 运行) |\n| `model` | 模型覆盖(haiku/sonnet/opus/inherit) |\n| `hooks` | 技能级别的 hook 配置 |\n| `paths` | 条件激活的 glob 模式 |\n| `user-invocable` | 用户可以通过 `/name` 调用 |\n\n完整字段列表随版本迭代会变化,以上仅列出教学版涉及的核心字段。\n\n### 三、两级加载的精确实现\n\n1. **Catalog(启动时)**:`getSkillDirCommands()` 扫描目录 → 注册为 `Command` 对象,只包含元数据。`getSkillListingAttachments()` 把技能列表格式化为附件,预算为上下文窗口的 ~1%(上限 8000 字符)。\n2. **Load(调用时)**:模型调 `Skill` 工具(输入字段是 `skill` + 可选 `args`,教学版用 `name`)→ `getPromptForCommand()` 展开完整 SKILL.md 内容 → `SkillTool` 返回的 tool_result 展示文本只是 `\"Launching skill: {name}\"`,真正的技能内容通过 `newMessages` 注入对话。教学版把两者合并为\"通过 tool_result 注入\"是一种简化。\n\n### 教学版的简化是刻意的\n\n- 多文件多来源 → 1 个 `skills/` 目录:足以展示两级加载的核心概念\n- 多个 frontmatter 字段 → 只解析 name/description:减少解析复杂度\n- forked skills(`context: 'fork'`)→ 省略:教学版只展开 inline 技能加载\n- `Skill` 工具输入 `skill`+`args` → 教学版用 `name`:避免参数解析的额外复杂度\n\n
\n\n\n" + "content": "# s07: Skill Loading — 用到的时候才加载\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/zh/s08) → s09 → ... → s20\n> *\"用到时再加载, 别全塞 prompt 里\"* — 通过 tool_result 注入, 不塞 system prompt。\n>\n> **Harness 层**: 知识 — 按需加载, 不堆满上下文。\n\n---\n\n## 问题\n\n你的项目有一套 React 组件规范、一份 SQL 风格指南、一份 API 设计文档。你希望 Agent 自动遵守这些规范。最直接的想法,全塞进 system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行 system prompt。Agent 每次调用 LLM 都带着这些文档——不管是在改 CSS 颜色还是修 SQL 查询。99% 的内容和当前任务无关,白白消耗 token。\n\n---\n\n## 解决方案\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.svg)\n\n保留上一章的最小 hook 结构、`todo_write` 和子 Agent,本章重点转向新增的 `load_skill` 工具。启动时把技能目录注入 SYSTEM prompt,运行时多注册一个工具加载完整内容,用到才花 token。\n\n两层设计:\n\n| 层 | 位置 | 时机 | 代价 |\n|---|------|------|------|\n| 1. 目录 | system prompt | 启动时注入(harness 扫描 skills/) | ~100 tokens/skill,每轮都带 |\n| 2. 
内容 | tool_result | Agent 调用 load_skill 时;SKILL.md 可指引后续的 read_file/bash 调用,用于按需访问额外资源 | ~2000 tokens/skill,按需 |\n\ndispatch 机制不变,load_skill 通过 `TOOL_HANDLERS[block.name]` 分发。\n\n---\n\n## 工作原理\n\n**skills/ 目录**,每个技能一个子目录,包含 `SKILL.md` 文件:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第一级:启动时注入目录**:harness 启动时调用 `_scan_skills()` 扫描 skills/ 目录,解析每个 SKILL.md 的 YAML frontmatter(`name`、`description`),存入 `SKILL_REGISTRY` 字典。`list_skills()` 从注册表生成目录,注入 SYSTEM prompt。Agent 每轮都能看到\"我有哪些技能可用\",不花额外 API 调用:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第二级:load_skill**:Agent 决定\"我需要 SQL 风格指南\",调用 `load_skill(\"sql-style\")`。通过注册表查找,不走文件路径,没有路径遍历风险。SKILL.md 内容通过 `tool_result` 注入,并可通过现有的 file 和 bash 工具进一步访问引用的 `references/`、`scripts/` 或 `assets/`。\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n关键区别:技能内容不是 system prompt 的一部分,它作为一次工具结果进入当前 messages。后续调用会随历史一起携带,直到上下文压缩、截断或会话结束。这和 s08 的 compact 自然衔接:按需加载解决了\"不该提前带的不要带\",compact 解决\"该丢的怎么丢\"。\n\n---\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|------|-----------|-----------|\n| 工具数量 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知识加载 | 无 | 两级:启动时目录注入 SYSTEM + 运行时 load_skill;SKILL.md 可指引后续资源访问 |\n| SYSTEM 提示 | 静态字符串 | 启动时扫描 skills/ 注入目录 |\n| 技能注册表 | 无 | SKILL_REGISTRY(启动时填充,防路径遍历) |\n| 循环 | 不变 | 不变(skill 工具自动分发) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n试试这些 prompt:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n观察重点:Agent 是否直接从 SYSTEM 里的目录知道有哪些技能?需要完整规范时是否出现 `[HOOK] load_skill`?加载后回答是否使用了对应 skill 的说明?\n\n---\n\n## 接下来\n\n按需加载解决了\"不该带的不要带\"。但另一个问题来了:Agent 连续工作 30 分钟后,messages 列表塞满了中间过程。旧的 tool_result、过时的文件内容,占着上下文但不产生价值。\n\ns08 Context Compact → 四层压缩策略。便宜的先跑,贵的后跑。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` 的分析。\n\n### 一、技能来源:不是只有一个 skills/ 目录\n\n教学版假设所有技能在 `skills/` 目录下。CC 实际从多个来源加载,分布在多个文件中:`loadSkillsDir.ts` 负责从 user/project/`--add-dir` 目录和 legacy commands(`.claude/commands/`)加载;`bundledSkills.ts` 负责内置技能;`SkillTool.ts` 处理 MCP 远程技能;`commands.ts` 负责命令聚合。类型包括 managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(带 `paths` frontmatter,按文件路径激活)、bundled skills、plugin skills、MCP skills。\n\n### 二、SKILL.md Frontmatter 常见字段\n\nCC 的 SKILL.md YAML frontmatter 由 `parseSkillFrontmatterFields()` 解析(`loadSkillsDir.ts`),常见字段包括:\n\n| 字段 | 用途 |\n|------|------|\n| `name` / `description` | 显示名称和描述 |\n| `when_to_use` | 指导模型何时调用 |\n| `allowed-tools` | 技能可用工具的自动允许列表 |\n| `context` | `inline`(默认)或 `fork`(作为子 Agent 运行) |\n| `model` | 模型覆盖(haiku/sonnet/opus/inherit) |\n| `hooks` | 技能级别的 hook 配置 |\n| `paths` | 条件激活的 glob 模式 |\n| `user-invocable` | 用户可以通过 `/name` 调用 |\n\n完整字段列表随版本迭代会变化,以上仅列出教学版涉及的核心字段。\n\n### 三、两级加载的精确实现\n\n1. **Catalog(启动时)**:`getSkillDirCommands()` 扫描目录 → 注册为 `Command` 对象,只包含元数据。`getSkillListingAttachments()` 把技能列表格式化为附件,预算为上下文窗口的 ~1%(上限 8000 字符)。\n2. **Load(调用时)**:模型调 `Skill` 工具(输入字段是 `skill` + 可选 `args`,教学版用 `name`)→ `getPromptForCommand()` 展开完整 SKILL.md 内容 → `SkillTool` 返回的 tool_result 展示文本只是 `\"Launching skill: {name}\"`,真正的技能内容通过 `newMessages` 注入对话。教学版把两者合并为\"通过 tool_result 注入\"是一种简化;加载后的 SKILL.md 仍可作为指引,帮助模型后续通过现有 file/bash 工具访问相关资源。\n\n### 教学版的简化是刻意的\n\n- 多文件多来源 → 1 个 `skills/` 目录:足以展示两级加载的核心概念\n- 多个 frontmatter 字段 → 只解析 name/description:减少解析复杂度\n- forked skills(`context: 'fork'`)→ 省略:教学版只展开 inline 技能加载\n- `Skill` 工具输入 `skill`+`args` → 教学版用 `name`:避免参数解析的额外复杂度\n\n
\n\n\n" }, { "version": "s07", "locale": "ja", "title": "s07: Skill Loading — 必要なときにだけ読み込む", - "content": "# s07: Skill Loading — 必要なときにだけ読み込む\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/ja/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — tool_result で注入、system prompt には詰め込まない。\n>\n> **Harness レイヤー**: 知識 — 必要に応じて読み込み、コンテキストに詰め込まない。\n\n---\n\n## 課題\n\nプロジェクトには React コンポーネント仕様、SQL スタイルガイド、API 設計ドキュメントがある。Agent にこれらの仕様を自動的に守らせたい。最も直接的な方法 — すべて system prompt に詰め込む:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行の system prompt。Agent は LLM を呼び出すたびにこれらのドキュメントを運ぶ — CSS の色を変えるときも SQL クエリを修正するときも。99% の内容が現在のタスクと無関係で、トークンを無駄に消費する。\n\n---\n\n## ソリューション\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.ja.svg)\n\n前章の最小フック構造、`todo_write`、サブ Agent を維持し、本章は新規の `load_skill` ツールに注目する。起動時にスキルカタログを SYSTEM prompt に注入し、実行時に完全な内容を読み込むツールを登録する。使ったときだけトークンを消費。\n\n2 層設計:\n\n| 層 | 場所 | タイミング | コスト |\n|---|------|-----------|--------|\n| 1. カタログ | system prompt | 起動時に注入(harness が skills/ をスキャン) | ~100 トークン/スキル、毎ターン携帯 |\n| 2. 
内容 | tool_result | Agent が load_skill を呼び出したとき | ~2000 トークン/スキル、オンデマンド |\n\nディスパッチ機構は変わらず、`load_skill` は `TOOL_HANDLERS[block.name]` を通じて自動的にディスパッチされる。\n\n---\n\n## 仕組み\n\n**skills/ ディレクトリ**、スキルごとに 1 つのサブディレクトリ、それぞれに `SKILL.md` ファイルを含む:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第 1 層:起動時にカタログを注入**:harness は起動時に `_scan_skills()` を呼び出して skills/ ディレクトリをスキャンし、各 SKILL.md の YAML frontmatter(`name`、`description`)を解析して `SKILL_REGISTRY` 辞書に格納する。`list_skills()` はレジストリからカタログを生成し、SYSTEM prompt に注入する。Agent は毎ターン「どのスキルが利用可能か」を確認できる。追加の API 呼び出しは不要:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第 2 層:load_skill**:Agent が「SQL スタイルガイドが必要」と判断し、`load_skill(\"sql-style\")` を呼び出す。レジストリを通じて検索し、ファイルパスを経由しないため、パストラバーサルのリスクがない。内容は `tool_result` を通じて注入される:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n重要な違い:スキル内容は system prompt の一部ではなく、ツール結果として現在の messages に入る。後続の呼び出しでは履歴とともに携帯され、コンテキスト圧縮、切り捨て、またはセッション終了まで保持される。これは s08 の compact と自然に接続する:オンデマンド読み込みで「運ぶべきでないものは運ばない」を解決し、compact が「捨てるべきものをどう捨てるか」を解決する。\n\n---\n\n## s06 からの変更点\n\n| コンポーネント | 変更前 (s06) | 変更後 (s07) |\n|---------------|-------------|-------------|\n| ツール数 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知識読み込み | なし | 2 層:起動時カタログ注入 SYSTEM + 実行時 load_skill |\n| SYSTEM プロンプト | 静的文字列 | 起動時に skills/ をスキャンしてカタログ注入 |\n| スキルレジストリ | なし | SKILL_REGISTRY(起動時に充填、パストラバーサル防止) |\n| ループ | 変更なし | 変更なし(スキルツールは自動ディスパッチ) |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n観察のポイント:Agent は SYSTEM 内のカタログから利用可能なスキルを知っているか? 完全な手順が必要なときに `[HOOK] load_skill` が表示されるか? 読み込んだスキルの説明を使って回答しているか?\n\n---\n\n## 次へ\n\nオンデマンド読み込みで「運ぶべきでないものは運ばない」問題は解決した。しかし別の問題が待っている:Agent が 30 分連続で作業すると、messages リストが中間プロセスで埋め尽くされる。古い tool_result、期限切れのファイル内容、コンテキストを占領しているが価値を生まない。\n\n→ s08 Context Compact:4 層圧縮戦略。安価な層を先に実行、高価な層を後に実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` の分析に基づく。\n\n### 一、スキルソース:skills/ ディレクトリだけではない\n\n教育版はすべてのスキルが `skills/` ディレクトリにあると想定している。CC は実際に複数のファイルに分散したソースから読み込む:`loadSkillsDir.ts` は user/project/`--add-dir` ディレクトリと legacy commands(`.claude/commands/`)を担当、`bundledSkills.ts` は組み込みスキル、`SkillTool.ts` は MCP リモートスキル、`commands.ts` はコマンド集約を担当。タイプには managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(`paths` frontmatter を持ち、ファイルパスでアクティベート)、bundled skills、plugin skills、MCP skills が含まれる。\n\n### 二、SKILL.md Frontmatter の一般的なフィールド\n\nCC の SKILL.md YAML frontmatter は `parseSkillFrontmatterFields()`(`loadSkillsDir.ts`)で解析される。一般的なフィールド:\n\n| フィールド | 用途 |\n|-----------|------|\n| `name` / `description` | 表示名と説明 |\n| `when_to_use` | モデルにいつ呼び出すかを指導 |\n| `allowed-tools` | スキルが使用可能なツールの自動許可リスト |\n| `context` | `inline`(デフォルト)または `fork`(サブ Agent として実行) |\n| `model` | モデルオーバーライド(haiku/sonnet/opus/inherit) |\n| `hooks` | スキルレベルのフック設定 |\n| `paths` | 条件付きアクティベーションの glob パターン |\n| `user-invocable` | ユーザーが `/name` で呼び出し可能 |\n\n完全なフィールドリストはバージョンによって変動する。上記は教育版に関連するコアフィールドのみ。\n\n### 三、2 層読み込みの正確な実装\n\n1. **カタログ(起動時)**:`getSkillDirCommands()` がディレクトリをスキャン → メタデータのみを含む `Command` オブジェクトとして登録。`getSkillListingAttachments()` がスキルリストを添付ファイルとしてフォーマット、コンテキストウィンドウの ~1% を予算とする(上限 8000 文字)。\n2. **読み込み(呼び出し時)**:モデルが `Skill` ツールを呼び出す(入力フィールドは `skill` + オプションの `args`、教育版は `name` を使用)→ `getPromptForCommand()` が完全な SKILL.md 内容を展開 → `SkillTool` が返す tool_result の表示テキストは `\"Launching skill: {name}\"` のみ、実際のスキル内容は `newMessages` を通じて注入される。教育版では両者を「tool_result を通じて注入」として簡略化している。\n\n### 教育版の単純化は意図的\n\n- 複数ファイル・複数ソース → 1 つの `skills/` ディレクトリ:2 層読み込みの核心概念を示すのに十分\n- 複数の frontmatter フィールド → name/description のみ解析:解析の複雑さを削減\n- forked skills(`context: 'fork'`)→ 省略:教学版では inline skill loading のみ展開する\n- `Skill` ツールの入力 `skill`+`args` → 教育版は `name` を使用:追加の引数解析の複雑さを回避\n\n
\n\n\n" + "content": "# s07: Skill Loading — 必要なときにだけ読み込む\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/ja/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — tool_result で注入、system prompt には詰め込まない。\n>\n> **Harness レイヤー**: 知識 — 必要に応じて読み込み、コンテキストに詰め込まない。\n\n---\n\n## 課題\n\nプロジェクトには React コンポーネント仕様、SQL スタイルガイド、API 設計ドキュメントがある。Agent にこれらの仕様を自動的に守らせたい。最も直接的な方法 — すべて system prompt に詰め込む:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行の system prompt。Agent は LLM を呼び出すたびにこれらのドキュメントを運ぶ — CSS の色を変えるときも SQL クエリを修正するときも。99% の内容が現在のタスクと無関係で、トークンを無駄に消費する。\n\n---\n\n## ソリューション\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.ja.svg)\n\n前章の最小フック構造、`todo_write`、サブ Agent を維持し、本章は新規の `load_skill` ツールに注目する。起動時にスキルカタログを SYSTEM prompt に注入し、実行時に完全な内容を読み込むツールを登録する。使ったときだけトークンを消費。\n\n2 層設計:\n\n| 層 | 場所 | タイミング | コスト |\n|---|------|-----------|--------|\n| 1. カタログ | system prompt | 起動時に注入(harness が skills/ をスキャン) | ~100 トークン/スキル、毎ターン携帯 |\n| 2. 
内容 | tool_result | Agent が load_skill を呼び出したとき。SKILL.md は、必要に応じて read_file/bash で追加リソースへアクセスするための手がかりになる | ~2000 トークン/スキル、オンデマンド |\n\nディスパッチ機構は変わらず、`load_skill` は `TOOL_HANDLERS[block.name]` を通じて自動的にディスパッチされる。\n\n---\n\n## 仕組み\n\n**skills/ ディレクトリ**、スキルごとに 1 つのサブディレクトリ、それぞれに `SKILL.md` ファイルを含む:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第 1 層:起動時にカタログを注入**:harness は起動時に `_scan_skills()` を呼び出して skills/ ディレクトリをスキャンし、各 SKILL.md の YAML frontmatter(`name`、`description`)を解析して `SKILL_REGISTRY` 辞書に格納する。`list_skills()` はレジストリからカタログを生成し、SYSTEM prompt に注入する。Agent は毎ターン「どのスキルが利用可能か」を確認できる。追加の API 呼び出しは不要:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第 2 層:load_skill**:Agent が「SQL スタイルガイドが必要」と判断し、`load_skill(\"sql-style\")` を呼び出す。レジストリを通じて検索し、ファイルパスを経由しないため、パストラバーサルのリスクがない。SKILL.md の内容は `tool_result` を通じて注入され、既存の file および bash ツールを通じて、参照される `references/`、`scripts/`、`assets/` へのその後のアクセスも含められる。\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n重要な違い:スキル内容は system prompt の一部ではなく、ツール結果として現在の messages に入る。後続の呼び出しでは履歴とともに携帯され、コンテキスト圧縮、切り捨て、またはセッション終了まで保持される。これは s08 の compact と自然に接続する:オンデマンド読み込みで「運ぶべきでないものは運ばない」を解決し、compact が「捨てるべきものをどう捨てるか」を解決する。\n\n---\n\n## s06 からの変更点\n\n| コンポーネント | 変更前 (s06) | 変更後 (s07) |\n|---------------|-------------|-------------|\n| ツール数 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知識読み込み | なし | 2 層:起動時カタログ注入 SYSTEM + 実行時 load_skill。SKILL.md がその後のリソースアクセスを案内できる |\n| SYSTEM プロンプト | 静的文字列 | 起動時に skills/ をスキャンしてカタログ注入 |\n| スキルレジストリ | なし | SKILL_REGISTRY(起動時に充填、パストラバーサル防止) |\n| ループ | 変更なし | 変更なし(スキルツールは自動ディスパッチ) |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n観察のポイント:Agent は SYSTEM 内のカタログから利用可能なスキルを知っているか? 完全な手順が必要なときに `[HOOK] load_skill` が表示されるか? 読み込んだスキルの説明を使って回答しているか?\n\n---\n\n## 次へ\n\nオンデマンド読み込みで「運ぶべきでないものは運ばない」問題は解決した。しかし別の問題が待っている:Agent が 30 分連続で作業すると、messages リストが中間プロセスで埋め尽くされる。古い tool_result、期限切れのファイル内容、コンテキストを占領しているが価値を生まない。\n\n→ s08 Context Compact:4 層圧縮戦略。安価な層を先に実行、高価な層を後に実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` の分析に基づく。\n\n### 一、スキルソース:skills/ ディレクトリだけではない\n\n教育版はすべてのスキルが `skills/` ディレクトリにあると想定している。CC は実際に複数のファイルに分散したソースから読み込む:`loadSkillsDir.ts` は user/project/`--add-dir` ディレクトリと legacy commands(`.claude/commands/`)を担当、`bundledSkills.ts` は組み込みスキル、`SkillTool.ts` は MCP リモートスキル、`commands.ts` はコマンド集約を担当。タイプには managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(`paths` frontmatter を持ち、ファイルパスでアクティベート)、bundled skills、plugin skills、MCP skills が含まれる。\n\n### 二、SKILL.md Frontmatter の一般的なフィールド\n\nCC の SKILL.md YAML frontmatter は `parseSkillFrontmatterFields()`(`loadSkillsDir.ts`)で解析される。一般的なフィールド:\n\n| フィールド | 用途 |\n|-----------|------|\n| `name` / `description` | 表示名と説明 |\n| `when_to_use` | モデルにいつ呼び出すかを指導 |\n| `allowed-tools` | スキルが使用可能なツールの自動許可リスト |\n| `context` | `inline`(デフォルト)または `fork`(サブ Agent として実行) |\n| `model` | モデルオーバーライド(haiku/sonnet/opus/inherit) |\n| `hooks` | スキルレベルのフック設定 |\n| `paths` | 条件付きアクティベーションの glob パターン |\n| `user-invocable` | ユーザーが `/name` で呼び出し可能 |\n\n完全なフィールドリストはバージョンによって変動する。上記は教育版に関連するコアフィールドのみ。\n\n### 三、2 層読み込みの正確な実装\n\n1. **カタログ(起動時)**:`getSkillDirCommands()` がディレクトリをスキャン → メタデータのみを含む `Command` オブジェクトとして登録。`getSkillListingAttachments()` がスキルリストを添付ファイルとしてフォーマット、コンテキストウィンドウの ~1% を予算とする(上限 8000 文字)。\n2. 
**読み込み(呼び出し時)**:モデルが `Skill` ツールを呼び出す(入力フィールドは `skill` + オプションの `args`、教育版は `name` を使用)→ `getPromptForCommand()` が完全な SKILL.md 内容を展開 → `SkillTool` が返す tool_result の表示テキストは `\"Launching skill: {name}\"` のみ、実際のスキル内容は `newMessages` を通じて注入される。教育版では両者を「tool_result を通じて注入」として簡略化している。読み込まれた SKILL.md は、モデルが後続で既存の file/bash ツールから関連リソースへアクセスする際の手がかりにもなる。\n\n### 教育版の単純化は意図的\n\n- 複数ファイル・複数ソース → 1 つの `skills/` ディレクトリ:2 層読み込みの核心概念を示すのに十分\n- 複数の frontmatter フィールド → name/description のみ解析:解析の複雑さを削減\n- forked skills(`context: 'fork'`)→ 省略:教育版では inline skill loading のみ展開する\n- `Skill` ツールの入力 `skill`+`args` → 教育版は `name` を使用:追加の引数解析の複雑さを回避\n\n
\n\n\n" }, { "version": "s08", @@ -153,7 +153,7 @@ "version": "s09", "locale": "zh", "title": "s09: Memory — 压缩会丢细节,要有一层不丢的", - "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n![Memory Overview](/course-assets/s09_memory/memory-overview.svg)\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说\"记住\",或者每轮结束后后台提取。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n![Memory Subsystems](/course-assets/s09_memory/memory-subsystems.svg)\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: 
{mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。\n\n**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 从最近对话提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n
\n\n\n" + "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n![Memory Overview](/course-assets/s09_memory/memory-overview.svg)\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说\"记住\"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n![Memory Subsystems](/course-assets/s09_memory/memory-subsystems.svg)\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** 
`build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。\n\n**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n text = extract_text(response.content).strip()\n indices = json.loads(re.search(r'\\[.*?\\]', text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(pre_compress) # 从压缩前快照提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n
\n\n\n" }, { "version": "s09", diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json index 238364e..a535d8e 100644 --- a/web/src/data/generated/versions.json +++ b/web/src/data/generated/versions.json @@ -454,7 +454,7 @@ { "name": "extract_text", "signature": "def extract_text(content)", - "startLine": 139 + "startLine": 183 }, { "name": "spawn_subagent", @@ -498,7 +498,7 @@ } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns06: Subagent — spawn sub-agents with fresh messages[] for context isolation.\n\n Parent Agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[task] | <-- fresh\n | | dispatch | |\n | tool: task | ---------------> | own while loop |\n | prompt=\"...\" | | bash/read/... |\n | | summary only | (max 30 turns) |\n | result = \"...\" | <--------------- | return last text |\n +------------------+ +------------------+\n ^ |\n | intermediate results DISCARDED |\n +--------------------------------------+\n\n Subagent tools: bash, read, write, edit, glob (NO task — no recursion)\n\nChanges from s05:\n + task tool + spawn_subagent() with fresh messages[]\n + Safety limit: max 30 turns per subagent\n + extract_text() helper\n Subagent cannot spawn sub-subagents (no task tool in sub_tools).\n Main loop unchanged: task auto-dispatches via TOOL_HANDLERS.\n\nRun: python s06_subagent/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = 
[]\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"For complex sub-problems, use the task tool to spawn a subagent.\"\n)\n\n# s06: subagent gets its own system prompt — no task, no recursion\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s05 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n \"\"\"Extract text from message content blocks.\"\"\"\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, 
\"type\", None) == \"text\")\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s06: Subagent — fresh messages[], summary 
only\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n# NO \"task\" tool — prevent recursive spawning\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\ndef spawn_subagent(description: str) -> str:\n \"\"\"Spawn a subagent with fresh messages[], return summary only.\"\"\"\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}] # fresh context\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results 
= []\n for block in response.content:\n if block.type == \"tool_use\":\n # Issue 1: subagent also runs hooks (permissions apply)\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Issue 5: fallback if safety limit hit during tool_use\n result = extract_text(messages[-1][\"content\"])\n if not result:\n # last message is tool_result, look backwards for assistant text\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result # only summary, entire message history discarded\n\n# Add task tool to parent's tools\nTOOLS.append({\n \"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. 
Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]},\n})\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s04 (unchanged): Hook System\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\n\ndef permission_hook(block):\n \"\"\"PreToolUse: deny list check.\"\"\"\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{p}'\\033[0m\")\n return \"Permission denied\"\n return None\n\ndef log_hook(block):\n \"\"\"PreToolUse: log tool calls.\"\"\"\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\ndef context_inject_hook(query: str):\n \"\"\"UserPromptSubmit: log working directory.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None\n\ndef summary_hook(messages: list):\n \"\"\"Stop: print tool call count.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# 
agent_loop — same as s05 + nag reminder, task auto-dispatches\n# ═══════════════════════════════════════════════════════════\n\nrounds_since_todo = 0\n\ndef agent_loop(messages: list):\n global rounds_since_todo\n while True:\n # s05: nag reminder\n if rounds_since_todo >= 3 and messages:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n rounds_since_todo += 1\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s06: Subagent — spawn sub-agents with fresh context, summary only\")\n print(\"Type a question, press Enter. 
Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns06: Subagent — spawn sub-agents with fresh messages[] for context isolation.\n\n Parent Agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[task] | <-- fresh\n | | dispatch | |\n | tool: task | ---------------> | own while loop |\n | prompt=\"...\" | | bash/read/... |\n | | summary only | (max 30 turns) |\n | result = \"...\" | <--------------- | return last text |\n +------------------+ +------------------+\n ^ |\n | intermediate results DISCARDED |\n +--------------------------------------+\n\n Subagent tools: bash, read, write, edit, glob (NO task — no recursion)\n\nChanges from s05:\n + task tool + spawn_subagent() with fresh messages[]\n + Safety limit: max 30 turns per subagent\n + extract_text() helper\n Subagent cannot spawn sub-subagents (no task tool in sub_tools).\n Main loop unchanged: task auto-dispatches via TOOL_HANDLERS.\n\nRun: python s06_subagent/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = 
os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"For complex sub-problems, use the task tool to spawn a subagent.\"\n)\n\n# s06: subagent gets its own system prompt — no task, no recursion\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s05 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", 
\"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s06: Subagent — fresh messages[], summary only\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": 
[\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n# NO \"task\" tool — prevent recursive spawning\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\ndef extract_text(content) -> str:\n \"\"\"Extract text from message content blocks.\"\"\"\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\ndef spawn_subagent(description: str) -> str:\n \"\"\"Spawn a subagent with fresh messages[], return summary only.\"\"\"\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}] # fresh context\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = 
[]\n for block in response.content:\n if block.type == \"tool_use\":\n # Issue 1: subagent also runs hooks (permissions apply)\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Issue 5: fallback if safety limit hit during tool_use\n result = extract_text(messages[-1][\"content\"])\n if not result:\n # last message is tool_result, look backwards for assistant text\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result # only summary, entire message history discarded\n\n# Add task tool to parent's tools\nTOOLS.append({\n \"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. 
Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]},\n})\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s04 (unchanged): Hook System\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\n\ndef permission_hook(block):\n \"\"\"PreToolUse: deny list check.\"\"\"\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{p}'\\033[0m\")\n return \"Permission denied\"\n return None\n\ndef log_hook(block):\n \"\"\"PreToolUse: log tool calls.\"\"\"\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\ndef context_inject_hook(query: str):\n \"\"\"UserPromptSubmit: log working directory.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None\n\ndef summary_hook(messages: list):\n \"\"\"Stop: print tool call count.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# 
agent_loop — same as s05 + nag reminder, task auto-dispatches\n# ═══════════════════════════════════════════════════════════\n\nrounds_since_todo = 0\n\ndef agent_loop(messages: list):\n global rounds_since_todo\n while True:\n # s05: nag reminder\n if rounds_since_todo >= 3 and messages:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n rounds_since_todo += 1\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s06: Subagent — spawn sub-agents with fresh context, summary only\")\n print(\"Type a question, press Enter. 
Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", "images": [ { "src": "/course-assets/s06_subagent/subagent-overview.svg", @@ -511,7 +511,7 @@ "filename": "s07_skill_loading/code.py", "title": "Skill Loading", "subtitle": "Load Only When Needed", - "loc": 321, + "loc": 318, "tools": [ "bash", "read_file", @@ -532,7 +532,7 @@ { "name": "_parse_frontmatter", "signature": "def _parse_frontmatter(text: str)", - "startLine": 52 + "startLine": 53 }, { "name": "_scan_skills", @@ -839,7 +839,7 @@ "filename": "s09_memory/code.py", "title": "Memory", "subtitle": "Keep a Layer That Doesn't Lose Details", - "loc": 492, + "loc": 498, "tools": [ "bash", "read_file", @@ -1005,7 +1005,7 @@ } ], "layer": "memory", - "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). 
Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n 
lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. 
Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = response.content[0].text.strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = response.content[0].text.strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = response.content[0].text.strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSYSTEM = build_system()\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n return msgs[:3] + [{\"role\": \"user\", \"content\": f\"[snipped {len(msgs)-mx} msgs]\"}] + msgs[-(mx-3):]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": 
blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. 
user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return r.content[0].text.strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, 
\"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s09: rebuild system with current memory index + relevant memories\n system = build_system()\n memories_content = load_memories(messages)\n if memories_content:\n system += \"\\n\\n\" + memories_content\n\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n 
print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). 
Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n 
lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. 
Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = extract_text(response.content).strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSYSTEM = build_system()\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n return msgs[:3] + [{\"role\": \"user\", \"content\": f\"[snipped {len(msgs)-mx} msgs]\"}] + msgs[-(mx-3):]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": 
blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. 
user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return extract_text(r.content).strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": 
\"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n # s09: inject relevant memory content into the current user turn\n memories_content = load_memories(messages)\n memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get(\"content\"), str) else None\n while True:\n # s09: rebuild system with current memory index\n system = build_system()\n\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n request_messages = messages\n if memories_content and memory_turn is not None and memory_turn < len(messages):\n request_messages = messages.copy()\n request_messages[memory_turn] = {\n **messages[memory_turn],\n \"content\": memories_content + \"\\n\\n\" + messages[memory_turn][\"content\"],\n }\n response = client.messages.create(\n model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = 
reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n", "images": [ { "src": "/course-assets/s09_memory/memory-overview.svg", @@ -1327,7 +1327,7 @@ "filename": "s13_background_tasks/code.py", "title": "Background Tasks", "subtitle": "Slow Operations Go to the Background", - "loc": 380, + "loc": 379, "tools": [ "bash", "read_file", @@ -1476,7 +1476,7 @@ } ], "layer": "concurrency", - "source": "#!/usr/bin/env python3\n\"\"\"\ns13: Background Tasks — thread-based async execution + notification injection.\n\nRun: python s13_background_tasks/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s12:\n - threading.Thread for background execution\n - background_tasks dict for lifecycle tracking (bg_id, command, status)\n - 
background_results dict + threading.Lock for thread-safe storage\n - should_run_background: model explicit request via run_in_background param\n - is_slow_operation: fallback heuristic when model doesn't specify\n - start_background_task: dispatch to daemon thread, return bg task id\n - collect_background_results: gather completed, return as notifications\n - agent_loop: slow ops → background + placeholder, inject notifications\n - Notifications use format, not reused tool_use_id\n\nNote: Teaching code keeps a basic agent loop to stay focused on background\ntasks. S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return 
task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return 
msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n 
\"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Background Tasks (s13 new) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = TOOL_HANDLERS.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on background tasks) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, 
messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Command: {block.input.get('command', '')}. \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Inject background notifications + tool results in one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n print(f\" \\033[32m[inject] {len(bg_notifications)} background \"\n f\"notification(s)\\033[0m\")\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s13: background tasks\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns13: Background Tasks — thread-based async execution + notification injection.\n\nRun: python s13_background_tasks/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s12:\n - threading.Thread for background execution\n - background_tasks dict for lifecycle tracking (bg_id, command, status)\n - background_results dict + threading.Lock for thread-safe storage\n - should_run_background: model explicit request via run_in_background param\n - is_slow_operation: fallback heuristic when model doesn't specify\n - start_background_task: dispatch to daemon thread, return bg task id\n - collect_background_results: gather completed, return as notifications\n - agent_loop: slow ops → background + placeholder, inject notifications\n - Notifications use format, not reused tool_use_id\n\nNote: Teaching code keeps a basic agent loop to stay focused on background\ntasks. 
S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all 
blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n 
\"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Background Tasks (s13 new) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = TOOL_HANDLERS.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on background tasks) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, 
messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Command: {block.input.get('command', '')}. \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Inject tool results + background notifications in one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n print(f\" \\033[32m[inject] {len(bg_notifications)} background \"\n f\"notification(s)\\033[0m\")\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s13: background tasks\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", "images": [ { "src": "/course-assets/s13_background_tasks/background-tasks-overview.svg", @@ -1489,7 +1489,7 @@ "filename": "s14_cron_scheduler/code.py", "title": "Cron Scheduler", "subtitle": "Producing Work on a Schedule", - "loc": 646, + "loc": 645, "tools": [ "bash", "read_file", @@ -1711,21 +1711,21 @@ { "name": "print_latest_assistant_text", "signature": "def print_latest_assistant_text(messages: list)", - "startLine": 745 + "startLine": 744 }, { "name": "run_agent_turn_locked", "signature": "def run_agent_turn_locked(user_query: str | None = None)", - "startLine": 763 + "startLine": 762 }, { "name": "queue_processor_loop", "signature": "def queue_processor_loop()", - "startLine": 774 + "startLine": 773 } ], "layer": "concurrency", - "source": "#!/usr/bin/env python3\n\"\"\"\ns14: Cron Scheduler — independent daemon thread + queue processor.\n\nRun: python s14_cron_scheduler/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s13:\n - CronJob dataclass (id, cron, prompt, recurring, durable)\n - cron_matches: 5-field cron expression matching with DOM/DOW OR semantics\n - schedule_job / cancel_job: register/remove cron jobs (with validation)\n - cron_scheduler_loop: independent daemon thread, polls every 1s\n - cron_queue: thread-safe queue, scheduler writes, queue processor delivers\n - queue_processor_loop: auto-runs agent_loop when cron_queue has work\n - Durable storage: .scheduled_tasks.json (survives restart)\n 
- 3 new tools: schedule_cron, list_crons, cancel_cron\n\nFour layers:\n 1. Scheduler: daemon thread checks time → fires matching jobs\n 2. Queue: cron_queue decouples scheduler from agent loop\n 3. Queue processor: wakes the agent when queued work exists and it is idle\n 4. Consumer: agent_loop consumes queued jobs and injects them into messages\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in 
sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (s14 new) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\nagent_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n 
return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return 
f\"Invalid range: {field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef has_cron_queue() -> bool:\n \"\"\"Return whether fired cron jobs are waiting to be delivered.\"\"\"\n with cron_lock:\n return bool(cron_queue)\n\n\n# Load durable jobs on startup, then start scheduler 
thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# ── Cron Tools ──\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n 
\"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on cron scheduler) ──\n# Teaching code keeps a basic agent loop. 
S11's full error recovery is omitted.\n# cron_scheduler_loop produces work; queue_processor_loop wakes this loop when\n# queued work exists and no other agent turn is running.\n\ndef agent_loop(messages: list, context: dict) -> dict:\n system = get_system_prompt(context)\n while True:\n # Layer 4: consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return context\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return context\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = 
get_system_prompt(context)\n\n\nsession_history: list = []\nsession_context = update_context({}, [])\n\n\ndef print_latest_assistant_text(messages: list):\n \"\"\"Print text blocks from the latest assistant message.\"\"\"\n if not messages:\n return\n msg = messages[-1]\n if not isinstance(msg, dict) or msg.get(\"role\") != \"assistant\":\n return\n content = msg.get(\"content\", \"\")\n if isinstance(content, str):\n print(content)\n return\n for block in content:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n\ndef run_agent_turn_locked(user_query: str | None = None):\n \"\"\"Run one agent turn. Caller must hold agent_lock.\"\"\"\n global session_context\n if user_query is not None:\n session_history.append({\"role\": \"user\", \"content\": user_query})\n session_context = agent_loop(session_history, session_context)\n session_context = update_context(session_context, session_history)\n print_latest_assistant_text(session_history)\n print()\n\n\ndef queue_processor_loop():\n \"\"\"Auto-deliver fired cron jobs when the agent is idle.\"\"\"\n global session_context\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if not has_cron_queue():\n continue\n print(\"\\n \\033[35m[queue processor] delivering scheduled work\\033[0m\")\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n\n\nif __name__ == \"__main__\":\n print(\"s14: cron scheduler\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n threading.Thread(target=queue_processor_loop, daemon=True).start()\n print(\" \\033[35m[queue processor] started\\033[0m\")\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n with agent_lock:\n run_agent_turn_locked(query)\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns14: Cron Scheduler — independent daemon thread + queue processor.\n\nRun: python s14_cron_scheduler/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s13:\n - CronJob dataclass (id, cron, prompt, recurring, durable)\n - cron_matches: 5-field cron expression matching with DOM/DOW OR semantics\n - schedule_job / cancel_job: register/remove cron jobs (with validation)\n - cron_scheduler_loop: independent daemon thread, polls every 1s\n - cron_queue: thread-safe queue, scheduler writes, queue processor delivers\n - queue_processor_loop: auto-runs agent_loop when cron_queue has work\n - Durable storage: .scheduled_tasks.json (survives restart)\n - 3 new tools: schedule_cron, list_crons, cancel_cron\n\nFour layers:\n 1. Scheduler: daemon thread checks time → fires matching jobs\n 2. Queue: cron_queue decouples scheduler from agent loop\n 3. Queue processor: wakes the agent when queued work exists and it is idle\n 4. 
Consumer: agent_loop consumes queued jobs and injects them into messages\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy 
dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (s14 new) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\nagent_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n 
return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return 
f\"Invalid range: {field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef has_cron_queue() -> bool:\n \"\"\"Return whether fired cron jobs are waiting to be delivered.\"\"\"\n with cron_lock:\n return bool(cron_queue)\n\n\n# Load durable jobs on startup, then start scheduler 
thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# ── Cron Tools ──\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n 
\"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on cron scheduler) ──\n# Teaching code keeps a basic agent loop. 
S11's full error recovery is omitted.\n# cron_scheduler_loop produces work; queue_processor_loop wakes this loop when\n# queued work exists and no other agent turn is running.\n\ndef agent_loop(messages: list, context: dict) -> dict:\n system = get_system_prompt(context)\n while True:\n # Layer 4: consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return context\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return context\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = 
get_system_prompt(context)\n\n\nsession_history: list = []\nsession_context = update_context({}, [])\n\n\ndef print_latest_assistant_text(messages: list):\n \"\"\"Print text blocks from the latest assistant message.\"\"\"\n if not messages:\n return\n msg = messages[-1]\n if not isinstance(msg, dict) or msg.get(\"role\") != \"assistant\":\n return\n content = msg.get(\"content\", \"\")\n if isinstance(content, str):\n print(content)\n return\n for block in content:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n\ndef run_agent_turn_locked(user_query: str | None = None):\n \"\"\"Run one agent turn. Caller must hold agent_lock.\"\"\"\n global session_context\n if user_query is not None:\n session_history.append({\"role\": \"user\", \"content\": user_query})\n session_context = agent_loop(session_history, session_context)\n session_context = update_context(session_context, session_history)\n print_latest_assistant_text(session_history)\n print()\n\n\ndef queue_processor_loop():\n \"\"\"Auto-deliver fired cron jobs when the agent is idle.\"\"\"\n global session_context\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if not has_cron_queue():\n continue\n print(\"\\n \\033[35m[queue processor] delivering scheduled work\\033[0m\")\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n\n\nif __name__ == \"__main__\":\n print(\"s14: cron scheduler\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n threading.Thread(target=queue_processor_loop, daemon=True).start()\n print(\" \\033[35m[queue processor] started\\033[0m\")\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n with agent_lock:\n run_agent_turn_locked(query)\n", "images": [ { "src": "/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg", @@ -1738,7 +1738,7 @@ "filename": "s15_agent_teams/code.py", "title": "Agent Teams", "subtitle": "One Agent Isn't Enough, Form a Team", - "loc": 746, + "loc": 745, "tools": [ "bash", "read_file", @@ -1982,7 +1982,7 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns15: Agent Teams — MessageBus + spawn_teammate_thread + inbox injection.\n\nRun: python s15_agent_teams/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s14:\n - MessageBus class: file-based mailboxes (.mailboxes/*.jsonl)\n - spawn_teammate_thread: creates teammate in background thread\n - Teammate runs own simplified agent_loop (bash, read, write, send_message)\n - Lead tools: spawn_teammate, send_message, check_inbox (3 new)\n - Lead inbox: teammate messages injected into history (not just printed)\n - Teaching version: teammates limited to 10 rounds (real CC uses idle loop)\n\nASCII flow:\n Lead: cron_queue → messages → prompt → LLM → TOOLS ────→ loop\n ↑ ↓ |\n └── inbox ← MessageBus ← teammate.send_message ←┘\n Teammate: inbox → LLM → bash/read/write/send → loop (max 10 turns)\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif 
os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot 
claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (from s14, synced) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if 
field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: 
{field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# 
Cron tool handlers\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── MessageBus (s15 new) ──\n# Teaching version uses simple file append + unlink.\n# Real CC uses proper-lockfile for concurrent write safety.\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"{content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\n\n# Track spawned teammates\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Teammate Thread (s15 new) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Teaching version: max 10 rounds per teammate.\n Real CC: teammates use idle loop (wait for inbox, work, repeat)\n until shutdown_request.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Send results via send_message to 'lead'.\")\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n }\n\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": 
\"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\n# ── Team Tool Handlers (s15 new) ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n lines.append(f\" [{m['from']}] {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n 
\"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox for teammate messages.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n 
\"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n# Teaching code keeps a basic agent loop. S11's full error recovery is omitted.\n# Cron queue is consumed when agent_loop is called; real CC auto-wakes via\n# queue processor (useQueueProcessor.ts) when items arrive.\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n # Consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": 
user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s15: agent teams\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox for teammate results → inject into history\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox)} messages injected]\\033[0m\")\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns15: Agent Teams — MessageBus + spawn_teammate_thread + inbox injection.\n\nRun: python s15_agent_teams/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s14:\n - MessageBus class: file-based mailboxes (.mailboxes/*.jsonl)\n - spawn_teammate_thread: creates teammate in background thread\n - Teammate runs own simplified agent_loop (bash, read, write, send_message)\n - Lead tools: spawn_teammate, send_message, check_inbox (3 new)\n - Lead inbox: teammate messages injected into history (not just printed)\n - Teaching version: teammates limited to 10 rounds (real CC uses idle loop)\n\nASCII flow:\n Lead: cron_queue → messages → prompt → LLM → TOOLS ────→ loop\n ↑ ↓ |\n └── inbox ← MessageBus ← teammate.send_message ←┘\n Teammate: inbox → LLM → bash/read/write/send → loop (max 10 turns)\n\"\"\"\n\nimport os, subprocess, 
json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = 
load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (from s14, synced) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if 
field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: 
{field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# 
Cron tool handlers\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── MessageBus (s15 new) ──\n# Teaching version uses simple file append + unlink.\n# Real CC uses proper-lockfile for concurrent write safety.\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"{content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\n\n# Track spawned teammates\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Teammate Thread (s15 new) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Teaching version: max 10 rounds per teammate.\n Real CC: teammates use idle loop (wait for inbox, work, repeat)\n until shutdown_request.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Send results via send_message to 'lead'.\")\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n }\n\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": 
\"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\n# ── Team Tool Handlers (s15 new) ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n lines.append(f\" [{m['from']}] {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n 
\"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox for teammate messages.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n 
\"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n# Teaching code keeps a basic agent loop. S11's full error recovery is omitted.\n# Cron queue is consumed when agent_loop is called; real CC auto-wakes via\n# queue processor (useQueueProcessor.ts) when items arrive.\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n # Consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n 
context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s15: agent teams\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox for teammate results → inject into history\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox)} messages injected]\\033[0m\")\n print()\n", "images": [ { "src": "/course-assets/s15_agent_teams/agent-teams-overview.svg", @@ -1999,7 +1999,7 @@ "filename": "s16_team_protocols/code.py", "title": "Team Protocols", "subtitle": "Teammates Need Agreements", - "loc": 710, + "loc": 709, "tools": [ "bash", "read_file", @@ -2225,7 +2225,7 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns16: Team Protocols — request-response protocol + request_id + dispatch + state machine.\n\nRun: python s16_team_protocols/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s15:\n - ProtocolState dataclass (request_id, type, sender, status, created_at)\n - pending_requests dict: tracks in-flight protocol requests\n - dispatch_message: routes incoming messages by type to handlers\n - request_shutdown: Lead sends shutdown protocol request\n - request_plan: Lead asks teammate to submit plan\n - handle_shutdown_request / 
handle_plan_response: teammate receives & responds\n - match_response: Lead correlates response to request via request_id (with type validation)\n - Teammate idle loop: waits for inbox messages instead of exiting after 10 rounds\n - Unified consume_lead_inbox: protocol routing + injection into history\n - 3 new Lead tools: request_shutdown, request_plan, review_plan\n - 1 new teammate tool: submit_plan\n\nASCII flow:\n Lead: BUS.send(\"shutdown_request\", {request_id}) ──────→ teammate inbox\n Teammate: dispatch → handler → BUS.send(\"shutdown_response\", {request_id}) ─→ Lead inbox\n Lead: consume_lead_inbox → match_response(request_id) → pending_requests[req_id].status = approved\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n 
blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" 
\\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and 
limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = 
tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR 
= WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (s16 new) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str\n target: str\n status: str # pending | approved | rejected\n payload: str # plan text or shutdown reason\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\n Validates that response_type matches the request type.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n # Validate response type matches request type\n if state.type == \"shutdown\" and 
response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.status != \"pending\":\n print(f\" \\033[33m[protocol] {request_id} already {state.status}, \"\n f\"ignoring duplicate\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Unified Lead Inbox Consumer (s16 fix) ──\n# Both check_inbox tool and main loop call this function.\n# Protocol responses are routed via match_response before returning.\n\ndef consume_lead_inbox(route_protocol: bool = True) -> list[dict]:\n \"\"\"Read Lead's inbox. 
Route protocol responses, return all messages.\n Called by both run_check_inbox() and main loop to avoid\n messages being consumed without protocol routing.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return []\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n approve = meta.get(\"approve\", False)\n match_response(msg_type, req_id, approve)\n return msgs\n\n\n# ── Teammate Thread (s16: idle loop + dispatch) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Uses idle loop: after each LLM turn, waits for inbox messages\n (shutdown_request, new task) instead of exiting.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"Check inbox for protocol messages (shutdown_request, etc).\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list) -> bool:\n \"\"\"Dispatch incoming protocol messages by type.\n Returns True if teammate should stop.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True # stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n\n return False # continue\n\n def 
run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n }\n\n shutdown_requested = False\n while not shutdown_requested:\n # Check inbox for protocol messages\n inbox = BUS.read_inbox(name)\n should_stop = False\n non_protocol = []\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n break\n else:\n non_protocol.append(msg)\n if should_stop:\n shutdown_requested = True\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": 
\"\" + inbox_json + \"\"})\n\n # LLM turn\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # Idle: wait for inbox messages instead of exiting\n # Real CC sends idle_notification to Lead here\n while not shutdown_requested:\n time.sleep(1)\n inbox = BUS.read_inbox(name)\n if not inbox:\n continue\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n shutdown_requested = True\n break\n else:\n non_protocol.append(msg)\n if shutdown_requested:\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n break # back to LLM turn with new messages\n\n # Execute tool calls\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as 
{role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\n\n Note: This is a protocol-level request, not a code-level gate.\n After submitting, the teammate's thread continues running — it can\n still call bash/write/etc. Real enforcement relies on the model\n waiting for the approval response before acting. Code-level tool\n gating would require blocking the teammate's tool dispatch until\n approval arrives.\n \"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (s16 new) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan for a task.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool, feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n 
BUS.send(\"lead\", state.sender, feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Other Lead Tool Handlers ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n \"\"\"Check Lead's inbox. Routes protocol responses via match_response.\"\"\"\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Dispatch ──\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n 
\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox. Routes protocol responses automatically.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan by request_id.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if 
MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s16: team protocols\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox → route protocol + inject into history\n inbox_msgs = consume_lead_inbox(route_protocol=True)\n if inbox_msgs:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox_msgs)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox_msgs)} messages injected]\\033[0m\")\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns16: Team Protocols — request-response protocol + request_id + dispatch + state machine.\n\nRun: python s16_team_protocols/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s15:\n - ProtocolState dataclass (request_id, type, sender, status, created_at)\n - pending_requests dict: tracks in-flight protocol requests\n - dispatch_message: routes incoming messages by type to handlers\n - request_shutdown: Lead sends shutdown protocol request\n - request_plan: Lead asks teammate to submit plan\n - handle_shutdown_request / handle_plan_response: teammate receives & responds\n - match_response: Lead correlates response to request via request_id (with type validation)\n - Teammate idle loop: waits for inbox messages instead of exiting after 10 rounds\n - Unified consume_lead_inbox: protocol routing + injection into history\n - 3 new Lead tools: request_shutdown, request_plan, review_plan\n - 1 new teammate tool: submit_plan\n\nASCII flow:\n Lead: BUS.send(\"shutdown_request\", {request_id}) ──────→ 
teammate inbox\n Teammate: dispatch → handler → BUS.send(\"shutdown_response\", {request_id}) ─→ Lead inbox\n Lead: consume_lead_inbox → match_response(request_id) → pending_requests[req_id].status = approved\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n 
task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / 
\".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (s16 new) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str\n target: str\n status: str # pending | approved | rejected\n payload: str # plan text or shutdown reason\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\n Validates that response_type matches the request type.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n # Validate response type matches request type\n if state.type == \"shutdown\" and response_type != 
\"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.status != \"pending\":\n print(f\" \\033[33m[protocol] {request_id} already {state.status}, \"\n f\"ignoring duplicate\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Unified Lead Inbox Consumer (s16 fix) ──\n# Both check_inbox tool and main loop call this function.\n# Protocol responses are routed via match_response before returning.\n\ndef consume_lead_inbox(route_protocol: bool = True) -> list[dict]:\n \"\"\"Read Lead's inbox. Route protocol responses, return all messages.\n Called by both run_check_inbox() and main loop to avoid\n messages being consumed without protocol routing.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return []\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n approve = meta.get(\"approve\", False)\n match_response(msg_type, req_id, approve)\n return msgs\n\n\n# ── Teammate Thread (s16: idle loop + dispatch) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Uses idle loop: after each LLM turn, waits for inbox messages\n (shutdown_request, new task) instead of exiting.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. 
\"\n f\"Use tools to complete tasks. \"\n f\"Check inbox for protocol messages (shutdown_request, etc).\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list) -> bool:\n \"\"\"Dispatch incoming protocol messages by type.\n Returns True if teammate should stop.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True # stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n\n return False # continue\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", 
\"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n }\n\n shutdown_requested = False\n while not shutdown_requested:\n # Check inbox for protocol messages\n inbox = BUS.read_inbox(name)\n should_stop = False\n non_protocol = []\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n break\n else:\n non_protocol.append(msg)\n if should_stop:\n shutdown_requested = True\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n\n # LLM turn\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # Idle: wait for inbox messages instead of exiting\n # Real CC sends idle_notification to Lead here\n while not shutdown_requested:\n time.sleep(1)\n inbox = BUS.read_inbox(name)\n if not inbox:\n continue\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n shutdown_requested = True\n break\n else:\n non_protocol.append(msg)\n if shutdown_requested:\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n 
break # back to LLM turn with new messages\n\n # Execute tool calls\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\n\n Note: This is a protocol-level request, not a code-level gate.\n After submitting, the teammate's thread continues running — it can\n still call bash/write/etc. Real enforcement relies on the model\n waiting for the approval response before acting. Code-level tool\n gating would require blocking the teammate's tool dispatch until\n approval arrives.\n \"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). 
Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (s16 new) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan for a task.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool, feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender, feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Other Lead Tool Handlers ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n \"\"\"Check Lead's inbox. 
Routes protocol responses via match_response.\"\"\"\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Dispatch ──\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional 
blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox. 
Routes protocol responses automatically.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan by request_id.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s16: team protocols\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox → route protocol + inject into history\n inbox_msgs = consume_lead_inbox(route_protocol=True)\n if inbox_msgs:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox_msgs)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox_msgs)} messages injected]\\033[0m\")\n print()\n", "images": [ { "src": "/course-assets/s16_team_protocols/team-protocols-overview.svg", @@ -3026,7 +3026,7 @@ "filename": "s20_comprehensive/code.py", "title": "Comprehensive Agent", "subtitle": "All Mechanisms, One Loop", - "loc": 1661, + "loc": 1660, "tools": [ "bash", "read_file", @@ -3077,8 +3077,8 @@ "classes": [ { "name": "Task", - "startLine": 80, - "endLine": 89 + "startLine": 81, + "endLine": 90 }, { "name": "MessageBus", @@ -3110,92 +3110,92 @@ { "name": "terminal_print", "signature": "def terminal_print(text: str)", - "startLine": 57 + "startLine": 58 }, { "name": "_task_path", "signature": "def _task_path(task_id: str)", - "startLine": 90 + "startLine": 91 }, { "name": "save_task", "signature": "def save_task(task: Task)", - "startLine": 106 + "startLine": 107 }, { "name": "load_task", "signature": "def load_task(task_id: str)", - "startLine": 110 + "startLine": 111 }, { "name": "list_tasks", "signature": "def list_tasks()", - "startLine": 114 + "startLine": 115 }, { "name": "get_task_json", "signature": "def get_task_json(task_id: str)", - "startLine": 119 + "startLine": 120 
}, { "name": "can_start", "signature": "def can_start(task_id: str)", - "startLine": 123 + "startLine": 124 }, { "name": "claim_task", "signature": "def claim_task(task_id: str, owner: str = \"agent\")", - "startLine": 135 + "startLine": 136 }, { "name": "complete_task", "signature": "def complete_task(task_id: str)", - "startLine": 156 + "startLine": 157 }, { "name": "validate_worktree_name", "signature": "def validate_worktree_name(name: str)", - "startLine": 181 + "startLine": 182 }, { "name": "run_git", "signature": "def run_git(args: list[str])", - "startLine": 192 + "startLine": 193 }, { "name": "log_event", "signature": "def log_event(event_type: str, worktree_name: str, task_id: str = \"\")", - "startLine": 202 + "startLine": 203 }, { "name": "create_worktree", "signature": "def create_worktree(name: str, task_id: str = \"\")", - "startLine": 210 + "startLine": 211 }, { "name": "bind_task_to_worktree", "signature": "def bind_task_to_worktree(task_id: str, worktree_name: str)", - "startLine": 234 + "startLine": 235 }, { "name": "_count_worktree_changes", "signature": "def _count_worktree_changes(path: Path)", - "startLine": 240 + "startLine": 241 }, { "name": "remove_worktree", "signature": "def remove_worktree(name: str, discard_changes: bool = False)", - "startLine": 253 + "startLine": 254 }, { "name": "keep_worktree", "signature": "def keep_worktree(name: str)", - "startLine": 276 + "startLine": 277 }, { "name": "_parse_frontmatter", "signature": "def _parse_frontmatter(text: str)", - "startLine": 289 + "startLine": 290 }, { "name": "scan_skills", @@ -3570,26 +3570,26 @@ { "name": "inject_background_notifications", "signature": "def inject_background_notifications(messages: list)", - "startLine": 1882 + "startLine": 1881 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list, context: dict)", - "startLine": 1902 + "startLine": 1901 }, { "name": "print_turn_assistants", "signature": "def print_turn_assistants(messages: list, turn_start: 
int)", - "startLine": 2008 + "startLine": 2007 }, { "name": "cron_autorun_loop", "signature": "def cron_autorun_loop(history: list, context: dict)", - "startLine": 2017 + "startLine": 2016 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. 
Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if 
load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree 
name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if 
l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = 
_parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. 
This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, todo in enumerate(todos):\n if \"content\" not in todo or \"status\" not in todo:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. 
This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", 
{})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: 
str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = 
(str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": 
\"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if 
protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") 
-> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n 
path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return (messages[:keep_head]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[-keep_tail:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. \"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[-5:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in 
range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. 
Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n 
notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in 
field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not 
job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: 
dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = []\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n content.extend(results)\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n 
global rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", + "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n 
readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. 
Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in 
task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, 
task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree 
'{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = _parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: 
str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. 
Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, todo in enumerate(todos):\n if \"content\" not in todo or \"status\" not in todo:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. 
This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", 
{})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: 
str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = 
(str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": 
\"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if 
protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") 
-> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n 
path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return (messages[:keep_head]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[-keep_tail:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. \"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[-5:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in 
range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. 
Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n 
notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in 
field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not 
job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: 
dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = list(results)\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n global 
rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", "images": [ { "src": "/course-assets/s20_comprehensive/system-architecture.svg", @@ -3686,7 +3686,7 @@ "newTools": [ "load_skill" ], - "locDelta": 34 + "locDelta": 31 }, { "from": "s07", @@ -3707,7 +3707,7 @@ "newTools": [ "compact" ], - "locDelta": 44 + "locDelta": 47 }, { "from": "s08", @@ -3726,7 +3726,7 @@ "persist_large" ], "newTools": [], - "locDelta": 127 + "locDelta": 133 }, { "from": "s09", @@ -3738,7 +3738,7 @@ "update_context" ], "newTools": [], - "locDelta": -326 + "locDelta": -332 }, { "from": "s10", @@ -3796,7 +3796,7 @@ "collect_background_results" ], "newTools": [], - "locDelta": 83 + "locDelta": 82 }, { "from": "s13", @@ -3878,7 +3878,7 @@ "scan_unclaimed_tasks" ], "newTools": [], - "locDelta": -62 + "locDelta": -61 }, { 
"from": "s17", @@ -3999,7 +3999,7 @@ "list_crons", "cancel_cron" ], - "locDelta": 826 + "locDelta": 825 } ] } \ No newline at end of file