diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.en.svg b/web/public/course-assets/s01_agent_loop/agent-loop.en.svg new file mode 100644 index 0000000..541ab3f --- /dev/null +++ b/web/public/course-assets/s01_agent_loop/agent-loop.en.svg @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + Agent Loop — A while Loop Drives the Entire Agent + + + + User Query + "Create hello.py for me" + + + + + + + messages[] + Accumulated message list + + + + + + + LLM + + Model reads message history + Decision: Need a tool? + Returns stop_reason signal + + + + + + + stop_reason + == "tool_use"? + + + + No + + + + Return Result + Loop Ends + + + + Yes + + + + Execute Tool Call + run_bash(command) + + + + Append tool_result to messages + + + + Core: a + while True + loop. Model calls tool → Execute → Feed back → Ask again. No tool call → Stop. + All subsequent chapters layer mechanisms on top of this loop. + diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg b/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg new file mode 100644 index 0000000..ee726e6 --- /dev/null +++ b/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + Agent Loop — 一つの while ループで Agent 全体を駆動 + + + + ユーザーの質問 + "hello.py を作って" + + + + + + + messages[] + 累積メッセージリスト + + + + + + + 大規模言語モデル (LLM) + + モデルがメッセージ履歴を読む + 判断:ツールが必要か? + stop_reason シグナルを返す + + + + + + + stop_reason + == "tool_use"? 
+ + + + No + + + + 結果を返す + ループ終了 + + + + Yes + + + + ツール呼び出しを実行 + run_bash(command) + + + + tool_result を messages に追加 + + + + 核心:一つの + while True + ループ。ツール呼出 → 実行 → 結果を戻す → 再度問う。ツールなし → 停止。 + 以降の全章がこのループの上に仕組みを積み重ねる。 + diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.svg b/web/public/course-assets/s01_agent_loop/agent-loop.svg new file mode 100644 index 0000000..87c6b50 --- /dev/null +++ b/web/public/course-assets/s01_agent_loop/agent-loop.svg @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + Agent Loop — 一个 while 循环驱动整个 Agent + + + + 用户提问 + "帮我创建 hello.py" + + + + + + + messages[] + 累积式消息列表 + + + + + + + 大模型 (LLM) + + 模型阅读消息历史 + 判断:需要工具吗? + 返回 stop_reason 信号 + + + + + + + stop_reason + == "tool_use"? + + + + + + + + 返回结果 + 循环结束 + + + + + + + + 执行工具调用 + run_bash(command) + + + + 追加 tool_result 到 messages + + + + 核心:一个 + while True + 循环。模型调工具 → 执行 → 喂回 → 再问。不调工具就停。 + 后续所有章节都在这个循环上叠加机制。 + diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg b/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg new file mode 100644 index 0000000..04dab32 --- /dev/null +++ b/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + Tool Concurrency — Teaching Version vs Claude Code + + + + Model returns 5 tool calls at once + + + read A.py + + + glob *.py + + + bash "ls -la" + + + write B.py + + + read C.py + + + + Teaching: Original Order, One by One + + + for block in response.content: + TOOL_HANDLERS[name](**input) + + Result: 5 serial calls, no batches + + + 1. read A.py + + + 2. glob *.py + + + 3. bash "ls -la" + + + 4. write B.py + + + 5. 
read C.py + + Teaching focus: tool dispatch first; concurrency omitted + + + + Claude Code: isConcurrencySafe(input) + + + Each tool call judged individually: + tool.isConcurrencySafe(parsedInput) → bool + + Result: 3 batches (by consecutive blocks) + + + Batch 1 + Concurrent + read A · glob · bash "ls" + + + + + Batch 2 + Serial + write B + + + + + Batch 3 + Concurrent + read C + + bash "ls" is safe and consecutive, so it stays in Batch 1 + + ✓ Input-dependent safety, not tool-name hardcoding + ✓ Original order preserved; only safe consecutive calls run together + + + + Key Difference + • Teaching: executes response.content in original order, one tool call at a time; no concurrency or batching + • CC: checks isConcurrencySafe(input), then groups consecutive safe calls into one batch + • Key difference: teaching focuses on dispatch; CC optimizes safe concurrency while preserving order semantics + diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg b/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg new file mode 100644 index 0000000..f130d5b --- /dev/null +++ b/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + ツール並列実行 — 教育版 vs Claude Code + + + + モデルが一度に 5 つのツール呼び出しを返す + + + read A.py + + + glob *.py + + + bash "ls -la" + + + write B.py + + + read C.py + + + + 教育版:元の順序で一つずつ実行 + + + for block in response.content: + TOOL_HANDLERS[name](**input) + + 結果:5 回の直列呼び出し、batch なし + + + 1. read A.py + + + 2. glob *.py + + + 3. bash "ls -la" + + + 4. write B.py + + + 5. 
read C.py + + 教育の焦点:まず tool_use 分配を理解し、並列は省略 + + + + Claude Code:isConcurrencySafe(input) + + + 各ツール呼び出しを個別に判定: + tool.isConcurrencySafe(parsedInput) → bool + + 結果:3 バッチ(連続ブロックごと) + + + Batch 1 + 並列 + read A · glob · bash "ls" + + + + + Batch 2 + 直列 + write B + + + + + Batch 3 + 並列 + read C + + bash "ls" は安全かつ連続しているため Batch 1 に入る + + ✓ 入力に基づく安全判定、ツール名ハードコードではない + ✓ 元の順序を保ち、連続する安全呼び出しだけ並列化 + + + + 核心的な違い + • 教育版:response.content の元の順序で一つずつ実行し、並列処理も batch 化もしない + • CC:isConcurrencySafe(input) で判定し、連続する安全呼び出しを同じ batch にまとめる + • 差分の要点:教育版は分配に集中し、CC は順序意味を保ったまま安全な並列を最適化する + diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.svg b/web/public/course-assets/s02_tool_use/concurrency-comparison.svg new file mode 100644 index 0000000..e6941e6 --- /dev/null +++ b/web/public/course-assets/s02_tool_use/concurrency-comparison.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + Tool Concurrency — 教学版 vs Claude Code + + + + 模型一次返回 5 个工具调用 + + + read A.py + + + glob *.py + + + bash "ls -la" + + + write B.py + + + read C.py + + + + 教学版:按原始顺序逐个执行 + + + for block in response.content: + TOOL_HANDLERS[name](**input) + + 结果:5 次串行调用,不做 batch + + + 1. read A.py + + + 2. glob *.py + + + 3. bash "ls -la" + + + 4. write B.py + + + 5. 
read C.py + + 教学重点:先理解 tool_use 分发,暂不引入并发执行 + + + + Claude Code:isConcurrencySafe(input) + + + 每个工具调用单独判断: + tool.isConcurrencySafe(parsedInput) → bool + + 结果:3 个 batch(按连续块分批) + + + Batch 1 + 并发 + read A · glob · bash "ls" + + + + + Batch 2 + 串行 + write B + + + + + Batch 3 + 并发 + read C + + bash "ls" 是并发安全调用,且和 read/glob 连续,所以留在 Batch 1 + + ✓ 按输入判断并发安全,不按工具名硬编码 + ✓ 保留原始顺序,只在连续安全块内部并发 + + + + 核心差异 + • 教学版:按 response.content 原始顺序逐个执行,不做并发,也不分 batch + • CC:按 isConcurrencySafe(input) 判断,并把连续的并发安全调用合成同一个 batch + • 差异重点:教学版聚焦工具分发;CC 在保持顺序语义的同时优化安全并发 + diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg b/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg new file mode 100644 index 0000000..6fd2e66 --- /dev/null +++ b/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool Use — Loop Unchanged, Just Add Dispatch Mapping + + + s01 Preserved + + + + User Query + messages[] + + + + + + + LLM + stop_reason check + + + + + + + tool_use? + + + + No + + Return Result + + + + Yes + + + s02 New + + + + TOOL_HANDLERS Dispatch Mapping + + + + + + + bash + → run_bash() + + + + read_file + → run_read() + + + + write_file + → run_write() + + + + edit_file + → run_edit() + + + + glob + → run_glob() + + + + Append tool_result to messages + + + + + s01 Preserved (loop, LLM, decision — completely unchanged) + + s02 New (5 tools + dispatch mapping) + Only 1 line changed in the loop: run_bash() → TOOL_HANDLERS[block.name]() + diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg b/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg new file mode 100644 index 0000000..8971d06 --- /dev/null +++ b/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool Use — ループ不変、ディスパッチマッピングを追加 + + + s01 保持 + + + + ユーザーの質問 + messages[] + + + + + + + LLM + stop_reason 判定 + + + + + + + tool_use? 
+ + + + No + + 結果を返す + + + + Yes + + + s02 新規 + + + + TOOL_HANDLERS ディスパッチマッピング + + + + + + + bash + → run_bash() + + + + read_file + → run_read() + + + + write_file + → run_write() + + + + edit_file + → run_edit() + + + + glob + → run_glob() + + + + tool_result を messages に追加 + + + + + s01 保持(ループ、LLM、判定 — 完全に不変) + + s02 新規(5 つのツール + ディスパッチマッピング) + ループ内で変更されたのは 1 行だけ:run_bash() → TOOL_HANDLERS[block.name]() + diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.svg b/web/public/course-assets/s02_tool_use/tool-dispatch.svg new file mode 100644 index 0000000..a6b16ce --- /dev/null +++ b/web/public/course-assets/s02_tool_use/tool-dispatch.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Tool Use — 循环不变,只加分发映射 + + + s01 保留 + + + + 用户提问 + messages[] + + + + + + + 大模型 (LLM) + stop_reason 判断 + + + + + + + tool_use? + + + + + + 返回结果 + + + + + + + s02 新增 + + + + TOOL_HANDLERS 分发映射 + + + + + + + bash + → run_bash() + + + + read_file + → run_read() + + + + write_file + → run_write() + + + + edit_file + → run_edit() + + + + glob + → run_glob() + + + + tool_result 追加到 messages + + + + + s01 保留(循环、LLM、判断——完全不变) + + s02 新增(5 个工具 + 分发映射) + 循环里只改了 1 行:run_bash() → TOOL_HANDLERS[block.name]() + diff --git a/web/public/course-assets/s03_permission/permission-overview.en.svg b/web/public/course-assets/s03_permission/permission-overview.en.svg new file mode 100644 index 0000000..9c2537d --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-overview.en.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + Permission — Loop unchanged, a gate before tool execution + + + s02 preserved + + + + messages[] + + + + + + + LLM + stop_reason? + + + + No + + + Return result + + + + Yes + + + s03 new + + + + check_permission() + + + + Gate 1: Deny List + + + + Gate 2: Rule Matching + + + + Gate 3: User Approval + + + + Deny + + + + Pass + + + s02 + + + + TOOL_ + HANDLERS + bash/read/write/... 
+ + + + + + + + s02 preserved (loop, LLM, dispatch — unchanged) + + s03 new (three-gate permission pipeline) + diff --git a/web/public/course-assets/s03_permission/permission-overview.ja.svg b/web/public/course-assets/s03_permission/permission-overview.ja.svg new file mode 100644 index 0000000..c381d5e --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-overview.ja.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + Permission — ループは変更なし、ツール実行前にゲートを追加 + + + s02 維持 + + + + messages[] + + + + + + + LLM + stop_reason? + + + + No + + + 結果を返す + + + + Yes + + + s03 新規 + + + + check_permission() + + + + ゲート 1: 拒否リスト + + + + ゲート 2: ルール照合 + + + + ゲート 3: ユーザー承認 + + + + 拒否 + + + + 通過 + + + s02 + + + + TOOL_ + HANDLERS + bash/read/write/... + + + + + + + + s02 維持(ループ、LLM、ディスパッチ — 変更なし) + + s03 新規(3 ゲート権限パイプライン) + diff --git a/web/public/course-assets/s03_permission/permission-overview.svg b/web/public/course-assets/s03_permission/permission-overview.svg new file mode 100644 index 0000000..b5097a3 --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-overview.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + Permission — 循环不变,工具执行前加一道门 + + + s02 保留 + + + + messages[] + + + + + + + LLM + stop_reason? + + + + + + + 返回结果 + + + + + + + s03 新增 + + + + check_permission() + + + + 闸门 1: 拒绝列表 + + + + 闸门 2: 规则匹配 + + + + 闸门 3: 用户审批 + + + + 拒绝 + + + + 通过 + + + s02 + + + + TOOL_ + HANDLERS + bash/read/write/... 
+ + + + + + + + s02 保留(循环、LLM、分发——完全不变) + + s03 新增(三道闸门权限管线) + diff --git a/web/public/course-assets/s03_permission/permission-pipeline.en.svg b/web/public/course-assets/s03_permission/permission-pipeline.en.svg new file mode 100644 index 0000000..1eb1051 --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-pipeline.en.svg @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + Permission Pipeline — Three Gates + + + + Tool call enters + + + + + + Gate 1: Deny List + rm -rf /, sudo, shutdown + + + + + + Gate 2: Rule Matching + Write outside ws? Destructive? + no match → allow + + + match + + + + Gate 3 + User approval + allow / deny + + + + Three Decisions + + + Deny + Gate 1 hit, or user denied + + + Ask + Gate 2 matched, enter Gate 3 + + + Allow + No rule hit, or user approved + + Priority: hard deny → rule matching → if matched ask user; if unmatched allow by default + diff --git a/web/public/course-assets/s03_permission/permission-pipeline.ja.svg b/web/public/course-assets/s03_permission/permission-pipeline.ja.svg new file mode 100644 index 0000000..090aaf1 --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-pipeline.ja.svg @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + Permission Pipeline — 3 つのゲート + + + + ツール呼び出し + + + + + + ゲート 1: 拒否リスト + rm -rf /, sudo, shutdown + + + + + + ゲート 2: ルール照合 + ws 外への書き込み?破壊的? 
+ 不一致 → allow + + + 一致 + + + + ゲート 3 + ユーザー承認 + allow / deny + + + + 3 つの決定 + + + 拒否 (deny) + ゲート 1 一致、またはユーザー拒否 + + + 確認 (ask) + ゲート 2 一致、ゲート 3 へ + + + 許可 (allow) + ルール不一致、またはユーザー許可 + + 優先順位:ハード拒否 → ルール照合 → 一致ならユーザー承認、不一致ならデフォルト許可 + diff --git a/web/public/course-assets/s03_permission/permission-pipeline.svg b/web/public/course-assets/s03_permission/permission-pipeline.svg new file mode 100644 index 0000000..c3b0b95 --- /dev/null +++ b/web/public/course-assets/s03_permission/permission-pipeline.svg @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + Permission Pipeline — 三道闸门 + + + + 工具调用进入 + + + + + + 闸门 1: 拒绝列表 + rm -rf /, sudo, shutdown + + + + + + 闸门 2: 规则匹配 + 写工作区外?读敏感路径? + 未命中 → allow + + + 命中 + + + + 闸门 3 + 用户审批 + 允许 / 拒绝 + + + + 三种决策 + + + 阻止 (deny) + 闸门 1 命中,或用户拒绝 + + + 询问 (ask) + 闸门 2 命中,进入闸门 3 + + + 允许 (allow) + 规则未命中,或用户允许 + + 规则优先:闸门 1 硬拒绝 → 闸门 2 规则匹配 → 命中则用户审批,未命中默认允许 + diff --git a/web/public/course-assets/s04_hooks/hooks-overview.en.svg b/web/public/course-assets/s04_hooks/hooks-overview.en.svg new file mode 100644 index 0000000..87afdc0 --- /dev/null +++ b/web/public/course-assets/s04_hooks/hooks-overview.en.svg @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Hooks — Extension Logic Hangs Outside, Loop Unchanged + + + + + + messages[] + (s01 preserved) + + + + + + + LLM + stop_reason=tool_use? + + + + No + + Return Result + + + + Yes + + + + trigger_hooks() + PreToolUse + + permission_hook · log_hook + Teaching: non-None → block + + + + + Write tool_result + + + + Pass + + + + TOOL_ + HANDLERS + bash/read/... + + + + After exec + + + + trigger_hooks() + PostToolUse + + large_output_hook + + + + Results appended to messages[], loop continues + + + + s03: + if not check_permission(block): ... 
+ ← every new check requires modifying the loop + s04: + blocked = trigger_hooks("PreToolUse", block) + ← add check = register_hook(), loop unchanged + diff --git a/web/public/course-assets/s04_hooks/hooks-overview.ja.svg b/web/public/course-assets/s04_hooks/hooks-overview.ja.svg new file mode 100644 index 0000000..d1addf6 --- /dev/null +++ b/web/public/course-assets/s04_hooks/hooks-overview.ja.svg @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Hooks — 拡張ロジックは外側に、ループは一文字も変更しない + + + + + + messages[] + (s01 保持) + + + + + + + LLM + stop_reason=tool_use? + + + + No + + 結果を返す + + + + Yes + + + + trigger_hooks() + PreToolUse + + permission_hook · log_hook + 教育版: 非 None → ブロック + + + + + tool_result に返す + + + + 通過 + + + + TOOL_ + HANDLERS + bash/read/... + + + + 実行後 + + + + trigger_hooks() + PostToolUse + + large_output_hook + + + + 結果を messages[] に追加、ループ継続 + + + + s03: + if not check_permission(block): ... + ← チェックを追加するたびにループを修正 + s04: + blocked = trigger_hooks("PreToolUse", block) + ← チェック追加 = register_hook()、ループ不変 + diff --git a/web/public/course-assets/s04_hooks/hooks-overview.svg b/web/public/course-assets/s04_hooks/hooks-overview.svg new file mode 100644 index 0000000..410593a --- /dev/null +++ b/web/public/course-assets/s04_hooks/hooks-overview.svg @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Hooks — 扩展逻辑挂在外面,循环本身一字不改 + + + + + + messages[] + (s01 保留) + + + + + + + LLM + stop_reason=tool_use? + + + + + + 返回结果 + + + + + + + + trigger_hooks() + PreToolUse + + permission_hook · log_hook + 教学版:非 None → 阻止 + + + + + 写入 tool_result + + + + 通过 + + + + TOOL_ + HANDLERS + bash/read/... + + + + 执行后 + + + + trigger_hooks() + PostToolUse + + large_output_hook + + + + 结果追加到 messages[],循环继续 + + + + s03: + if not check_permission(block): ... 
+ ← 每加一个检查就要改循环 + s04: + blocked = trigger_hooks("PreToolUse", block) + ← 加检查 = register_hook(),循环不改 + diff --git a/web/public/course-assets/s05_todo_write/todo-overview.en.svg b/web/public/course-assets/s05_todo_write/todo-overview.en.svg new file mode 100644 index 0000000..b4655e1 --- /dev/null +++ b/web/public/course-assets/s05_todo_write/todo-overview.en.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + TodoWrite — Loop Unchanged, One More Tool Auto-Dispatched + + + s04 Preserved + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? + + + + No + + Return Result + + + + Yes + + + + trigger_hooks + PreToolUse + + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + + edit · glob + + + + todo_write + s05 New + + → in-memory TODO list + + + + Results appended to messages[], loop continues + + + + Nag Reminder + Model hasn't called todo_write for 3 rounds → auto-inject <reminder>Update your todos.</reminder> + + + + + s04 Preserved (loop, hooks, 5 base tools) + + s05 New (todo_write + nag reminder) + diff --git a/web/public/course-assets/s05_todo_write/todo-overview.ja.svg b/web/public/course-assets/s05_todo_write/todo-overview.ja.svg new file mode 100644 index 0000000..ce0f697 --- /dev/null +++ b/web/public/course-assets/s05_todo_write/todo-overview.ja.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + TodoWrite — ループ不変、ツール一つ追加で自動ディスパッチ + + + s04 保持 + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? 
+ + + + No + + 結果を返す + + + + Yes + + + + trigger_hooks + PreToolUse + + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + + edit · glob + + + + todo_write + s05 新規 + + → メモリ内 TODO リスト + + + + 結果を messages[] に追加、ループ継続 + + + + Nag リマインダー(催促機構) + モデルが連続 3 ラウンド todo_write 未呼び出し → 自動注入 <reminder>Update your todos.</reminder> + + + + + s04 保持(ループ、フック、5 つの基本ツール) + + s05 新規(todo_write + Nag リマインダー) + diff --git a/web/public/course-assets/s05_todo_write/todo-overview.svg b/web/public/course-assets/s05_todo_write/todo-overview.svg new file mode 100644 index 0000000..25e12fe --- /dev/null +++ b/web/public/course-assets/s05_todo_write/todo-overview.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + TodoWrite — 循环不变,多一个工具自动分发 + + + s04 保留 + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? + + + + + + 返回结果 + + + + + + + + trigger_hooks + PreToolUse + + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + + edit · glob + + + + todo_write + s05 新增 + + → 进程内 TODO 列表 + + + + 结果追加到 messages[],循环继续 + + + + Nag Reminder(催更机制) + 模型连续 3 轮没调 todo_write → 自动注入 <reminder>Update your todos.</reminder> + + + + + s04 保留(循环、钩子、5 个基础工具) + + s05 新增(todo_write + nag reminder) + diff --git a/web/public/course-assets/s06_subagent/subagent-overview.en.svg b/web/public/course-assets/s06_subagent/subagent-overview.en.svg new file mode 100644 index 0000000..d6eb4d6 --- /dev/null +++ b/web/public/course-assets/s06_subagent/subagent-overview.en.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Subagent — Independent messages[], All Intermediate Steps Discarded + + + + Parent Agent + + + + messages[] + + + + + + + LLM + + + + tool_use + + + + TOOL_HANDLERS + + + + Base Tools + bash / read / write / ... 
+ + + + task → spawn + + + + tool_result + + + append messages[] + Normal tool results also append to messages[] + + + + Subagent (Fresh Context) + + + + messages = [task] + fresh — no parent history + + + + + + + LLM + + + + Own while loop (max 30 rounds) + bash · read · write · edit · glob + No task — recursive spawn forbidden + + + + Intermediate 30+ tool calls + results + All discarded ✗ + + + + ✓ Extract only final text → return to Parent + + + + + ① task desc + + + + + ② summary + + + + + + s05 Preserved: loop, hooks, todo_write, 6 base tools + + + s06 New: task tool + spawn_subagent() — independent messages[], returns only summary + + + + ① Parent → Sub: + task description (a short string) + ② Sub → Parent: + extract_text() (final conclusion only) + diff --git a/web/public/course-assets/s06_subagent/subagent-overview.ja.svg b/web/public/course-assets/s06_subagent/subagent-overview.ja.svg new file mode 100644 index 0000000..87a4570 --- /dev/null +++ b/web/public/course-assets/s06_subagent/subagent-overview.ja.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Subagent — 独立した messages[]、中間過程はすべて破棄 + + + + 親 Agent + + + + messages[] + + + + + + + LLM + + + + tool_use + + + + TOOL_HANDLERS + + + + 基本ツール + bash / read / write / ... 
+ + + + task → spawn + + + + tool_result + + + messages[] に追加 + 通常ツール結果も messages[] に戻る + + + + サブエージェント(新規コンテキスト) + + + + messages = [task] + 新規 — 親の会話を継承しない + + + + + + + LLM + + + + 独自の while ループ(最大 30 ラウンド) + bash · read · write · edit · glob + task なし — 再帰 spawn 禁止 + + + + 中間 30+ ラウンドのツール呼び出し + 結果 + すべて破棄 ✗ + + + + ✓ 最後のテキストのみ抽出 → 親に返却 + + + + + ① task 説明 + + + + + ② summary + + + + + + s05 保持:ループ、フック、todo_write、6 つの基本ツール + + + s06 新規:task ツール + spawn_subagent() — 独立 messages[]、要約のみ返却 + + + + ① 親 → サブ: + task description(短い文字列) + ② サブ → 親: + extract_text()(最終結論のみ) + diff --git a/web/public/course-assets/s06_subagent/subagent-overview.svg b/web/public/course-assets/s06_subagent/subagent-overview.svg new file mode 100644 index 0000000..c18d660 --- /dev/null +++ b/web/public/course-assets/s06_subagent/subagent-overview.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Subagent — 独立 messages[],中间过程全部丢弃 + + + + Parent Agent + + + + messages[] + + + + + + + LLM + + + + tool_use + + + + TOOL_HANDLERS + + + + 基础工具 + bash / read / write / ... 
+ + + + task → spawn + + + + tool_result + + + append messages[] + 普通工具结果也回填 messages[] + + + + Subagent (全新上下文) + + + + messages = [task] + fresh — 不继承父对话 + + + + + + + LLM + + + + 自己的 while 循环(最多 30 轮) + bash · read · write · edit · glob + 无 task — 禁止递归 spawn + + + + 中间 30+ 轮工具调用 + 结果 + 全部丢弃 ✗ + + + + ✓ 只提取最后一段文本 → 返回给 Parent + + + + + ① task 描述 + + + + + ② summary + + + + + + s05 保留:循环、hook、todo_write、6 个基础工具 + + + s06 新增:task 工具 + spawn_subagent() — 独立 messages[],只回传摘要 + + + + ① Parent → Sub: + task description(一小段文字) + ② Sub → Parent: + extract_text()(只有最终结论) + diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.en.svg b/web/public/course-assets/s07_skill_loading/skill-overview.en.svg new file mode 100644 index 0000000..ff31907 --- /dev/null +++ b/web/public/course-assets/s07_skill_loading/skill-overview.en.svg @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Skill Loading — catalog at startup, content on demand + + + History preserved + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? 
+ + + + No + + Return result + + + + Yes + + + + trigger_hooks + PreToolUse + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + edit · glob · todo + + task (subagent) + + + load_skill + + + + Results appended to messages[], loop continues + + + + s07 new + + + + ① build_system() + Scan skills/ first line at startup + → inject SYSTEM prompt + + + + ② load_skill(name) + Read full SKILL.md at runtime + → inject tool_result + + + + SYSTEM has skill catalog, carried every turn + + + + + + + + History preserved (loop, hooks, TODO, subagent — unchanged) + + s07 new (startup catalog in SYSTEM + load_skill tool) + diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg b/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg new file mode 100644 index 0000000..596dcd5 --- /dev/null +++ b/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Skill Loading — 起動時にカタログ注入、実行時にオンデマンド読み込み + + + 過去章を保持 + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? 
+ + + + No + + 結果を返す + + + + Yes + + + + trigger_hooks + PreToolUse + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + edit · glob · todo + + task (subagent) + + + load_skill + + + + 結果を messages[] に追加、ループ継続 + + + + s07 新規 + + + + ① build_system() + 起動時に skills/ の 1 行目をスキャン + → SYSTEM プロンプトに注入 + + + + ② load_skill(name) + 実行時に完全な SKILL.md を読み取り + → tool_result に注入 + + + + SYSTEM にスキルカタログ、毎ターン携帯 + + + + + + + + 過去章を保持(ループ、フック、TODO、サブ Agent — 変更なし) + + s07 新規(起動時カタログ注入 SYSTEM + load_skill ツール) + diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.svg b/web/public/course-assets/s07_skill_loading/skill-overview.svg new file mode 100644 index 0000000..600747b --- /dev/null +++ b/web/public/course-assets/s07_skill_loading/skill-overview.svg @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + Skill Loading — 启动时注入目录,运行时按需加载内容 + + + 历史章节保留 + + + + messages[] + + + + + + + LLM + stop_reason=tool_use? + + + + + + 返回结果 + + + + + + + + trigger_hooks + PreToolUse + + + + + + + TOOL_HANDLERS + + + + bash · read · write + + edit · glob · todo + + task (subagent) + + + load_skill + + + + 结果追加到 messages[],循环继续 + + + + s07 新增 + + + + ① build_system() + 启动时扫描 skills/ 第一行 + → 注入 SYSTEM prompt + + + + ② load_skill(name) + 运行时读完整 SKILL.md + → 注入 tool_result + + + + SYSTEM 含技能目录,每轮都带 + + + + + + + + 历史章节保留(循环、钩子、TODO、subagent — 完全不变) + + s07 新增(启动时目录注入 SYSTEM + load_skill 工具) + diff --git a/web/public/course-assets/s08_context_compact/auto-compact.en.svg b/web/public/course-assets/s08_context_compact/auto-compact.en.svg new file mode 100644 index 0000000..7577ac8 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/auto-compact.en.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + L4: autoCompact — LLM Full Summary + + + + Trigger Condition + All three preprocessing layers have run, estimated tokens > contextWindow - maxOutputTokens - 13_000. 
+ Tries sessionMemoryCompact first (lightweight summary from existing memory), only calls LLM if insufficient. + + + + Step 1: Save transcript + Write full conversation to .transcripts/ + JSONL format, one message per line + Filename: transcript_{timestamp}.jsonl + No data lost, just moved out of active area + + + + + Step 2: LLM generates summary + Send conversation history to LLM + Summary must include 9 sections: + request · concepts · files · errors · resolutions + user messages · todos · current state · next steps + Generated only once + + + + + Step 3: Replace message list + All old messages → 1 summary + Model continues from summary + Includes recently_read file list + ⚠ This is an irreversible operation + + + + Before messages + user + assistant + user + assistant + user + ~180 messages, occupying 62K tokens + + + + + After messages + + [Compacted] Summary: goal → create hello.py ... + Recent files: hello.py, README.md ... + ~1 message, occupying 1K tokens + + + + Circuit breaker: + 3 consecutive autocompact failures → stop retrying. Prevents wasting API calls when context is unrecoverable. 
+ diff --git a/web/public/course-assets/s08_context_compact/auto-compact.ja.svg b/web/public/course-assets/s08_context_compact/auto-compact.ja.svg new file mode 100644 index 0000000..2488bd0 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/auto-compact.ja.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + L4: autoCompact — LLM 完全要約 + + + + トリガー条件 + 前 3 層の前処理を全て実行後、推定 token > contextWindow - maxOutputTokens - 13_000。 + まず sessionMemoryCompact を試行(既存のメモリで軽量要約)、不足時のみ LLM を呼び出し。 + + + + ステップ 1:transcript 保存 + 完全な対話を .transcripts/ に書き込み + JSONL 形式、1 行 1 メッセージ + ファイル名:transcript_{timestamp}.jsonl + 情報は失われていない、アクティブ領域から移動のみ + + + + + ステップ 2:LLM 要約生成 + 対話履歴を LLM に送信 + 要約は 9 つのセクションを含む: + リクエスト・概念・ファイル・エラー・解決 + ユーザーメッセージ・TODO・現在・次ステップ + 1 回のみ生成 + + + + + ステップ 3:メッセージリスト置換 + 全旧メッセージ → 1 件の要約に + モデルは要約から作業を継続 + recently_read ファイルリストを付与 + ⚠ これは復元不可能な操作 + + + + 圧縮前 messages + user + assistant + user + assistant + user + ~180 件のメッセージ、62K トークンを占有 + + + + + 圧縮後 messages + + [Compacted] 要約:目標 → hello.py を作成 ... + 最近のファイル:hello.py, README.md ... 
+ ~1 件のメッセージ、1K トークンを占有 + + + + サーキットブレーカー: + autocompact が連続 3 回失敗 → リトライ停止。コンテキストが復元不可能な場合の API 呼び出しの無駄な反復を防止。 + diff --git a/web/public/course-assets/s08_context_compact/auto-compact.svg b/web/public/course-assets/s08_context_compact/auto-compact.svg new file mode 100644 index 0000000..c7691f9 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/auto-compact.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + L4: autoCompact — LLM 全量摘要 + + + + 触发条件 + 前三层预处理全跑完,估算 token > contextWindow - maxOutputTokens - 13_000。 + 先尝试 sessionMemoryCompact(用已有记忆做轻量摘要),不足才调 LLM。 + + + + 步骤 1:保存 transcript + 完整对话写入 .transcripts/ + JSONL 格式,一行一条消息 + 文件名:transcript_{timestamp}.jsonl + 信息没有丢失,只是移出活跃区 + + + + + 步骤 2:LLM 生成摘要 + 把对话历史发给 LLM + 摘要需包含 9 个部分: + 请求·概念·文件·错误·解决 + 用户消息·待办·当前·下一步 + 只生成一次 + + + + + 步骤 3:替换消息列表 + 所有旧消息 → 1 条摘要 + 模型从摘要继续工作 + 附带 recently_read 文件列表 + ⚠ 这是无法恢复的操作 + + + + 压缩前 messages + user + assistant + user + assistant + user + ~180 条消息,占 62K token + + + + + 压缩后 messages + + [Compacted] 摘要:目标 → 创建 hello.py ... + 最近文件:hello.py, README.md ... + ~1 条消息,占 1K token + + + + 熔断器: + 连续 autocompact 失败 3 次 → 停止重试。防止上下文不可恢复时反复浪费 API 调用。 + diff --git a/web/public/course-assets/s08_context_compact/compact-overview.en.svg b/web/public/course-assets/s08_context_compact/compact-overview.en.svg new file mode 100644 index 0000000..542b156 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compact-overview.en.svg @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Context Compact — Compression Before LLM Call, Three Trigger Modes + + + s07 Preserved + s08 New + + + + messages[] + (s07 preserved) + + + + + + + Compression Pipeline + + + + ① Every Turn · Unconditional · 0 API + + + L3 tool_result_budget + + + L1 snip_compact + + + L2 micro_compact + + + + + + + Over threshold? + + + No → Pass + Straight to LLM + + + Yes↓ + + + + ② Conditional · Token Over Threshold · 1 API + + + L4 compact_history + + + + + + + LLM + stop_reason=tool_use? 
+ + + + No + + Return Result + + + + Yes + + + + TOOL_HANDLERS + bash · read · write + task · load_skill · ... + + + + API error + + retry to compression pipeline + + + + ③ Emergency Trigger + API returns prompt_too_long + → reactive_compact → retry + + + + Tool results appended to messages[] → next turn → compress again → LLM + + + + + + s07 Preserved: loop, hooks, skill loading, sub-agents + + + ① Every Turn Auto: L3→L1→L2 run unconditionally before each LLM call, 0 API + + + ② Conditional: after L3/L1/L2, tokens still over threshold → compact_history, 1 API + + + ③ Emergency: API returns prompt_too_long → reactive_compact → retry + + Three modes with increasing cost: 0 API → 1 API → 1 API + more aggressive trimming + diff --git a/web/public/course-assets/s08_context_compact/compact-overview.ja.svg b/web/public/course-assets/s08_context_compact/compact-overview.ja.svg new file mode 100644 index 0000000..350cd13 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compact-overview.ja.svg @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Context Compact — LLM 呼び出し前に圧縮、3 つのトリガーモード + + + s07 保持 + s08 新規 + + + + messages[] + (s07 保持) + + + + + + + 圧縮パイプライン + + + + ① 毎ターン自動 · 無条件 · 0 API + + + L3 tool_result_budget + + + L1 snip_compact + + + L2 micro_compact + + + + + + + 閾値超過? + + + No → 通過 + 直接 LLM へ + + + Yes↓ + + + + ② 条件 · トークン閾値超過 · 1 API + + + L4 compact_history + + + + + + + LLM + stop_reason=tool_use? + + + + No + + 結果を返す + + + + Yes + + + + TOOL_HANDLERS + bash · read · write + task · load_skill · ... 
+ + + + API 例外 + + 圧縮パイプラインへ再試行 + + + + ③ 緊急トリガー + API が prompt_too_long を返す + → reactive_compact → リトライ + + + + ツール結果を messages[] に追加 → 次ターン → 再圧縮 → LLM + + + + + + s07 保持:ループ、フック、スキルロード、サブエージェント + + + ① 毎ターン自動:L3→L1→L2 が各 LLM 呼び出し前に無条件実行、0 API + + + ② 条件トリガー:L3/L1/L2 後もトークン超過 → compact_history、1 API + + + ③ 緊急トリガー:API が prompt_too_long を返す → reactive_compact → リトライ + + 3 つのモードはコスト増加:0 API → 1 API → 1 API + より積極的なトリム + diff --git a/web/public/course-assets/s08_context_compact/compact-overview.svg b/web/public/course-assets/s08_context_compact/compact-overview.svg new file mode 100644 index 0000000..837e9bb --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compact-overview.svg @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Context Compact — 压缩插在 LLM 调用前,三种触发模式 + + + s07 保留 + s08 新增 + + + + messages[] + (s07 保留) + + + + + + + 压缩管线 + + + + ① 每轮自动 · 无条件 · 0 API + + + L3 tool_result_budget + + + L1 snip_compact + + + L2 micro_compact + + + + + + + 超阈值? + + + 否 → 通过 + 直接进 LLM + + + 是↓ + + + + ② 条件触发 · token 超阈值 · 1 API + + + L4 compact_history + + + + + + + LLM + stop_reason=tool_use? + + + + + + 返回结果 + + + + + + + + TOOL_HANDLERS + bash · read · write + task · load_skill · ... 
+ + + + API 异常 + + 重试回到压缩管线 + + + + ③ 异常触发 + API 返回 prompt_too_long + → reactive_compact → 重试 + + + + 工具结果追加到 messages[] → 下一轮 → 再次压缩 → LLM + + + + + + s07 保留:循环、hook、技能加载、子 Agent + + + ① 每轮自动:L3→L1→L2 在每次 LLM 调用前无条件执行,0 API + + + ② 条件触发:L3/L1/L2 跑完 token 仍超阈值 → compact_history,1 API + + + ③ 异常触发:API 返回 prompt_too_long → reactive_compact → 重试 + + 三种模式的代价递增:0 API → 1 API → 1 API + 更激进的裁剪 + diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.en.svg b/web/public/course-assets/s08_context_compact/compaction-layers.en.svg new file mode 100644 index 0000000..5a27e96 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compaction-layers.en.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Context Compaction — Pre-processing Pipeline + Auto-compact + Emergency Fallback + + + + Design Principles + Cheap operations first, expensive later + Trim text before dropping messages + Drop messages before calling LLM + + + + Increasing Cost + Text ops → LLM summary → Emergency trim + 0 API · 0 API · 0 API · 1 API · 1 API + + + + Pre-processing Pipeline (execution order: L3 → L1 → L2, before every LLM call, 0 API) + + + + L3 + toolResultBudget + tool_result total > 200KB → spill largest item + keep full content + Trigger: every turn, before microCompact can replace full content + + + + + + + L1 + snipCompact + messages > 50 → trim middle + keep head/tail + Trigger: message count exceeds threshold + + + + + + + L2 + microCompact + old tool_result → placeholder (keep latest 3) + compact old + Trigger: every turn automatically; tutorial uses text placeholder + + + + Auto-compact Decision (triggered when pre-processing is insufficient, 1 API call) + + + + L4 + autoCompact + tokens over threshold → LLM summary + 1 API call + Threshold: contextWindow - maxOutputTokens - 13,000 · Try sessionMemoryCompact first, then LLM + Circuit breaker: stop retrying after 3 consecutive failures + + + + Emergency Fallback (triggered when API still 
returns prompt_too_long) + + + + Emrg + reactiveCompact + API returns 413 / prompt_too_long → byte-level trim + Keep last 5 + summary; more aggressive than autoCompact + + diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg b/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg new file mode 100644 index 0000000..8519054 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + コンテキスト圧縮 — 前処理パイプライン + 自動圧縮 + 緊急フォールバック + + + + 設計原則 + 安価な処理を先に、高価な処理を後に + テキスト修正 → メッセージ削除の順 + メッセージ削除 → LLM 呼び出しの順 + + + + コスト増加 + テキスト操作 → LLM 要約 → 緊急トリム + 0 API · 0 API · 0 API · 1 API · 1 API + + + + 前処理パイプライン(実行順:L3 → L1 → L2、各 LLM 呼び出し前に自動実行、0 API) + + + + L3 + toolResultBudget + tool_result 合計 > 200KB → 最大項目を退避 + 完全内容を保持 + トリガー:毎ターン、microCompact が完全内容を置換する前に実行 + + + + + + + L1 + snipCompact + メッセージ > 50 → 中間をトリム + 先頭/末尾保持 + トリガー:メッセージ数が閾値を超過 + + + + + + + L2 + microCompact + 古い tool_result → プレースホルダー(最新 3 件保持) + 旧結果を圧縮 + トリガー:毎ターン自動実行、チュートリアル版はテキストプレースホルダーで模擬 + + + + 自動圧縮判定(前処理で不足時にトリガー、1 API 呼び出し) + + + + L4 + autoCompact + トークンが閾値超過 → LLM 全量要約 + 1 API 呼び出し + 閾値: contextWindow - maxOutputTokens - 13,000 · sessionMemoryCompact を先に試行、不足時のみ LLM 呼び出し + サーキットブレーカー:連続 3 回失敗後にリトライ停止 + + + + 緊急フォールバック(API が引き続き prompt_too_long を返す場合にトリガー) + + + + 緊急 + reactiveCompact + API が 413 / prompt_too_long を返す → バイト単位でトリム + 最後の 5 件 + 要約を保持、autoCompact より積極的 + + diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.svg b/web/public/course-assets/s08_context_compact/compaction-layers.svg new file mode 100644 index 0000000..818b44e --- /dev/null +++ b/web/public/course-assets/s08_context_compact/compaction-layers.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + 上下文压缩 — 预处理管线 + 自动压缩 + 应急兜底 + + + + 设计原则 + 便宜的先跑,贵的后跑 + 能改文本 → 不删整条 + 能删整条 → 不调 LLM + + + + 代价递增 + 文本操作 → LLM 摘要 → 应急裁剪 + 0 API · 0 API · 0 API · 1 API · 1 
API + + + + 预处理管线(执行顺序:L3 → L1 → L2,每轮 LLM 调用前自动执行,0 API) + + + + L3 + toolResultBudget + tool_result 总和 > 200KB → 最大项落盘 + 保留完整内容 + 触发:每轮自动,必须在 microCompact 之前保留完整内容 + + + + + + + L1 + snipCompact + 消息 > 50 条 → 裁掉中间 + 保留头尾 + 触发:消息数超过阈值 + + + + + + + L2 + microCompact + 旧 tool_result → 占位符(保留最近 3 条) + 压旧结果 + 触发:每轮自动,教学版用文本占位符模拟 + + + + 自动压缩决策(预处理不够时触发,1 API 调用) + + + + L4 + autoCompact + token 超阈值 → LLM 全量摘要 + 1 API 调用 + 阈值: contextWindow - maxOutputTokens - 13,000 · 先尝试 sessionMemoryCompact,不够才调 LLM + 熔断:连续失败 3 次后停止重试 + + + + 应急兜底(API 仍然返回 prompt_too_long 时触发) + + + + 应急 + reactiveCompact + API 返回 413 / prompt_too_long → 字节级裁剪 + 保留最后 5 条 + 摘要,比 autoCompact 更激进 + + diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.en.svg b/web/public/course-assets/s08_context_compact/layer1-budget.en.svg new file mode 100644 index 0000000..1870c59 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/layer1-budget.en.svg @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + L3: toolResultBudget — Large Result Persistence + + + + Pain Point + Model read 30 files in one turn; total tool_result adds up to 500KB, filling the entire context window + + + Before + + tool_result: (78KB) ... + tool_result: (142KB) ... + tool_result: (290KB) ... + Total 510KB → over budget + + + + + + After + + tool_result: <persisted-output> + Full output: .task_outputs/t1.txt + Preview: (first 2000 chars) ... + Total 18KB → normal + + + + How + 1. Sum the size of all tool_result in the latest turn + 2. Over 200KB → sort by size, persist the largest to .task_outputs/tool-results/ + 3. 
Keep only <persisted-output> marker + first 2000 chars preview in context + + + + Result: No data lost (full data on disk), context drops from 510KB to ~18KB, 0 API calls + diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg b/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg new file mode 100644 index 0000000..b76862c --- /dev/null +++ b/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + L3: toolResultBudget — 大結果の永続化 + + + + ペインポイント + モデルが一度に 30 ファイルを読み込み、単一ターンの tool_result が合計 500KB に達し、コンテキストウィンドウを圧迫 + + + 圧縮前 + + tool_result: (78KB) ... + tool_result: (142KB) ... + tool_result: (290KB) ... + 合計 510KB → 予算超過 + + + + + + 圧縮後 + + tool_result: <persisted-output> + Full output: .task_outputs/t1.txt + Preview: (先頭 2000 文字) ... + 合計 18KB → 正常 + + + + 方法 + 1. 最終ターンの全 tool_result の合計サイズを集計 + 2. 200KB 超過 → サイズ順にソートし、最大のものから .task_outputs/tool-results/ に永続化 + 3. コンテキストには <persisted-output> マーカー + 先頭 2000 文字のプレビューのみ残す + + + + 結果:情報は失われていない(ディスクに完全なデータあり)、コンテキストは 510KB → ~18KB に削減、0 回 API 呼び出し + diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.svg b/web/public/course-assets/s08_context_compact/layer1-budget.svg new file mode 100644 index 0000000..53f2d5c --- /dev/null +++ b/web/public/course-assets/s08_context_compact/layer1-budget.svg @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + L3: toolResultBudget — 大结果落盘 + + + + 痛点 + 模型一次读了 30 个文件,单轮 tool_result 加起来 500KB,直接把上下文窗口打满 + + + 压缩前 + + tool_result: (78KB) ... + tool_result: (142KB) ... + tool_result: (290KB) ... + 合计 510KB → 超预算 + + + + + + 压缩后 + + tool_result: <persisted-output> + Full output: .task_outputs/t1.txt + Preview: (前 2000 字符) ... + 合计 18KB → 正常 + + + + 怎么做 + 1. 统计最后一轮所有 tool_result 的总大小 + 2. 超过 200KB → 按大小排序,从最大的开始落盘到 .task_outputs/tool-results/ + 3. 
上下文里只留 <persisted-output> 标记 + 前 2000 字符预览 + + + + 结果:信息没丢(磁盘有完整数据),上下文从 510KB 降到 ~18KB,0 次 API 调用 + diff --git a/web/public/course-assets/s08_context_compact/micro-compact.en.svg b/web/public/course-assets/s08_context_compact/micro-compact.en.svg new file mode 100644 index 0000000..51ed008 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/micro-compact.en.svg @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + L2: microCompact — Old Result Placeholder Replacement + + + + Pain Point + Agent read 10 files in a row; the full content of reads 1-7 is still sitting in context, taking space but no longer useful + + + Before (all 10 tool_result complete) + + + Read file A: (full content, 3200 chars)... + + Read file B: (full content, 1800 chars)... + + Read file C: (full content, 4500 chars)... + + Read file J: (full content, 2800 chars) + 7 old results waste ~25K chars + + + + + + After (keep only latest 3 complete) + + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] + + Read file J: (full content, 2800 chars) + Keep only latest 3; first 7 become placeholders + + + + How (teaching version) + Iterate through tool_result, keep only latest 3 complete, replace older ones with placeholders. + Real CC + Clears old results via API cache_edits (without breaking prompt cache prefix), only for COMPACTABLE_TOOLS: + Read, Bash, Grep, Glob, WebSearch, WebFetch, Edit, Write. Teaching version uses text placeholders to simulate the same effect. 
+ diff --git a/web/public/course-assets/s08_context_compact/micro-compact.ja.svg b/web/public/course-assets/s08_context_compact/micro-compact.ja.svg new file mode 100644 index 0000000..5d8bff7 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/micro-compact.ja.svg @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + L2: microCompact — 旧結果のプレースホルダー置換 + + + + ペインポイント + Agent が連続で 10 ファイルを読み込み、1〜7 回目の完全なファイル内容がコンテキストに残ったまま、場所を占有しつつ既に不要 + + + 圧縮前(10 件の tool_result がすべて完全) + + + Read file A: (完全な内容, 3200 文字)... + + Read file B: (完全な内容, 1800 文字)... + + Read file C: (完全な内容, 4500 文字)... + + Read file J: (完全な内容, 2800 文字) + 7 件の旧結果が ~25K 文字を無駄に占有 + + + + + + 圧縮後(最新 3 件のみ完全保持) + + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] + + Read file J: (完全な内容, 2800 文字) + 最新 3 件のみ保持、前 7 件はプレースホルダー化 + + + + 方法(チュートリアル版) + tool_result を走査し、最新 3 件のみ完全保持、古いものはプレースホルダーに置換。 + 実際の CC + API cache_edits で旧結果をクリア(prompt cache プレフィックスを破壊しない)、COMPACTABLE_TOOLS のみ対象: + Read, Bash, Grep, Glob, WebSearch, WebFetch, Edit, Write。チュートリアル版はテキストプレースホルダーで同様の効果を模擬。 + diff --git a/web/public/course-assets/s08_context_compact/micro-compact.svg b/web/public/course-assets/s08_context_compact/micro-compact.svg new file mode 100644 index 0000000..e1728f7 --- /dev/null +++ b/web/public/course-assets/s08_context_compact/micro-compact.svg @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + L2: microCompact — 旧结果占位替换 + + + + 痛点 + Agent 连续读了 10 个文件,第 1-7 次的完整文件内容还躺在上下文里,占着位置但早就没用了 + + + 压缩前(10 条 tool_result 全部完整) + + + Read file A: (完整内容, 3200 字符)... + + Read file B: (完整内容, 1800 字符)... + + Read file C: (完整内容, 4500 字符)... + + Read file J: (完整内容, 2800 字符) + 7 条旧结果白占 ~25K 字符 + + + + + + 压缩后(只保留最近 3 条完整) + + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] + + [Earlier result compacted. Re-run if needed.] 
+ + Read file J: (完整内容, 2800 字符) + 只保留最近 3 条,前 7 条变占位 + + + + 怎么做(教学版) + 遍历 tool_result,只保留最近 3 条完整,更旧的替换为占位符。 + 真实 CC + 通过 API cache_edits 清除旧结果(不破坏 prompt cache 前缀),仅对 COMPACTABLE_TOOLS 生效: + Read, Bash, Grep, Glob, WebSearch, WebFetch, Edit, Write。教学版用文本占位模拟同样效果。 + diff --git a/web/public/course-assets/s09_memory/memory-overview.en.svg b/web/public/course-assets/s09_memory/memory-overview.en.svg new file mode 100644 index 0000000..51cd510 --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-overview.en.svg @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + Memory — Memory loading, extraction, and consolidation on s08 compression pipeline + + + + s08 preserved + + s09 new + + + + messages[] + + + + + + + Compression + budget → snip → micro + → autoCompact + (s08) + + + + + + + Loading + LLM side-query select + inject file contents + ≤ 5 items + + + + + + + LLM + stop_reason + =tool_use? + + + + no, stop + + return result + + + + yes + + + + TOOL_HANDLERS + bash · read · write + edit · glob · task + + + + .memory/ — MEMORY.md index + *.md files (cross-session persistent) + + + + read + + + + Extraction (after each turn) + + + Consolidation: triggers at ≥ 10 files, dedup·merge·prune + + + + tool results → messages[] → compress → load memories → LLM → extract after each turn + + + + + s08 preserved: compression pipeline (budget → snip → micro → auto) + emergency trim + loop + + s09 new: Loading (index in SYSTEM + on-demand inject) + Extraction (after each turn) + Consolidation (threshold) + diff --git a/web/public/course-assets/s09_memory/memory-overview.ja.svg b/web/public/course-assets/s09_memory/memory-overview.ja.svg new file mode 100644 index 0000000..3007a22 --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-overview.ja.svg @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + Memory — s08 圧縮パイプラインに記憶の読み込み・抽出・整理を挿入 + + + + s08 維持 + + s09 追加 + + + + messages[] + + + + + + + 圧縮パイプライン + budget → snip → micro + → 
autoCompact + (s08) + + + + + + + Loading + LLM side-query 選択 + ファイル内容を注入 + ≤ 5 件 + + + + + + + LLM + stop_reason + =tool_use? + + + + なし、停止 + + 結果を返す + + + + あり + + + + TOOL_HANDLERS + bash · read · write + edit · glob · task + + + + .memory/ — MEMORY.md インデックス + *.md ファイル(セッション間永続化) + + + + 読み込み + + + + Extraction(毎ターン終了後) + + + Consolidation: ファイル ≥ 10 でトリガー、重複排除・統合・剪定 + + + + ツール結果 → messages[] → 圧縮 → 記憶読み込み → LLM → 毎ターン終了後に抽出 + + + + + s08 維持:圧縮パイプライン(budget → snip → micro → auto)+ 緊急トリム + ループ + + s09 追加:Loading(インデックス常駐 + オンデマンド注入)+ Extraction(毎ターン終了後)+ Consolidation(閾値トリガー) + diff --git a/web/public/course-assets/s09_memory/memory-overview.svg b/web/public/course-assets/s09_memory/memory-overview.svg new file mode 100644 index 0000000..8932df1 --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-overview.svg @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + Memory — 在 s08 压缩管线上,插入记忆加载、提取与整理 + + + + s08 保留 + + s09 新增 + + + + messages[] + + + + + + + 压缩管线 + budget → snip → micro + → autoCompact + (s08) + + + + + + + Loading + LLM side-query 选文件 + 注入文件内容 + ≤ 5 条 + + + + + + + LLM + stop_reason + =tool_use? 
+ + + + 否,停止 + + 返回结果 + + + + + + + + TOOL_HANDLERS + bash · read · write + edit · glob · task + + + + .memory/ — MEMORY.md 索引 + *.md 文件(跨会话持久化) + + + + 读取 + + + + Extraction(每轮结束后) + + + Consolidation: 文件数 ≥ 10 时触发,去重·合并·剪枝 + + + + 工具结果追加到 messages[] → 压缩 → 加载记忆 → LLM → 每轮结束后提取 + + + + + s08 保留:压缩管线(budget → snip → micro → auto)+ 应急裁剪 + 循环 + + s09 新增:Loading(索引常驻 + 按需注入)+ Extraction(每轮结束后)+ Consolidation(阈值触发) + diff --git a/web/public/course-assets/s09_memory/memory-subsystems.en.svg b/web/public/course-assets/s09_memory/memory-subsystems.en.svg new file mode 100644 index 0000000..2435fec --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-subsystems.en.svg @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + Memory System — Store · Load · Extract · Consolidate + + + + Storage + + .memory/*.md files + MEMORY.md index + + + + + + Load + + Index in SYSTEM (always) + LLM side-query select files + ≤ 5 items, fallback to keyword + + + + + + Extract + + After each turn ends + LLM extracts prefs/constraints + Check existing, avoid duplicates + + + + Consolidate + + Triggers at ≥ 10 files + Dedup · merge · prune + CC: 3-layer gating + + + + .memory/ — MEMORY.md index + *.md files (YAML frontmatter: name / description / type) + + + + read/write + + + + write + + + + overwrite + + + + Four types: + user (who you are) · feedback (how to work) · project (what's happening) · reference (where to find things) + + + + CC Source Comparison + • Selection: LLM side-query (Sonnet selects), not embedding vector similarity + • Extraction timing: stop hook (after each turn ends), not after autoCompact + • Dream consolidation: 3-layer gating (time ≥ 24h + sessions ≥ 5 + file lock), not simple count + diff --git a/web/public/course-assets/s09_memory/memory-subsystems.ja.svg b/web/public/course-assets/s09_memory/memory-subsystems.ja.svg new file mode 100644 index 0000000..75309f5 --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-subsystems.ja.svg @@ -0,0 +1,78 @@ + + + + + 
+ + + + + + + + + Memory System — ストレージ · 読み込み · 抽出 · 整理 + + + + ストレージ + + .memory/*.md ファイル + MEMORY.md インデックス + + + + + + 読み込み + + インデックスを SYSTEM に常駐 + LLM side-query でファイル選択 + ≤ 5 件、失敗時はキーワードに降格 + + + + + + 抽出 + + 毎ターン終了後にトリガー + LLM が好み/制約を抽出 + 既存を確認、重複回避 + + + + 整理 + + ファイル ≥ 10 でトリガー + 重複排除・統合・剪定 + CC: 3 層ゲート + + + + .memory/ — MEMORY.md インデックス + *.md ファイル(YAML frontmatter: name / description / type) + + + + 読み/書き + + + + 書き込み + + + + 上書き + + + + 4 種類の記憶: + user(あなたは誰か)· feedback(どう作業するか)· project(何が起きているか)· reference(どこで探すか) + + + + CC ソースコード対照 + • 記憶選択:LLM side-query(Sonnet が選択)、embedding ベクトル類似度ではない + • 抽出タイミング:stop hook(毎ターン終了後)、autoCompact 後ではない + • Dream 整理:3 層ゲート(時間 ≥ 24h + セッション ≥ 5 + ファイルロック)、単純な計数ではない + diff --git a/web/public/course-assets/s09_memory/memory-subsystems.svg b/web/public/course-assets/s09_memory/memory-subsystems.svg new file mode 100644 index 0000000..f767316 --- /dev/null +++ b/web/public/course-assets/s09_memory/memory-subsystems.svg @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + Memory System — 存储 · 加载 · 提取 · 整理 + + + + 存储 + + .memory/*.md 文件 + MEMORY.md 索引 + + + + + + 加载 + + 索引常驻 SYSTEM + LLM side-query 选文件 + ≤ 5 条,失败降级到关键词 + + + + + + 提取 + + 每轮结束后触发 + LLM 提取偏好/约束 + 检查已有,避免重复 + + + + 整理 + + 文件 ≥ 10 触发 + 去重·合并·剪枝 + CC: 三层门控 + + + + .memory/ — MEMORY.md 索引 + *.md 文件(YAML frontmatter: name / description / type) + + + + 写入/读取 + + + + 写入 + + + + 覆写 + + + + 四类记忆: + user(你是谁)· feedback(怎么做事)· project(正在发生什么)· reference(东西在哪找) + + + + CC 源码对照 + • 记忆选择:LLM side-query(Sonnet 选),不是 embedding 向量相似度 + • 提取时机:stop hook 中触发(每轮结束后),不是 autoCompact 后 + • Dream 整理:三层门控(时间 ≥ 24h + 会话 ≥ 5 + 文件锁),不是简单计数 + diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg b/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg new file mode 100644 index 0000000..dfe0b92 --- /dev/null +++ b/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + 
System Prompt — PROMPT_SECTIONS + On-Demand Assembly + Cache + + + + s09 Preserved + + s10 New + + + + + + PROMPT_SECTIONS + ✓ identity (always) + ✓ tools (always) + ✓ workspace (always) + ○ memory + + + + + + + assemble_system_prompt + Input: context dict + Always: identity + tools + workspace + On-demand: memory + Output: "\n\n".join(selected) + + + + + + + get_system_prompt + json.dumps(context) + Hit → return cached + Miss → assemble + store + (s10 new) + + + + system=get_system_prompt(context) + + + + + + messages[] + + + + + + + Compression + Loading + snip → micro → budget → auto + → load memory (s09) + + + + + + + LLM + stop_reason=tool_use? + system assembled + + + + yes + + + + TOOL_HANDLERS + bash · read · write + (s09 preserved) + + + + Tool results → messages[] → compress → load memory → assemble prompt → LLM + + + + + s09 Preserved: loop, compression pipeline, memory loading, tool execution + + s10 New: PROMPT_SECTIONS (4 sections) + assemble_system_prompt + get_system_prompt (cache) + diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg b/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg new file mode 100644 index 0000000..2bafa14 --- /dev/null +++ b/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + System Prompt — PROMPT_SECTIONS + オンデマンド組み立て + キャッシュ + + + + s09 保持 + + s10 新規 + + + + + + PROMPT_SECTIONS + ✓ identity (常時) + ✓ tools (常時) + ✓ workspace (常時) + ○ memory + + + + + + + assemble_system_prompt + 入力: context dict + 常時: identity + tools + workspace + オンデマンド: memory + 出力: "\n\n".join(selected) + + + + + + + get_system_prompt + json.dumps(context) + ヒット → キャッシュ返却 + ミス → assemble + 保存 + (s10 新規) + + + + system=get_system_prompt(context) + + + + + + messages[] + + + + + + + 圧縮 + ロード + snip → micro → budget → auto + → 記憶ロード (s09) + + + + + + + LLM + stop_reason=tool_use? 
+ system assembled + + + + あり + + + + TOOL_HANDLERS + bash · read · write + (s09 保持) + + + + ツール結果 → messages[] → 圧縮 → 記憶ロード → プロンプト組み立て → LLM + + + + + s09 保持:ループ、圧縮パイプライン、記憶ロード、ツール実行 + + s10 新規:PROMPT_SECTIONS(4 セクション)+ assemble_system_prompt + get_system_prompt(キャッシュ) + diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg b/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg new file mode 100644 index 0000000..40c7df7 --- /dev/null +++ b/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + System Prompt — PROMPT_SECTIONS + 按需拼接 + 缓存 + + + + s09 保留 + + s10 新增 + + + + + + PROMPT_SECTIONS + ✓ identity (始终) + ✓ tools (始终) + ✓ workspace (始终) + ○ memory + + + + + + + assemble_system_prompt + 输入: context dict + 始终: identity + tools + workspace + 按需: memory + 输出: "\n\n".join(selected) + + + + + + + get_system_prompt + json.dumps(context) + 命中 → 返回缓存 + 未命中 → assemble + 存 + (s10 新增) + + + + system=get_system_prompt(context) + + + + + + messages[] + + + + + + + 压缩 + Loading + snip → micro → budget → auto + → 加载记忆 (s09) + + + + + + + LLM + stop_reason=tool_use? 
+ system assembled + + + + + + + + TOOL_HANDLERS + bash · read · write + (s09 保留) + + + + 工具结果 → messages[] → 压缩 → 加载记忆 → 组装 prompt → LLM + + + + + s09 保留:循环、压缩管线、记忆加载、工具执行 + + s10 新增:PROMPT_SECTIONS(4 段)+ assemble_system_prompt + get_system_prompt(缓存) + diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg b/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg new file mode 100644 index 0000000..22790a3 --- /dev/null +++ b/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error Recovery — try/except wrapping LLM calls, three recovery modes + + + + s10 retained + + s11 new + + + + messages + + + + + prompt assembly + (s10) + + + + + compress + load + (s08-s09) + + + + + + LLM + try/except + + + + + TOOL_HANDLERS + bash · read · write + + + + error + + + + Error Recovery (classify, recover, retry LLM) + + + + Path 1 + max_tokens + Output truncated → escalate 8K→64K (once) / continuation prompt (max 3) + Trigger: stop_reason == "max_tokens" · Cost: 0-1 API · Recover then continue + + + + Path 2 + prompt_too_long + Context overflow → reactive compact → retry (one chance) + Trigger: API returns 413 · Cost: 1 API · Still over after compact → exit + + + + Path 3 + 429/529 + Transient failure → exponential backoff + jitter (max 10) / 3×529 → switch model + Trigger: RateLimitError / OverloadedError · Formula: min(500×2^n, 32s) + jitter + + + + Three most common recovery modes. CC has 13+ reason codes (image_error, aborted_streaming, etc.), each with dedicated handling. 
+ All paths after recovery → continue back to LLM · Normal flow: tool results → messages → loop + diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg b/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg new file mode 100644 index 0000000..36c4fd6 --- /dev/null +++ b/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error Recovery — try/except で LLM 呼び出しをラップ、3 つの復旧モード + + + + s10 維持 + + s11 新規 + + + + messages + + + + + prompt assembly + (s10) + + + + + compress + load + (s08-s09) + + + + + + LLM + try/except + + + + + TOOL_HANDLERS + bash · read · write + + + + エラー + + + + エラー復旧(分類処理、復旧後 LLM に戻りリトライ) + + + + パス 1 + max_tokens + 出力が途切れた → 8K→64K に拡張(1 回)/ 続行プロンプト(最大 3 回) + トリガー: stop_reason == "max_tokens" · コスト: 0-1 API · 復旧後 continue + + + + パス 2 + prompt_too_long + コンテキスト超過 → reactive compact → リトライ(1 回のみ) + トリガー: API が 413 返却 · コスト: 1 API · 圧縮後も超過 → 終了 + + + + パス 3 + 429/529 + 一時障害 → 指数バックオフ + ジッター(最大 10 回)/ 3 回 529 → モデル切替 + トリガー: RateLimitError / OverloadedError · 式: min(500×2^n, 32s) + jitter + + + + 最も一般的な 3 つの復旧モード。CC は実際に 13+ の reason code を持ち(image_error, aborted_streaming 等)、それぞれ専用の処理がある。 + 全パス復旧後 → continue で LLM に戻る · 正常フロー: ツール結果 → messages → ループ + \ No newline at end of file diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg b/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg new file mode 100644 index 0000000..63f4b2f --- /dev/null +++ b/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error Recovery — try/except 包裹 LLM 调用,三种恢复模式 + + + + s10 保留 + + s11 新增 + + + + messages + + + + + prompt assembly + (s10) + + + + + compress + load + (s08-s09) + + + + + + LLM + try/except + + + + + TOOL_HANDLERS + bash · read · write + + + + 报错 + + + + 
错误恢复(分类处理,恢复后回到 LLM 重试) + + + + 路径 1 + max_tokens + 输出被截断 → 升级 8K→64K(一次)/ 续写提示(最多 3 次) + 触发: stop_reason == "max_tokens" · 代价: 0-1 API · 恢复后 continue + + + + 路径 2 + prompt_too_long + 上下文超限 → reactive compact → 重试(一次机会) + 触发: API 返回 413 · 代价: 1 API · 压缩过还是超 → 退出 + + + + 路径 3 + 429/529 + 临时故障 → 指数退避 + 抖动(最多 10 次)/ 3 次 529 → 切换模型 + 触发: RateLimitError / OverloadedError · 公式: min(500×2^n, 32s) + jitter + + + + 三种最常见的恢复模式。CC 实际有 13+ reason code(image_error、aborted_streaming 等),各有专门处理。 + 所有路径恢复后 → continue 回到 LLM · 正常流程: 工具结果 → messages → 循环 + diff --git a/web/public/course-assets/s12_task_system/task-dag.en.svg b/web/public/course-assets/s12_task_system/task-dag.en.svg new file mode 100644 index 0000000..6a07511 --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-dag.en.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + Task DAG — Dependency Example: Database → API → Tests → Deploy + + + + ✓ schema + completed + + + + + + + + ● endpoints + in_progress · owner: agent-1 + + + ○ docs + pending · blockedBy: schema ✓ + + + + + + + + ○ tests + blockedBy: endpoints ● + + + + + + ○ deploy + blockedBy: tests, docs + + + + + completed + + in_progress + + pending + → blockedBy (arrows = dependency direction) + docs' blockedBy (schema) is completed → can_start returns True, can be claimed + diff --git a/web/public/course-assets/s12_task_system/task-dag.ja.svg b/web/public/course-assets/s12_task_system/task-dag.ja.svg new file mode 100644 index 0000000..37ee46c --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-dag.ja.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + Task DAG — 依存関係の例:データベース → API → テスト → デプロイ + + + + ✓ schema + completed + + + + + + + + ● endpoints + in_progress · owner: agent-1 + + + ○ docs + pending · blockedBy: schema ✓ + + + + + + + + ○ tests + blockedBy: endpoints ● + + + + + + ○ deploy + blockedBy: tests, docs + + + + + completed + + in_progress + + pending + → blockedBy(矢印 = 依存方向) + docs の blockedBy (schema) は完了済み → can_start が True 
を返し、claim 可能 + diff --git a/web/public/course-assets/s12_task_system/task-dag.svg b/web/public/course-assets/s12_task_system/task-dag.svg new file mode 100644 index 0000000..c044bd6 --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-dag.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + Task DAG — 依赖关系示例:搭数据库 → API → 测试 → 部署 + + + + ✓ schema + completed + + + + + + + + ● endpoints + in_progress · owner: agent-1 + + + ○ docs + pending · blockedBy: schema ✓ + + + + + + + + ○ tests + blockedBy: endpoints ● + + + + + + ○ deploy + blockedBy: tests, docs + + + + + completed + + in_progress + + pending + → blockedBy(箭头 = 依赖方向) + docs 的 blockedBy (schema) 已完成 → can_start 返回 True,可被 claim + diff --git a/web/public/course-assets/s12_task_system/task-system-overview.en.svg b/web/public/course-assets/s12_task_system/task-system-overview.en.svg new file mode 100644 index 0000000..b4a74b6 --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-system-overview.en.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + Task System — 5 Task Tools + .tasks/ Persistence + blockedBy Dependencies + + + + s11 Preserved + + s12 New + + + + messages + + + + + prompt + compress + (s10-s11) + + + + + LLM (try/except) + (s11) + + + + + + TOOL_HANDLERS + bash · read · write + create_task · list_tasks + get_task · claim_task · complete_task + + + + + + + .tasks/ — Cross-session Persistence + task_xxx.json · task_yyy.json · task_zzz.json + {id, subject, description, status, owner, blockedBy} + Tutorial ID: timestamp + random | CC: sequential ID + highwatermark + + + + create / save / read + + + + Dependency Check + Lifecycle + can_start: all blockedBy completed? 
+ claim_task → owner = agent, pending → in_progress + complete_task → completed + unblock downstream + + + + State Machine: + + pending + + claim + + in_progress + + complete_task + + completed + No release rollback; crash → unassign owner + + + + + s11 Preserved: loop, prompt assembly, compression (error recovery independent from task system) + + s12 New: Task dataclass + 5 tools + .tasks/ persistence + blockedBy dependency graph + diff --git a/web/public/course-assets/s12_task_system/task-system-overview.ja.svg b/web/public/course-assets/s12_task_system/task-system-overview.ja.svg new file mode 100644 index 0000000..906a0db --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-system-overview.ja.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + Task System — 5 つのタスクツール + .tasks/ 永続化 + blockedBy 依存 + + + + s11 保持 + + s12 新規 + + + + messages + + + + + prompt + compress + (s10-s11) + + + + + LLM (try/except) + (s11) + + + + + + TOOL_HANDLERS + bash · read · write + create_task · list_tasks + get_task · claim_task · complete_task + + + + + + + .tasks/ — セッション横断永続化 + task_xxx.json · task_yyy.json · task_zzz.json + {id, subject, description, status, owner, blockedBy} + チュートリアル ID: timestamp + random | CC: 順次 ID + highwatermark + + + + create / save / read + + + + 依存チェック + ライフサイクル + can_start: blockedBy がすべて completed? 
+ claim_task → owner = agent, pending → in_progress + complete_task → completed + 下流をアンロック + + + + 状態マシン: + + pending + + claim + + in_progress + + complete_task + + completed + release ロールバックなし、クラッシュ時は unassign で owner クリア + + + + + s11 保持:ループ、プロンプト組み立て、圧縮(エラーリカバリとタスクシステムは独立) + + s12 新規:Task dataclass + 5 ツール + .tasks/ 永続化 + blockedBy 依存グラフ + diff --git a/web/public/course-assets/s12_task_system/task-system-overview.svg b/web/public/course-assets/s12_task_system/task-system-overview.svg new file mode 100644 index 0000000..097b61f --- /dev/null +++ b/web/public/course-assets/s12_task_system/task-system-overview.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + Task System — 5 个任务工具 + .tasks/ 持久化 + blockedBy 依赖 + + + + s11 保留 + + s12 新增 + + + + messages + + + + + prompt + compress + (s10-s11) + + + + + LLM (try/except) + (s11) + + + + + + TOOL_HANDLERS + bash · read · write + create_task · list_tasks + get_task · claim_task · complete_task + + + + + + + .tasks/ — 跨会话持久化 + task_xxx.json · task_yyy.json · task_zzz.json + {id, subject, description, status, owner, blockedBy} + 教学版 ID: timestamp + random | CC: 顺序 ID + highwatermark + + + + create / save / read + + + + 依赖检查 + 生命周期 + can_start: blockedBy 全部 completed? 
+ claim_task → owner = agent, pending → in_progress + complete_task → completed + 解锁下游 + + + + 状态机: + + pending + + claim + + in_progress + + complete_task + + completed + CC 无 release 回退,崩溃时用 unassign 清 owner + + + + + s11 保留:循环、prompt 组装、压缩(错误恢复与任务系统独立) + + s12 新增:Task dataclass + 5 个工具 + .tasks/ 持久化 + blockedBy 依赖图 + diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg b/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg new file mode 100644 index 0000000..830ffb9 --- /dev/null +++ b/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + Background Tasks — Slow ops to background, Agent keeps thinking + + + + s12 retained + + s13 new + + + + messages + + + + + prompt + cache + (s10-s12) + + + + + LLM call + (s11 retry) + + + + + + TOOL DISPATCH + fast? → sync execute (s12) + slow? → run_in_background ★ + + + + + + + Background thread execution + run_in_background(tool_use_id, fn, *args) + threading.Thread(target=worker, daemon=True) + result → background_results[id] (threading.Lock protected) + + + + slow op + + + + Notification injection + collect_background_results() check each turn + completed → tool_result inject into messages + pending → "[Running in background...]" placeholder + + + + + + + Heuristic: + + fast + read_file · git status · glob + + slow + npm install · pip install · pytest (timeout > 30s) + + + + s12 sync blocking + + think + + waiting for bash 3min... 
+ + continue + Total ~3min, Agent idled for 3 minutes + + + s13 background execution + + think + + keep doing other work + + notification: result ready + Total ~3min, but Agent wasn't idle + \ No newline at end of file diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg b/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg new file mode 100644 index 0000000..207eec4 --- /dev/null +++ b/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + Background Tasks — 遅い操作はバックグラウンドへ、Agent は考え続ける + + + + s12 維持 + + s13 新規 + + + + messages + + + + + prompt + cache + (s10-s12) + + + + + LLM call + (s11 retry) + + + + + + TOOL DISPATCH + fast? → 同期実行 (s12) + slow? → run_in_background ★ + + + + + + + バックグラウンドスレッド実行 + run_in_background(tool_use_id, fn, *args) + threading.Thread(target=worker, daemon=True) + 結果 → background_results[id] (threading.Lock で保護) + + + + slow op + + + + 通知注入 + collect_background_results() 毎ターン確認 + 完了 → tool_result を messages に注入 + 未完了 → "[Running in background...]" プレースホルダー + + + + + + + ヒューリスティック判定: + + fast + read_file · git status · glob + + slow + npm install · pip install · pytest (timeout > 30s) + + + + s12 同期ブロッキング + + 思考 + + bash 待ち 3分... + + 継続 + 合計 ~3分、Agent は3分間待機 + + + s13 バックグラウンド実行 + + 思考 + + 別の作業を継続 + + 通知: 結果完了 + 合計 ~3分、Agent は遊ばず + diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg b/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg new file mode 100644 index 0000000..ac6dff0 --- /dev/null +++ b/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + Background Tasks — 慢操作丢后台,Agent 继续思考 + + + + s12 保留 + + s13 新增 + + + + messages + + + + + prompt + cache + (s10-s12) + + + + + LLM call + (s11 retry) + + + + + + TOOL DISPATCH + fast? → 同步执行 (s12) + slow? 
→ run_in_background ★ + + + + + + + 后台线程执行 + run_in_background(tool_use_id, fn, *args) + threading.Thread(target=worker, daemon=True) + 结果 → background_results[id] (threading.Lock 保护) + + + + slow op + + + + 通知注入 + collect_background_results() 每轮检查 + 已完成 → tool_result 注入 messages + 未完成 → "[Running in background...]" 占位 + + + + + + + 启发式判断: + + fast + read_file · git status · glob + + slow + npm install · pip install · pytest (timeout > 30s) + + + + s12 同步阻塞 + + 思考 + + 等 bash 3 分钟... + + 继续 + 总耗时 ~3min,Agent 空等了 3 分钟 + + + s13 后台执行 + + 思考 + + 继续做别的事 + + 通知: 结果来了 + 总耗时 ~3min,但 Agent 没闲着 + diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg new file mode 100644 index 0000000..77bfd3a --- /dev/null +++ b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + Cron Scheduler — Independent scheduler thread + cron_queue injection point + + + + s10-s13 retained + + s14 new + + + + + + consume + cron_queue + ★ s14 injection + + + + + + messages + + + + + + prompt + cache + assemble_system_prompt + (s10) + + + + + + LLM (try/except) + with_retry + (s11) + + + + + + TOOL DISPATCH + fast → sync (bash, read, write) + slow → background thread (s13) + cron → schedule_cron, list, cancel (s14) + task → create, list, claim, complete (s12) + + + + loop back: tool_results → next turn + + + + cron_scheduler_loop (daemon thread) + time.sleep(1) → cron_matches(job.cron, now) + match → cron_queue.append(job) + minute_marker prevents double-fire per minute + one-shot jobs auto-delete after firing + + + + + + + cron_queue + cron_lock · scheduler writes · loop reads + + + + next agent_loop consumes + + + + CronJob + Persistence + CronJob dataclass: + id, cron, prompt, recurring, durable + Durable → .scheduled_tasks.json + restored via load_durable_jobs after restart + Session-only → memory only + lost 
when process exits + ⚠ Process exit = scheduler stops (not OS-level crontab) + + + + 5-field Cron Expression + + * + + * + + * + + * + + * + min + hour + day + month + dow + + */5 * * * * → every 5 minutes + 0 9 * * 1-5 → weekdays 9:00 + 0 9 * * * → daily 9:00 + Supports: *, */N, N, N-M, N,M,... + diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg new file mode 100644 index 0000000..bc63ff6 --- /dev/null +++ b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + Cron Scheduler — 独立スケジューラスレッド + cron_queue 注入ポイント + + + + s10-s13 維持 + + s14 新規 + + + + + + consume + cron_queue + ★ s14 注入点 + + + + + + messages + + + + + + prompt + cache + assemble_system_prompt + (s10) + + + + + + LLM (try/except) + with_retry + (s11) + + + + + + TOOL DISPATCH + fast → sync (bash, read, write) + slow → background thread (s13) + cron → schedule_cron, list, cancel (s14) + task → create, list, claim, complete (s12) + + + + loop back: tool_results → next turn + + + + cron_scheduler_loop (daemon スレッド) + time.sleep(1) → cron_matches(job.cron, now) + マッチ → cron_queue.append(job) + minute_marker で同一分の重複発火を防止 + 一度きりのタスクは発火後自動削除 + + + + + + + cron_queue + cron_lock · スケジューラ書込 · loop 読込 + + + + 次の agent_loop が消費 + + + + CronJob + 永続化 + CronJob dataclass: + id, cron, prompt, recurring, durable + Durable → .scheduled_tasks.json + 再起動後 load_durable_jobs で復元 + Session-only → メモリのみ + プロセス終了で消失 + ⚠ プロセス終了 = スケジューラ停止(OS レベルの crontab ではない) + + + + 5 フィールド Cron 式 + + * + + * + + * + + * + + * + 分 + 時 + 日 + 月 + 曜日 + + */5 * * * * → 5 分ごと + 0 9 * * 1-5 → 平日 9:00 + 0 9 * * * → 毎日 9:00 + 対応: *, */N, N, N-M, N,M,... 
+ diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg new file mode 100644 index 0000000..3a8c4db --- /dev/null +++ b/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg @@ -0,0 +1,125 @@ + + + + + + + + + + + + + + + + + + + Cron Scheduler — 独立调度线程 + cron_queue 注入点 + + + + s10-s13 保留 + + s14 新增 + + + + + + consume + cron_queue + ★ s14 注入点 + + + + + + messages + + + + + + prompt + cache + assemble_system_prompt + (s10) + + + + + + LLM (try/except) + with_retry + (s11) + + + + + + TOOL DISPATCH + fast → sync (bash, read, write) + slow → background thread (s13) + cron → schedule_cron, list, cancel (s14) + task → create, list, claim, complete (s12) + + + + loop back: tool_results → next turn + + + + cron_scheduler_loop(独立 daemon 线程) + time.sleep(1) → cron_matches(job.cron, now) + 匹配 → cron_queue.append(job) + minute_marker 防同分钟重复触发 + 一次性任务触发后自动删除 + + + + + + + cron_queue + cron_lock 保护 · 调度线程写 · agent_loop 读 + + + + 下次 agent_loop 消费 + + + + CronJob + 持久化 + CronJob dataclass: + id, cron, prompt, recurring, durable + Durable → .scheduled_tasks.json + 重启后 load_durable_jobs 恢复 + Session-only → 内存 only + 进程关闭即丢 + ⚠ 进程关闭 = 调度停止(不是 OS 级 crontab) + + + + 五段式 Cron 表达式 + + * + + * + + * + + * + + * + 分钟 + 小时 + 日 + 月 + 星期 + + */5 * * * * → 每 5 分钟 + 0 9 * * 1-5 → 工作日 9:00 + 0 9 * * * → 每天 9:00 + 支持: *, */N, N, N-M, N,M,... 
+ diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg b/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg new file mode 100644 index 0000000..f87995a --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Agent Teams — Lead Loop + Teammate Threads + MessageBus + + + + s10-s14 Preserved + + s15 New + + Teammate + + Real CC detail + + + + cron_queue + + + + + messages + + + + + prompt + cache + + + + + LLM call + + + + + TOOL DISPATCH + bash · read · write · task(4) · cron(3) + ★ spawn_teammate · send_message · check_inbox + + + + + + + + spawn + + + + MessageBus (.mailboxes/*.jsonl) + + + + + + receive + receive + receive + + + + + send + send + send + + + Teammate: alice (Backend) + inbox → LLM → bash/read/write/send + Max 10 rounds → summary → BUS.send + + + Teammate: bob (Frontend) + Independent agent_loop, shared client + Thread(daemon=True) + + + Teammate: charlie (QA) + Cannot spawn other teammates + spawn → work → summary + + + + + permission_request + + + Permission Bubbling (real CC; omitted in teaching code) + ① Teammate needs approval → MessageBus sends permission_request ② Lead receives → user approval → approve/deny + + + + + s10-s14: prompt assembly, error recovery, task graph, background threads, cron scheduling + + s15: MessageBus + spawn_teammate_thread + send_message + check_inbox (permission bubbling is a real CC detail) + diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg b/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg new file mode 100644 index 0000000..47c9665 --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Agent Teams — Lead ループ + チームメイトスレッド + MessageBus + + + + s10-s14 保持 + + s15 新規 + + チームメイト + + 実際の CC 補足 + + + + cron_queue + + + + + 
messages + + + + + prompt + cache + + + + + LLM call + + + + + TOOL DISPATCH + bash · read · write · task(4) · cron(3) + ★ spawn_teammate · send_message · check_inbox + + + + + + + + spawn + + + + MessageBus (.mailboxes/*.jsonl) + + + + + + receive + receive + receive + + + + + send + send + send + + + チームメイト: alice (Backend) + inbox → LLM → bash/read/write/send + 最大 10 ラウンド → summary → BUS.send + + + チームメイト: bob (Frontend) + 独立 agent_loop、共有 client + Thread(daemon=True) + + + チームメイト: charlie (QA) + 他のチームメイトを spawn 不可 + spawn → work → summary + + + + + permission_request + + + 権限バブリング(実際の CC、教育版は省略) + ① 承認が必要 → MessageBus が permission_request 送信 ② Lead が受信 → ユーザー承認 → approve/deny + + + + + s10-s14:プロンプト組み立て、エラーリカバリ、タスクグラフ、バックグラウンドスレッド、cron + + s15:MessageBus + spawn_teammate_thread + send_message + check_inbox(権限バブリングは実際の CC 補足) + diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg b/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg new file mode 100644 index 0000000..e708a33 --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Agent Teams — Lead Loop + Teammate Threads + MessageBus + + + + s10-s14 保留 + + s15 新增 + + Teammate + + 真实 CC 补充 + + + + + + cron_queue + + + + + messages + + + + + prompt + cache + + + + + LLM call + + + + + TOOL DISPATCH + bash · read · write · task(4) · cron(3) + ★ spawn_teammate · send_message · check_inbox + + + + + + + + + spawn + + + + + MessageBus (.mailboxes/*.jsonl) + + + + + + + + + receive + receive + receive + + + + + + send + send + send + + + + Teammate: alice (Backend) + inbox → LLM → bash/read/write/send + 最多 10 轮 → summary → BUS.send + + + + Teammate: bob (Frontend) + 独立 agent_loop,共享 client + Thread(daemon=True) + + + + Teammate: charlie (QA) + 不能 spawn 其他 teammate + spawn → work → summary + + + + + + permission_request + + + 权限冒泡(真实 CC,教学版省略) + ① 队友需审批 → MessageBus 发送 
permission_request ② Lead 收到 → 用户审批 → 回复 approve/deny + + + + + s10-s14: prompt 组装、错误恢复、任务图、后台线程、cron 调度 + + s15: MessageBus + spawn_teammate_thread + send_message + check_inbox(权限冒泡见真实 CC 补充) + diff --git a/web/public/course-assets/s15_agent_teams/team-topology.en.svg b/web/public/course-assets/s15_agent_teams/team-topology.en.svg new file mode 100644 index 0000000..7540db7 --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/team-topology.en.svg @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + Team Topology — Lead ↔ MessageBus ↔ Teammates + + + + Lead Agent + Main loop + spawn + inbox handling + check_inbox receives teammate messages + + + + Message Bus (.mailboxes/*.jsonl) + + + + Alice (Backend) + own loop → inbox → work → reply + + + Bob (Frontend) + own loop → inbox → work → reply + + + Charlie (QA) + own loop → inbox → work → reply + + + + send + + inbox + + + + + + receive + receive + receive + + + + send + send + send + + diff --git a/web/public/course-assets/s15_agent_teams/team-topology.ja.svg b/web/public/course-assets/s15_agent_teams/team-topology.ja.svg new file mode 100644 index 0000000..77c8709 --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/team-topology.ja.svg @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + Team Topology — Lead ↔ MessageBus ↔ チームメイト + + + + Lead Agent + メインループ + spawn + inbox 処理 + check_inbox でチームメイトのメッセージ受信 + + + + Message Bus (.mailboxes/*.jsonl) + + + + Alice (Backend) + 独立 loop → inbox → 作業 → 返信 + + + Bob (Frontend) + 独立 loop → inbox → 作業 → 返信 + + + Charlie (QA) + 独立 loop → inbox → 作業 → 返信 + + + + send + + inbox + + + + + + receive + receive + receive + + + + send + send + send + + diff --git a/web/public/course-assets/s15_agent_teams/team-topology.svg b/web/public/course-assets/s15_agent_teams/team-topology.svg new file mode 100644 index 0000000..9272e1b --- /dev/null +++ b/web/public/course-assets/s15_agent_teams/team-topology.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + 
+ + + Team Topology — Lead ↔ MessageBus ↔ Teammates + + + + Lead Agent + 主循环 + spawn + inbox 处理 + check_inbox 接收队友消息 + + + + Message Bus (.mailboxes/*.jsonl) + + + + Alice (Backend) + 独立 loop → inbox → 干活 → 回复 + + + Bob (Frontend) + 独立 loop → inbox → 干活 → 回复 + + + Charlie (QA) + 独立 loop → inbox → 干活 → 回复 + + + + + send + + + inbox + + + + + + + receive + receive + receive + + + + + send + send + send + + diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg b/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg new file mode 100644 index 0000000..7dd6b28 --- /dev/null +++ b/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Team Protocols — Request-Response + request_id Correlation + State Machine + + + + s15 Preserved + + s16 New + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (core tool set) + bash · read · write · task(4) · spawn · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + Request-Response Protocol Flow (request_id throughout) + + + + ① Lead sends request + BUS.send("shutdown_request" + metadata={request_id}) + + + + + + ② Teammate receives + dispatch_by_type(inbox) + → handler(type, metadata) + + + + + + ③ Teammate responds + BUS.send("shutdown_response" + same request_id + approve) + + + + + + ④ Lead receives + match_response(request_id) + → resolve/reject callback + + + + + State Machine (same for both protocols) + + + pending + + + approve + + + approved + + + reject + + + rejected + + + + pending_requests Storage + pending_requests: dict[str, ProtocolState] + request_id → {type, sender, status, created_at} + match_response: find request by request_id + + + + Two protocols, one mechanism: + + shutdown_request + and + + plan_approval_request + share the same pending→approved/rejected FSM + New protocol type = new msg_type, no new state 
machine. request_id links request and response. + + + + + s15: MessageBus + spawn_teammate + inbox + + s16: request_id protocol + dispatch + pending_requests + state machine + diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg b/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg new file mode 100644 index 0000000..28b368b --- /dev/null +++ b/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Team Protocols — リクエスト・レスポンス + request_id 紐付け + 状態機械 + + + + s15 保持 + + s16 新規 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH(コアツールセット) + bash · read · write · task(4) · spawn · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + リクエスト・レスポンスプロトコルフロー(request_id が全チェーンを貫通) + + + + ① Lead が要求送信 + BUS.send("shutdown_request" + metadata={request_id}) + + + + + + ② チームメイト受信 + dispatch_by_type(inbox) + → handler(type, metadata) + + + + + + ③ チームメイト応答 + BUS.send("shutdown_response" + 同じ request_id + approve) + + + + + + ④ Lead 応答受信 + match_response(request_id) + → resolve/reject callback + + + + 状態機械(2 つのプロトコルで共通) + + + pending + + + approve + + + approved + + + reject + + + rejected + + + pending_requests ストレージ + pending_requests: dict[str, ProtocolState] + request_id → {type, sender, status, created_at} + match_response: request_id で要求を検索 + + + + 2 つのプロトコル、1 つの仕組み: + + shutdown_request + + + plan_approval_request + が pending→approved/rejected 状態機械を共有 + 新しいプロトコルタイプ = 新しい msg_type、新しい状態機械は不要。request_id が要求と応答を紐付け。 + + + + + s15: MessageBus + spawn_teammate + inbox + + s16: request_id プロトコル + dispatch + pending_requests + 状態機械 + diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg b/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg new file mode 100644 index 0000000..04a9a80 --- /dev/null +++ 
b/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg @@ -0,0 +1,148 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Team Protocols — 请求-响应协议 + request_id 关联 + 状态机 + + + + s15 保留 + + s16 新增 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH(核心工具集) + bash · read · write · task(4) · spawn · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + 请求-响应协议流程(request_id 贯穿) + + + + ① Lead 发请求 + BUS.send("shutdown_request" + metadata={request_id}) + + + + + + ② 队友收到 + dispatch_by_type(inbox) + → handler(type, metadata) + + + + + + ③ 队友回复 + BUS.send("shutdown_response" + 同 request_id + approve) + + + + + + ④ Lead 收响应 + match_response(request_id) + → resolve/reject callback + + + + + 状态机(同一套,两种协议) + + + + pending + + + + approve + + + + approved + + + + reject + + + + rejected + + + + pending_requests 存储 + pending_requests: dict[str, ProtocolState] + request_id → {type, sender, status, created_at} + match_response: 按 request_id 找回对应请求 + + + + 两种协议,同一套机制: + + shutdown_request + + + plan_approval_request + 共用 pending→approved/rejected 状态机 + 新增协议类型 = 新的 msg_type,不需要新状态机。request_id 关联请求和响应。 + + + + + s15: MessageBus + spawn_teammate + inbox + + s16: request_id 协议 + dispatch + pending_requests + 状态机 + diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg new file mode 100644 index 0000000..709676b --- /dev/null +++ b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + Autonomous Agents — Idle Loop + Auto-Claim + WORK/IDLE Lifecycle + + + + s16 Preserved + + s17 New + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (all s16 preserved) + bash · read · write · task(4) · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + 
same inner LLM/tool loop inside WORK + + + + Teammate Lifecycle (s17 new: WORK → IDLE → SHUTDOWN) + + + + WORK Phase + inner loop: inbox → LLM → bash / read / write + stop_reason == tool_use → loop + stop_reason != tool_use → IDLE + Max 10 rounds / interruptible by shutdown_request + + + + task done + + + + work found + + + + IDLE Phase (poll every 5s) + ├ Check inbox → has message → back to WORK + ├ scan_unclaimed_tasks → claim → back to WORK + └ 60s timeout → SHUTDOWN ↓ + idle_poll() + claim_task() + + + + SHUTDOWN + + + + 60s timeout + + + + + s16: MessageBus + protocols + request_shutdown + plan approval + + s17: idle_poll + scan_unclaimed_tasks + auto_claim + identity re-injection + + + + Lead tools unchanged (14) · Teammate tools 5 → 8 (+3 task tools) · Teammates self-claim, Lead only creates tasks + diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg new file mode 100644 index 0000000..65d9a76 --- /dev/null +++ b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + Autonomous Agents — アイドルポーリング + 自動認領 + WORK/IDLE ライフサイクル + + + + s16 保持 + + s17 新規 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH(s16 全保持) + bash · read · write · task(4) · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + 同じ内側 LLM/tool loop を WORK に入れる + + + + チームメイトライフサイクル(s17 新規:WORK → IDLE → SHUTDOWN) + + + + WORK フェーズ + 内側ループ:inbox → LLM → bash / read / write + stop_reason == tool_use → ループ + stop_reason != tool_use → IDLE + 最大 10 ラウンド / shutdown_request で中断可能 + + + + タスク完了 + + + + 仕事を発見 + + + + IDLE フェーズ(5 秒ごとにポーリング) + ├ inbox チェック → メッセージあり → WORK に戻る + ├ scan_unclaimed_tasks → 認領 → WORK に戻る + └ 60 秒タイムアウト → SHUTDOWN ↓ + idle_poll() + claim_task() + + + + SHUTDOWN + + + + 60 秒タイムアウト + + + + + s16: MessageBus + 
protocols + request_shutdown + plan approval + + s17: idle_poll + scan_unclaimed_tasks + auto_claim + identity re-injection + + + + Lead ツール不変(14) · チームメイトツール 5 → 8(+3 task tools) · チームメイトが自己認領、Lead はタスク作成のみ + diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg new file mode 100644 index 0000000..df99675 --- /dev/null +++ b/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + Autonomous Agents — 空闲循环 + 自动认领 + WORK/IDLE 生命周期 + + + + s16 保留 + + s17 新增 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (s16 全保留) + bash · read · write · task(4) · send · inbox + ★ request_shutdown · request_plan · review_plan + + + + + + + 同一个内层 LLM/tool loop 放进 WORK + + + + 队友生命周期(s17 新增:WORK → IDLE → SHUTDOWN) + + + + WORK 阶段 + 内层循环:inbox → LLM → bash / read / write + stop_reason == tool_use → loop + stop_reason != tool_use → IDLE + 最多 10 轮 / 可被 shutdown_request 中断 + + + + 任务完成 + + + + 发现新任务 + + + + IDLE 阶段(每 5s 轮询) + ├ 检查 inbox → 有消息 → 回 WORK + ├ scan_unclaimed_tasks → 认领 → 回 WORK + └ 60s 超时 → SHUTDOWN ↓ + idle_poll() + claim_task() + + + + SHUTDOWN + + + + 60s 超时 + + + + + s16: MessageBus + protocols + request_shutdown + plan approval + + s17: idle_poll + scan_unclaimed_tasks + auto_claim + identity re-injection + + + + Lead 工具不变(14) · 队友工具 5 → 8(+3 task tools) · 队友自主认领,Lead 只创建任务 + diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg b/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg new file mode 100644 index 0000000..57c915f --- /dev/null +++ b/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + Worktree Isolation — Git Worktree + Task-Directory Binding + Event Log + + + + s17 Preserved + + s18 New + + + 
+ turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (s17 + s18) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + ★ create_worktree · remove_worktree · keep_worktree + + + + + + + Worktree Isolation (s18 new: each task gets its own directory + branch) + + + + Main repo (.tasks/ + .worktrees/ + .mailboxes/) + + + + create + bind + + + + create + bind + + + + Alice: .worktrees/auth/ + branch: wt/auth-refactor + Task: Refactor auth module + ✓ Isolated, no impact on Bob or main repo + + + + Bob: .worktrees/ui/ + branch: wt/ui-login + Task: Refactor UI login page + ✓ Isolated, no impact on Alice or main repo + + + + Event log: .worktrees/events.jsonl → create / remove / keep + + + Cleanup: keep (preserve branch for review) / remove (delete + mark done) + + + + + s17: idle_poll + auto_claim + protocols + WORK/IDLE lifecycle + + s18: create_worktree + bind_task + remove/keep + events.jsonl (Lead 14→17) + diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg b/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg new file mode 100644 index 0000000..2a26071 --- /dev/null +++ b/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + Worktree Isolation — Git Worktree + タスク・ディレクトリ紐付け + イベントログ + + + + s17 保持 + + s18 新規 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH(s17 + s18) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + ★ create_worktree · remove_worktree · keep_worktree + + + + + + + Worktree 隔離(s18 新規:各タスクに独立ディレクトリ + 独立ブランチ) + + + + メインリポジトリ(.tasks/ + .worktrees/ + .mailboxes/) + + + + create + bind + + + + create + bind + + + + Alice: .worktrees/auth/ + branch: wt/auth-refactor + Task: 認証モジュールのリファクタリング + ✓ 隔離、Bob とメインリポジトリに影響なし + + + + Bob: .worktrees/ui/ + branch: wt/ui-login + 
Task: UI ログインページのリファクタリング + ✓ 隔離、Alice とメインリポジトリに影響なし + + + + イベントログ: .worktrees/events.jsonl → create / remove / keep + + + 片付け: keep(ブランチ保持 review)/ remove(削除+完了マーク) + + + + + s17: idle_poll + auto_claim + protocols + WORK/IDLE ライフサイクル + + s18: create_worktree + bind_task + remove/keep + events.jsonl(Lead 14→17) + diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg b/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg new file mode 100644 index 0000000..2b88a75 --- /dev/null +++ b/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + Worktree Isolation — Git Worktree + 任务-目录绑定 + 事件日志 + + + + s17 保留 + + s18 新增 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (s17 + s18) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + ★ create_worktree · remove_worktree · keep_worktree + + + + + + + Worktree 隔离(s18 新增:每个任务独立目录 + 独立分支) + + + + 主仓库 (.tasks/ + .worktrees/ + .mailboxes/) + + + + create + bind + + + + create + bind + + + + Alice: .worktrees/auth/ + branch: wt/auth-refactor + Task: 重构认证模块 + ✓ 隔离,不影响 Bob 和主仓库 + + + + Bob: .worktrees/ui/ + branch: wt/ui-login + Task: 重构 UI 登录页 + ✓ 隔离,不影响 Alice 和主仓库 + + + + 事件日志: .worktrees/events.jsonl → create / remove / keep + + + 收尾: keep (保留分支 review) / remove (删除+标记完成) + + + + + s17: idle_poll + auto_claim + protocols + WORK/IDLE lifecycle + + s18: create_worktree + bind_task + remove/keep + events.jsonl (Lead 14→17) + diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg new file mode 100644 index 0000000..01d0c06 --- /dev/null +++ b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + MCP Plugin — Standard Protocol + External Tool Integration + Tool Pool Assembly 
+ + + + s18 Preserved + + s19 New + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (Lead 18 tools) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + create_worktree · remove_worktree · keep_worktree + ★ connect_mcp + dynamic mcp__server__tool tools + + + + + + + MCP Architecture (s19 new: standard protocol + external tools dynamic integration) + + + + Agent Side (MCPClient) + + + connect_mcp → discover → register tools + + + assemble_tool_pool assembles builtin + mcp + + + call_tool("mcp__docs__search", ...) + + + + tools/list + + + tools/call + response + + + + MCP Servers (External Services) + + + docs server: search · get_version + + + deploy server: trigger · status + + + Any language, just needs stdio JSON-RPC + + + + Tool naming: mcp__{server}__{tool} → e.g. mcp__docs__search · mcp__deploy__trigger · prevents name collisions across servers + + + + + s18: worktree + events + protocols (Lead 17) + + s19: MCP + dynamic tools (Lead 18) + + + + Next: s20 combines tools, permissions, teams, worktrees, MCP, and more into one while True loop. 
+ diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg new file mode 100644 index 0000000..d2b5255 --- /dev/null +++ b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + MCP Plugin — 標準プロトコル + 外部ツール接続 + ツールプール組み立て + + + + s18 保持 + + s19 新規 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH(Lead 18 tools) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + create_worktree · remove_worktree · keep_worktree + ★ connect_mcp + 動的 mcp__server__tool ツール + + + + + + + MCP アーキテクチャ(s19 新規:標準プロトコル + 外部ツール動的統合) + + + + Agent 側(MCPClient) + + + connect_mcp → discover → ツール登録 + + + assemble_tool_pool builtin + mcp 組み立て + + + call_tool("mcp__docs__search", ...) + + + + tools/list + + + tools/call + response + + + + MCP Servers(外部サービス) + + + docs server: search · get_version + + + deploy server: trigger · status + + + 任意言語実装、stdio JSON-RPC のみ必要 + + + + ツール命名: mcp__{server}__{tool} → 例: mcp__docs__search · mcp__deploy__trigger · サーバー間の名前衝突を防止 + + + + + s18: worktree + events + protocols(Lead 17) + + s19: MCP + dynamic tools(Lead 18) + + + + 次の s20:tools、permissions、teams、worktree、MCP などを 1 つの while True ループに統合。 + diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg new file mode 100644 index 0000000..6b365d6 --- /dev/null +++ b/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + MCP Plugin — 标准协议 + 外部工具接入 + 工具池组装 + + + + s18 保留 + + s19 新增 + + + + turn + + + + + messages + + + + + prompt + + + + + LLM + + + + + TOOL DISPATCH (Lead 18 tools) + bash · read · write · task(4) · send · inbox + request_shutdown · request_plan · review_plan + create_worktree · remove_worktree · keep_worktree + ★ 
connect_mcp + 动态 mcp__server__tool 工具 + + + + + + + MCP 架构(s19 新增:标准协议 + 外部工具动态接入) + + + + Agent Side (MCPClient) + + + connect_mcp → discover → 注册工具 + + + assemble_tool_pool 组装 builtin + mcp + + + call_tool("mcp__docs__search", ...) + + + + tools/list + + + tools/call + response + + + + MCP Servers (外部服务) + + + docs server: search · get_version + + + deploy server: trigger · status + + + 任意语言实现,只需 stdio JSON-RPC + + + + 工具命名: mcp__{server}__{tool} → 例: mcp__docs__search · mcp__deploy__trigger · 避免不同 server 的工具名冲突 + + + + + s18: worktree + events + protocols (Lead 17) + + s19: MCP + dynamic tools (Lead 18) + + + + 下一章 s20:把工具、权限、团队、worktree、MCP 等机制合回同一个 while True 循环。 + diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.en.svg b/web/public/course-assets/s20_comprehensive/system-architecture.en.svg new file mode 100644 index 0000000..01ac3df --- /dev/null +++ b/web/public/course-assets/s20_comprehensive/system-architecture.en.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + s20 Comprehensive Agent — Every Mechanism Around One Loop + + Core Agent Loop + + messages[] + + + Before LLM + cron/background injection + compact + memory + prompt + + + LLM + stop_reason=tool_use? 
+ + + Before Tools + PreToolUse hooks + permission pipeline + + + handlers + builtin + MCP + + tool_result / task_notification → messages[] → next turn + + Context & Knowledge + s07 skills + load_skill + s09 memory selection + s10 prompt sections + s08 compact pipeline + + + Governance + s03 permission + s04 hooks + s11 retry / fallback + Stop hooks + + + Durable Work + s05 todo_write + s12 task graph + s13 background + s14 cron scheduler + + + Teams & Plugins + s06 subagent + s15-s17 team protocols + s18 worktree isolation + s19 MCP tools + + + TOOL POOL: 27 builtins + dynamic mcp__server__tool + file/shell: bash · read · write · edit · glob + single-agent: todo_write · task · load_skill · compact + durable work: task tools · cron tools + team: spawn_teammate · send_message · check_inbox + protocol: request_shutdown · request_plan · review_plan + isolation/plugin: worktree tools · connect_mcp + + diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg b/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg new file mode 100644 index 0000000..0461be0 --- /dev/null +++ b/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + s20 Comprehensive Agent — すべての仕組みを 1 つのループへ + + Core Agent Loop + + messages[] + + + LLM 前 + cron/background 注入 + compact + memory + prompt + + + LLM + stop_reason=tool_use? 
+ + + Tool 前 + PreToolUse hooks + permission pipeline + + + handlers + builtin + MCP + + tool_result / task_notification → messages[] → 次のターン + + Context / Knowledge + s07 skills + load_skill + s09 memory selection + s10 prompt sections + s08 compact pipeline + + + Governance + s03 permission + s04 hooks + s11 retry / fallback + Stop hooks + + + Durable Work + s05 todo_write + s12 task graph + s13 background + s14 cron scheduler + + + Teams / Plugins + s06 subagent + s15-s17 team protocols + s18 worktree isolation + s19 MCP tools + + + TOOL POOL: 27 builtins + dynamic mcp__server__tool + file/shell: bash · read · write · edit · glob + single-agent: todo_write · task · load_skill · compact + durable work: task tools · cron tools + team: spawn_teammate · send_message · check_inbox + protocol: request_shutdown · request_plan · review_plan + isolation/plugin: worktree tools · connect_mcp + + diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.svg b/web/public/course-assets/s20_comprehensive/system-architecture.svg new file mode 100644 index 0000000..72e52f8 --- /dev/null +++ b/web/public/course-assets/s20_comprehensive/system-architecture.svg @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + s20 Comprehensive Agent — 全部机制挂在同一个循环上 + + + + 核心 Agent Loop + + + messages[] + + + + + LLM 前处理 + cron / background 注入 + compact + memory + prompt + + + + + LLM + stop_reason=tool_use? 
+ + + + + 工具前闸门 + PreToolUse hooks + permission pipeline + + + + + handlers + builtin + MCP + + + tool_result / task_notification → messages[] → 下一轮 + + + + 上下文与知识 + s07 skills catalog + load_skill + s09 memory selection + s10 prompt sections + s08 compact pipeline + + + + 治理与扩展点 + s03 permission + s04 hooks + s11 retry / fallback + Stop hooks + + + + 持久工作 + s05 todo_write + s12 task graph + s13 background + s14 cron scheduler + + + + 团队与插件 + s06 subagent + s15-s17 team protocols + s18 worktree isolation + s19 MCP tools + + + + + TOOL POOL: 27 builtins + dynamic mcp__server__tool + file/shell: bash · read · write · edit · glob + single-agent: todo_write · task · load_skill · compact + durable work: create/list/get/claim/complete_task · schedule/list/cancel_cron + team: spawn_teammate · send_message · check_inbox + protocol: request_shutdown · request_plan · review_plan + isolation/plugin: create/remove/keep_worktree · connect_mcp + + diff --git a/web/scripts/extract-content.ts b/web/scripts/extract-content.ts index 6e35bad..7750b04 100644 --- a/web/scripts/extract-content.ts +++ b/web/scripts/extract-content.ts @@ -5,31 +5,58 @@ import type { VersionDiff, DocContent, VersionIndex, + ChapterImage, } from "../src/types/agent-data"; import { VERSION_META, VERSION_ORDER, LEARNING_PATH } from "../src/lib/constants"; -// Resolve paths relative to this script's location (web/scripts/) const WEB_DIR = path.resolve(__dirname, ".."); const REPO_ROOT = path.resolve(WEB_DIR, ".."); -const AGENTS_DIR = path.join(REPO_ROOT, "agents"); -const DOCS_DIR = path.join(REPO_ROOT, "docs"); +const LEGACY_AGENTS_DIR = path.join(REPO_ROOT, "agents"); +const LEGACY_DOCS_DIR = path.join(REPO_ROOT, "docs"); const OUT_DIR = path.join(WEB_DIR, "src", "data", "generated"); +const PUBLIC_DIR = path.join(WEB_DIR, "public"); +const COURSE_ASSETS_DIR = path.join(PUBLIC_DIR, "course-assets"); -// Map python filenames to version IDs -// s01_agent_loop.py -> s01 -// s02_tools.py -> s02 -// s_full.py -> 
s_full (reference agent, typically skipped) -function filenameToVersionId(filename: string): string | null { - const base = path.basename(filename, ".py"); - if (base === "s_full") return null; - if (base === "__init__") return null; +type Locale = "en" | "zh" | "ja"; - const match = base.match(/^(s\d+[a-c]?)_/); - if (!match) return null; - return match[1]; +interface ChapterSource { + id: string; + dirName: string; + dirPath: string; + codePath: string; +} + +function dirToVersionId(dirName: string): string | null { + const match = dirName.match(/^(s\d{2})_/); + return match ? match[1] : null; +} + +function filenameToVersionId(filename: string): string | null { + const base = path.basename(filename, ".py"); + if (base === "s_full" || base === "__init__") return null; + + const match = base.match(/^(s\d+[a-c]?)_/); + return match ? match[1] : null; +} + +function listRootChapters(): ChapterSource[] { + return fs + .readdirSync(REPO_ROOT, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => entry.name) + .filter((name) => /^s\d{2}_/.test(name)) + .sort() + .map((dirName) => { + const id = dirToVersionId(dirName); + if (!id) return null; + const dirPath = path.join(REPO_ROOT, dirName); + const codePath = path.join(dirPath, "code.py"); + if (!fs.existsSync(codePath)) return null; + return { id, dirName, dirPath, codePath }; + }) + .filter((chapter): chapter is ChapterSource => chapter !== null); } -// Extract classes from Python source function extractClasses( lines: string[] ): { name: string; startLine: number; endLine: number }[] { @@ -37,29 +64,31 @@ function extractClasses( const classPattern = /^class\s+(\w+)/; for (let i = 0; i < lines.length; i++) { - const m = lines[i].match(classPattern); - if (m) { - const name = m[1]; - const startLine = i + 1; - // Find end of class: next class/function at indent 0, or EOF - let endLine = lines.length; - for (let j = i + 1; j < lines.length; j++) { - if ( - lines[j].match(/^class\s/) || - 
lines[j].match(/^def\s/) || - (lines[j].match(/^\S/) && lines[j].trim() !== "" && !lines[j].startsWith("#") && !lines[j].startsWith("@")) - ) { - endLine = j; - break; - } + const match = lines[i].match(classPattern); + if (!match) continue; + + const name = match[1]; + const startLine = i + 1; + let endLine = lines.length; + for (let j = i + 1; j < lines.length; j++) { + if ( + lines[j].match(/^class\s/) || + lines[j].match(/^def\s/) || + (lines[j].match(/^\S/) && + lines[j].trim() !== "" && + !lines[j].startsWith("#") && + !lines[j].startsWith("@")) + ) { + endLine = j; + break; } - classes.push({ name, startLine, endLine }); } + classes.push({ name, startLine, endLine }); } + return classes; } -// Extract top-level functions from Python source function extractFunctions( lines: string[] ): { name: string; signature: string; startLine: number }[] { @@ -67,31 +96,28 @@ function extractFunctions( const funcPattern = /^def\s+(\w+)\((.*?)\)/; for (let i = 0; i < lines.length; i++) { - const m = lines[i].match(funcPattern); - if (m) { - functions.push({ - name: m[1], - signature: `def ${m[1]}(${m[2]})`, - startLine: i + 1, - }); - } + const match = lines[i].match(funcPattern); + if (!match) continue; + functions.push({ + name: match[1], + signature: `def ${match[1]}(${match[2]})`, + startLine: i + 1, + }); } + return functions; } -// Extract tool names from Python source -// Looks for "name": "tool_name" patterns in dict literals function extractTools(source: string): string[] { - const toolPattern = /"name"\s*:\s*"(\w+)"/g; + const toolPattern = /"name"\s*:\s*"([\w-]+)"/g; const tools = new Set(); - let m; - while ((m = toolPattern.exec(source)) !== null) { - tools.add(m[1]); + let match; + while ((match = toolPattern.exec(source)) !== null) { + tools.add(match[1]); } return Array.from(tools); } -// Count non-blank, non-comment lines function countLoc(lines: string[]): number { return lines.filter((line) => { const trimmed = line.trim(); @@ -99,181 +125,287 @@ 
function countLoc(lines: string[]): number { }).length; } -// Detect locale from subdirectory path -// docs/en/s01-the-agent-loop.md -> "en" -// docs/zh/s01-the-agent-loop.md -> "zh" -// docs/ja/s01-the-agent-loop.md -> "ja" -function detectLocale(relPath: string): "en" | "zh" | "ja" { +function detectLocale(relPath: string): Locale { if (relPath.startsWith("zh/") || relPath.startsWith("zh\\")) return "zh"; if (relPath.startsWith("ja/") || relPath.startsWith("ja\\")) return "ja"; return "en"; } -// Extract version from doc filename (e.g., "s01-the-agent-loop.md" -> "s01") function extractDocVersion(filename: string): string | null { - const m = filename.match(/^(s\d+[a-c]?)-/); - return m ? m[1] : null; + const match = filename.match(/^(s\d+[a-c]?)-/); + return match ? match[1] : null; } -// Main extraction -function main() { - console.log("Extracting content from agents and docs..."); - console.log(` Repo root: ${REPO_ROOT}`); - console.log(` Agents dir: ${AGENTS_DIR}`); - console.log(` Docs dir: ${DOCS_DIR}`); +function titleFromMarkdown(content: string, fallback: string): string { + const titleMatch = content.match(/^#\s+(.+)$/m); + return titleMatch ? titleMatch[1] : fallback; +} - // Skip extraction if source directories don't exist (e.g. Vercel build). - // Pre-committed generated data will be used instead. - if (!fs.existsSync(AGENTS_DIR)) { - console.log(" Agents directory not found, skipping extraction."); - console.log(" Using pre-committed generated data."); - return; - } +function cleanCourseAssets() { + fs.rmSync(COURSE_ASSETS_DIR, { recursive: true, force: true }); + fs.mkdirSync(COURSE_ASSETS_DIR, { recursive: true }); +} - // 1. 
Read all agent files - const agentFiles = fs - .readdirSync(AGENTS_DIR) - .filter((f) => f.startsWith("s") && f.endsWith(".py")); +function copyChapterAssets(chapter: ChapterSource): ChapterImage[] { + const imagesDir = path.join(chapter.dirPath, "images"); + if (!fs.existsSync(imagesDir)) return []; - console.log(` Found ${agentFiles.length} agent files`); + const outDir = path.join(COURSE_ASSETS_DIR, chapter.dirName); + fs.mkdirSync(outDir, { recursive: true }); + fs.cpSync(imagesDir, outDir, { recursive: true }); - const versions: AgentVersion[] = []; + return fs + .readdirSync(imagesDir) + .filter((filename) => filename.endsWith(".svg")) + .filter((filename) => !filename.includes(".en.") && !filename.includes(".ja.")) + .sort() + .map((filename) => ({ + src: `/course-assets/${chapter.dirName}/${filename}`, + alt: filename.replace(/\.svg$/, "").replace(/-/g, " "), + })); +} - for (const filename of agentFiles) { - const versionId = filenameToVersionId(filename); - if (!versionId) { - console.warn(` Skipping ${filename}: could not determine version ID`); - continue; - } +function localeReadmeName(locale: Locale): string { + if (locale === "zh") return "README.md"; + return `README.${locale}.md`; +} - const filePath = path.join(AGENTS_DIR, filename); - const source = fs.readFileSync(filePath, "utf-8"); - const lines = source.split("\n"); +function rewriteChapterMarkdown( + content: string, + chapter: ChapterSource, + locale: Locale +): string { + let next = content; - const meta = VERSION_META[versionId]; - const classes = extractClasses(lines); - const functions = extractFunctions(lines); - const tools = extractTools(source); - const loc = countLoc(lines); - - versions.push({ - id: versionId, - filename, - title: meta?.title ?? versionId, - subtitle: meta?.subtitle ?? "", - loc, - tools, - newTools: [], // computed after all versions are loaded - coreAddition: meta?.coreAddition ?? "", - keyInsight: meta?.keyInsight ?? 
"", - classes, - functions, - layer: meta?.layer ?? "tools", - source, - }); - } - - // Sort versions according to VERSION_ORDER - const orderMap = new Map(VERSION_ORDER.map((v, i) => [v, i])); - versions.sort( - (a, b) => (orderMap.get(a.id as any) ?? 99) - (orderMap.get(b.id as any) ?? 99) + next = next.replace( + /^\[中文\]\(README\.md\)\s*.\s*\[English\]\(README\.en\.md\)\s*.\s*\[日本語\]\(README\.ja\.md\)\n\n?/m, + "" ); - // 2. Compute newTools for each version - for (let i = 0; i < versions.length; i++) { - const prev = i > 0 ? new Set(versions[i - 1].tools) : new Set(); - versions[i].newTools = versions[i].tools.filter((t) => !prev.has(t)); + next = next.replace( + /(!\[[^\]]*\]\()images\/([^)]+)(\))/g, + `$1/course-assets/${chapter.dirName}/$2$3` + ); + + next = next.replace( + /\]\(\.\.\/(s\d{2}_[^)\/]+)\/?\)/g, + (_match, dirName) => { + const id = dirToVersionId(dirName); + return id ? `](/${locale}/${id})` : `](../${dirName}/)`; + } + ); + + next = next.replace( + /\]\(\.\/(s\d{2}_[^)\/]+)\/?\)/g, + (_match, dirName) => { + const id = dirToVersionId(dirName); + return id ? `](/${locale}/${id})` : `](./${dirName}/)`; + } + ); + + return next; +} + +function buildRootVersions(chapters: ChapterSource[]): AgentVersion[] { + return chapters.map((chapter) => { + const source = fs.readFileSync(chapter.codePath, "utf-8"); + const lines = source.split("\n"); + const meta = VERSION_META[chapter.id]; + + return { + id: chapter.id, + filename: `${chapter.dirName}/code.py`, + title: meta?.title ?? chapter.id, + subtitle: meta?.subtitle ?? "", + loc: countLoc(lines), + tools: extractTools(source), + newTools: [] as string[], + coreAddition: meta?.coreAddition ?? "", + keyInsight: meta?.keyInsight ?? "", + classes: extractClasses(lines), + functions: extractFunctions(lines), + layer: meta?.layer ?? 
"tools", + source, + images: copyChapterAssets(chapter), + }; + }); +} + +function buildLegacyVersions(): AgentVersion[] { + if (!fs.existsSync(LEGACY_AGENTS_DIR)) return []; + + const agentFiles = fs + .readdirSync(LEGACY_AGENTS_DIR) + .filter((filename) => filename.startsWith("s") && filename.endsWith(".py")); + + const versions = agentFiles + .map((filename) => { + const id = filenameToVersionId(filename); + if (!id) return null; + + const filePath = path.join(LEGACY_AGENTS_DIR, filename); + const source = fs.readFileSync(filePath, "utf-8"); + const lines = source.split("\n"); + const meta = VERSION_META[id]; + + return { + id, + filename, + title: meta?.title ?? id, + subtitle: meta?.subtitle ?? "", + loc: countLoc(lines), + tools: extractTools(source), + newTools: [] as string[], + coreAddition: meta?.coreAddition ?? "", + keyInsight: meta?.keyInsight ?? "", + classes: extractClasses(lines), + functions: extractFunctions(lines), + layer: meta?.layer ?? "tools", + source, + images: [] as ChapterImage[], + }; + }) + .filter((version): version is AgentVersion => version !== null); + + return versions; +} + +function buildRootDocs(chapters: ChapterSource[]): DocContent[] { + const docs: DocContent[] = []; + const locales: Locale[] = ["en", "zh", "ja"]; + + for (const chapter of chapters) { + for (const locale of locales) { + const filename = localeReadmeName(locale); + const filePath = path.join(chapter.dirPath, filename); + if (!fs.existsSync(filePath)) continue; + + const raw = fs.readFileSync(filePath, "utf-8"); + const content = rewriteChapterMarkdown(raw, chapter, locale); + docs.push({ + version: chapter.id, + locale, + title: titleFromMarkdown(content, filename), + content, + }); + } } - // 3. 
Compute diffs between adjacent versions in LEARNING_PATH + return docs; +} + +function buildLegacyDocs(): DocContent[] { + const docs: DocContent[] = []; + if (!fs.existsSync(LEGACY_DOCS_DIR)) return docs; + + const localeDirs: Locale[] = ["en", "zh", "ja"]; + for (const locale of localeDirs) { + const localeDir = path.join(LEGACY_DOCS_DIR, locale); + if (!fs.existsSync(localeDir)) continue; + + const docFiles = fs.readdirSync(localeDir).filter((f) => f.endsWith(".md")); + for (const filename of docFiles) { + const version = extractDocVersion(filename); + if (!version) continue; + + const relPath = path.join(locale, filename); + const filePath = path.join(LEGACY_DOCS_DIR, relPath); + const content = fs.readFileSync(filePath, "utf-8"); + docs.push({ + version, + locale: detectLocale(relPath), + title: titleFromMarkdown(content, filename), + content, + }); + } + } + + return docs; +} + +function computeNewTools(versions: AgentVersion[]) { + for (let i = 0; i < versions.length; i++) { + const prev = i > 0 ? 
new Set(versions[i - 1].tools) : new Set(); + versions[i].newTools = versions[i].tools.filter((tool) => !prev.has(tool)); + } +} + +function buildDiffs(versions: AgentVersion[]): VersionDiff[] { const diffs: VersionDiff[] = []; - const versionMap = new Map(versions.map((v) => [v.id, v])); + const versionMap = new Map(versions.map((version) => [version.id, version])); for (let i = 1; i < LEARNING_PATH.length; i++) { const fromId = LEARNING_PATH[i - 1]; const toId = LEARNING_PATH[i]; const fromVer = versionMap.get(fromId); const toVer = versionMap.get(toId); - if (!fromVer || !toVer) continue; - const fromClassNames = new Set(fromVer.classes.map((c) => c.name)); - const fromFuncNames = new Set(fromVer.functions.map((f) => f.name)); + const fromClassNames = new Set(fromVer.classes.map((cls) => cls.name)); + const fromFuncNames = new Set(fromVer.functions.map((fn) => fn.name)); const fromToolNames = new Set(fromVer.tools); diffs.push({ from: fromId, to: toId, newClasses: toVer.classes - .map((c) => c.name) - .filter((n) => !fromClassNames.has(n)), + .map((cls) => cls.name) + .filter((name) => !fromClassNames.has(name)), newFunctions: toVer.functions - .map((f) => f.name) - .filter((n) => !fromFuncNames.has(n)), - newTools: toVer.tools.filter((t) => !fromToolNames.has(t)), + .map((fn) => fn.name) + .filter((name) => !fromFuncNames.has(name)), + newTools: toVer.tools.filter((tool) => !fromToolNames.has(tool)), locDelta: toVer.loc - fromVer.loc, }); } - // 4. Read doc files from locale subdirectories (en/, zh/, ja/) - const docs: DocContent[] = []; + return diffs; +} - if (fs.existsSync(DOCS_DIR)) { - const localeDirs = ["en", "zh", "ja"]; - let totalDocFiles = 0; +function sortVersions(versions: AgentVersion[]) { + const orderMap = new Map(VERSION_ORDER.map((id, index) => [id, index])); + versions.sort( + (a, b) => (orderMap.get(a.id as any) ?? 99) - (orderMap.get(b.id as any) ?? 
99) + ); +} - for (const locale of localeDirs) { - const localeDir = path.join(DOCS_DIR, locale); - if (!fs.existsSync(localeDir)) continue; +function main() { + console.log("Extracting course content..."); + console.log(` Repo root: ${REPO_ROOT}`); - const docFiles = fs - .readdirSync(localeDir) - .filter((f) => f.endsWith(".md")); + cleanCourseAssets(); - totalDocFiles += docFiles.length; + const rootChapters = listRootChapters(); + const useRootTrack = rootChapters.length > 0; - for (const filename of docFiles) { - const version = extractDocVersion(filename); - if (!version) { - console.warn(` Skipping doc ${locale}/${filename}: could not determine version`); - continue; - } + console.log( + useRootTrack + ? ` Source: root chapter folders (${rootChapters.length})` + : " Source: legacy agents/docs folders" + ); - const filePath = path.join(localeDir, filename); - const content = fs.readFileSync(filePath, "utf-8"); + const versions = useRootTrack + ? buildRootVersions(rootChapters) + : buildLegacyVersions(); + const docs = useRootTrack ? buildRootDocs(rootChapters) : buildLegacyDocs(); - const titleMatch = content.match(/^#\s+(.+)$/m); - const title = titleMatch ? titleMatch[1] : filename; + sortVersions(versions); + computeNewTools(versions); + const diffs = buildDiffs(versions); - docs.push({ version, locale: locale as "en" | "zh" | "ja", title, content }); - } - } - - console.log(` Found ${totalDocFiles} doc files across ${localeDirs.length} locales`); - } else { - console.warn(` Docs directory not found: ${DOCS_DIR}`); - } - - // 5. 
Write output fs.mkdirSync(OUT_DIR, { recursive: true }); const index: VersionIndex = { versions, diffs }; - const indexPath = path.join(OUT_DIR, "versions.json"); - fs.writeFileSync(indexPath, JSON.stringify(index, null, 2)); - console.log(` Wrote ${indexPath}`); + fs.writeFileSync(path.join(OUT_DIR, "versions.json"), JSON.stringify(index, null, 2)); + fs.writeFileSync(path.join(OUT_DIR, "docs.json"), JSON.stringify(docs, null, 2)); - const docsPath = path.join(OUT_DIR, "docs.json"); - fs.writeFileSync(docsPath, JSON.stringify(docs, null, 2)); - console.log(` Wrote ${docsPath}`); - - // Summary console.log("\nExtraction complete:"); console.log(` ${versions.length} versions`); console.log(` ${diffs.length} diffs`); console.log(` ${docs.length} docs`); - for (const v of versions) { + for (const version of versions) { console.log( - ` ${v.id}: ${v.loc} LOC, ${v.tools.length} tools, ${v.classes.length} classes, ${v.functions.length} functions` + ` ${version.id}: ${version.loc} LOC, ${version.tools.length} tools, ${version.classes.length} classes, ${version.functions.length} functions` ); } } diff --git a/web/src/app/globals.css b/web/src/app/globals.css index 7aeef1a..2d773a0 100644 --- a/web/src/app/globals.css +++ b/web/src/app/globals.css @@ -28,6 +28,11 @@ body { color: var(--color-text); } +.prose-custom { + max-width: 100%; + overflow-wrap: break-word; +} + @media (max-width: 640px) { pre, code { font-size: 11px; @@ -188,6 +193,7 @@ body { .prose-custom pre { position: relative; overflow-x: auto; + max-width: 100%; margin-top: 1rem; margin-bottom: 1rem; border-radius: 0.75rem; @@ -200,6 +206,11 @@ body { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; } +.prose-custom pre code { + display: block; + min-width: 100%; +} + .prose-custom pre.code-block { padding-top: 2.25rem; } @@ -368,10 +379,18 @@ body { /* -- Tables -- */ -.prose-custom table { - width: 100%; +.prose-custom .table-scroll { + max-width: 100%; margin-top: 1.25rem; 
margin-bottom: 1.25rem; + overflow-x: auto; + border-radius: 0.75rem; +} + +.prose-custom table { + width: 100%; + margin-top: 0; + margin-bottom: 0; border-collapse: separate; border-spacing: 0; font-size: 0.8125rem; diff --git a/web/src/components/architecture/arch-diagram.tsx b/web/src/components/architecture/arch-diagram.tsx index 2d8fa9e..5b7aa58 100644 --- a/web/src/components/architecture/arch-diagram.tsx +++ b/web/src/components/architecture/arch-diagram.tsx @@ -16,6 +16,10 @@ const CLASS_DESCRIPTIONS: Record = { TeammateManager: "Multi-agent team lifecycle and coordination", Teammate: "Individual agent identity and state tracking", SharedBoard: "Cross-agent shared state coordination", + CronJob: "Durable recurring job definition", + ProtocolState: "Pending team protocol requests and response matching", + MCPClient: "External tool discovery and invocation client", + RecoveryState: "Retry, fallback, and continuation state", }; interface ArchDiagramProps { @@ -68,29 +72,23 @@ function getLayerColorClasses(versionId: string): { } } -function collectClassesUpTo( +function collectClassesForVersion( targetId: string ): { name: string; introducedIn: string }[] { - const { versions, diffs } = versionsData; - const order = versions.map((v) => v.id); - const targetIdx = order.indexOf(targetId); - if (targetIdx < 0) return []; + const targetIndex = versionsData.versions.findIndex((v) => v.id === targetId); + const version = targetIndex >= 0 ? 
versionsData.versions[targetIndex] : undefined; - const result: { name: string; introducedIn: string }[] = []; - const seen = new Set(); - - for (let i = 0; i <= targetIdx; i++) { - const v = versions[i]; - if (!v.classes) continue; - for (const cls of v.classes) { - if (!seen.has(cls.name)) { - seen.add(cls.name); - result.push({ name: cls.name, introducedIn: v.id }); - } - } - } - - return result; + return ( + version?.classes?.map((cls) => ({ + name: cls.name, + introducedIn: + versionsData.versions + .slice(0, targetIndex + 1) + .find((candidate) => + candidate.classes?.some((candidateCls) => candidateCls.name === cls.name) + )?.id ?? targetId, + })) ?? [] + ); } function getNewClassNames(version: string): Set { @@ -103,7 +101,7 @@ function getNewClassNames(version: string): Set { } export function ArchDiagram({ version }: ArchDiagramProps) { - const allClasses = collectClassesUpTo(version); + const allClasses = collectClassesForVersion(version); const newClassNames = getNewClassNames(version); const versionData = versionsData.versions.find((v) => v.id === version); const tools = versionData?.tools ?? 
[]; diff --git a/web/src/components/architecture/design-decisions.tsx b/web/src/components/architecture/design-decisions.tsx index 5fa47fa..8640f2d 100644 --- a/web/src/components/architecture/design-decisions.tsx +++ b/web/src/components/architecture/design-decisions.tsx @@ -18,6 +18,14 @@ import s09Annotations from "@/data/annotations/s09.json"; import s10Annotations from "@/data/annotations/s10.json"; import s11Annotations from "@/data/annotations/s11.json"; import s12Annotations from "@/data/annotations/s12.json"; +import s13Annotations from "@/data/annotations/s13.json"; +import s14Annotations from "@/data/annotations/s14.json"; +import s15Annotations from "@/data/annotations/s15.json"; +import s16Annotations from "@/data/annotations/s16.json"; +import s17Annotations from "@/data/annotations/s17.json"; +import s18Annotations from "@/data/annotations/s18.json"; +import s19Annotations from "@/data/annotations/s19.json"; +import s20Annotations from "@/data/annotations/s20.json"; interface Decision { id: string; @@ -46,6 +54,14 @@ const ANNOTATIONS: Record = { s10: s10Annotations as AnnotationFile, s11: s11Annotations as AnnotationFile, s12: s12Annotations as AnnotationFile, + s13: s13Annotations as AnnotationFile, + s14: s14Annotations as AnnotationFile, + s15: s15Annotations as AnnotationFile, + s16: s16Annotations as AnnotationFile, + s17: s17Annotations as AnnotationFile, + s18: s18Annotations as AnnotationFile, + s19: s19Annotations as AnnotationFile, + s20: s20Annotations as AnnotationFile, }; interface DesignDecisionsProps { @@ -124,7 +140,14 @@ export function DesignDecisions({ version }: DesignDecisionsProps) { const annotations = ANNOTATIONS[version]; if (!annotations || annotations.decisions.length === 0) { - return null; + return ( +
+

{t("design_decisions")}

+
+ Design decisions are not available for this lesson yet. +
+
+ ); } return ( diff --git a/web/src/components/architecture/execution-flow.tsx b/web/src/components/architecture/execution-flow.tsx index efeb1b7..4d4676d 100644 --- a/web/src/components/architecture/execution-flow.tsx +++ b/web/src/components/architecture/execution-flow.tsx @@ -6,8 +6,9 @@ import { getFlowForVersion } from "@/data/execution-flows"; import type { FlowNode, FlowEdge } from "@/types/agent-data"; const NODE_WIDTH = 140; -const NODE_HEIGHT = 40; -const DIAMOND_SIZE = 50; +const NODE_HEIGHT = 44; +const DIAMOND_WIDTH = 92; +const DIAMOND_HEIGHT = 64; const LAYER_COLORS: Record = { start: "#3B82F6", @@ -17,39 +18,207 @@ const LAYER_COLORS: Record = { end: "#EF4444", }; -function getNodeCenter(node: FlowNode): { cx: number; cy: number } { - return { cx: node.x, cy: node.y }; +function getNodeLines(node: FlowNode): string[] { + const maxChars = node.type === "decision" ? 12 : 18; + return node.label.split("\n").flatMap((line) => { + if (line.length <= maxChars) return [line]; + + const parts = line.split(/(\s+\/\s+|\s+|_)/).filter(Boolean); + const chunks: string[] = []; + let current = ""; + + for (const part of parts) { + const next = `${current}${part}`; + if (current && next.trim().length > maxChars) { + chunks.push(current.trim()); + current = part.trimStart(); + } else { + current = next; + } + } + + if (current.trim()) chunks.push(current.trim()); + return chunks.length ? 
chunks : [line]; + }); +} + +function estimateTextWidth(line: string, fontSize: number): number { + return line.length * fontSize * 0.62; +} + +function getNodeMetrics(node: FlowNode) { + const lines = getNodeLines(node); + const longest = Math.max(...lines.map((line) => estimateTextWidth(line, 11)), 0); + + if (node.type === "decision") { + return { + lines, + width: Math.max(DIAMOND_WIDTH, longest + 54), + height: Math.max(DIAMOND_HEIGHT, lines.length * 15 + 42), + }; + } + + if (node.type === "start" || node.type === "end") { + return { + lines, + width: Math.max(NODE_WIDTH, longest + 34), + height: Math.max(NODE_HEIGHT, lines.length * 15 + 24), + }; + } + + return { + lines, + width: Math.max(NODE_WIDTH, longest + 30), + height: Math.max(NODE_HEIGHT, lines.length * 15 + 24), + }; +} + +function getNodeBounds(node: FlowNode) { + const metrics = getNodeMetrics(node); + const halfW = metrics.width / 2; + const halfH = metrics.height / 2; + + return { + cx: node.x, + cy: node.y, + left: node.x - halfW, + right: node.x + halfW, + top: node.y - halfH, + bottom: node.y + halfH, + }; +} + +const LOOP_RAIL_X = -48; +const RIGHT_LOOP_RAIL_X = 576; +const FLOW_CENTER_X = 300; +const LOOP_PAD = 28; +const LOOP_BACK_DX_LIMIT = 360; +const LOOP_BACK_DY_LIMIT = 70; + +type LoopSide = "left" | "right"; + +function getLoopSide(start: ReturnType, end: ReturnType): LoopSide { + return (start.cx + end.cx) / 2 > FLOW_CENTER_X ? 
"right" : "left"; +} + +function getLoopRailX( + start: ReturnType, + end: ReturnType, + side = getLoopSide(start, end), +) { + if (side === "right") { + return Math.max(RIGHT_LOOP_RAIL_X, start.right + LOOP_PAD, end.right + LOOP_PAD); + } + + return Math.min(LOOP_RAIL_X, start.left - LOOP_PAD, end.left - LOOP_PAD); +} + +function isLoopBack(start: ReturnType, end: ReturnType) { + const dx = end.cx - start.cx; + const dy = end.cy - start.cy; + return dy < -LOOP_BACK_DY_LIMIT && Math.abs(dx) <= LOOP_BACK_DX_LIMIT; +} + +function shouldUseStepRoute(start: ReturnType, end: ReturnType) { + const dx = end.cx - start.cx; + const dy = end.cy - start.cy; + return dy > 28 && Math.abs(dx) > 44 && end.top > start.bottom; +} + +function getStepBusY(start: ReturnType, end: ReturnType) { + const room = end.top - start.bottom; + return Math.min(end.top - 16, start.bottom + Math.max(18, room * 0.35)); } function getEdgePath(from: FlowNode, to: FlowNode): string { - const { cx: x1, cy: y1 } = getNodeCenter(from); - const { cx: x2, cy: y2 } = getNodeCenter(to); + const start = getNodeBounds(from); + const end = getNodeBounds(to); + const dx = end.cx - start.cx; + const dy = end.cy - start.cy; - const halfH = from.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; - const halfHTo = to.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; - - if (Math.abs(x1 - x2) < 10) { - const startY = y1 + halfH; - const endY = y2 - halfHTo; - return `M ${x1} ${startY} L ${x2} ${endY}`; + if (isLoopBack(start, end)) { + const side = getLoopSide(start, end); + const railX = getLoopRailX(start, end, side); + const startX = side === "right" ? start.right : start.left; + const endX = side === "right" ? 
end.right : end.left; + const midY = (start.cy + end.cy) / 2; + return `M ${startX} ${start.cy} C ${railX} ${start.cy}, ${railX} ${midY}, ${railX} ${midY} C ${railX} ${end.cy}, ${endX} ${end.cy}, ${endX} ${end.cy}`; } - const startY = y1 + halfH; - const endY = y2 - halfHTo; - const midY = (startY + endY) / 2; - return `M ${x1} ${startY} L ${x1} ${midY} L ${x2} ${midY} L ${x2} ${endY}`; + if (Math.abs(dx) < 10) { + if (dy >= 0) { + return `M ${start.cx} ${start.bottom} L ${end.cx} ${end.top}`; + } + return `M ${start.cx} ${start.top} L ${end.cx} ${end.bottom}`; + } + + if (Math.abs(dy) < 10) { + const startX = dx > 0 ? start.right : start.left; + const endX = dx > 0 ? end.left : end.right; + const midX = (startX + endX) / 2; + return `M ${startX} ${start.cy} C ${midX} ${start.cy}, ${midX} ${end.cy}, ${endX} ${end.cy}`; + } + + if (shouldUseStepRoute(start, end)) { + const busY = getStepBusY(start, end); + return `M ${start.cx} ${start.bottom} L ${start.cx} ${busY} L ${end.cx} ${busY} L ${end.cx} ${end.top}`; + } + + if (Math.abs(dx) > 70) { + const startX = dx > 0 ? start.right : start.left; + const endX = dx > 0 ? end.left : end.right; + const control = Math.max(56, Math.abs(dx) * 0.45); + return `M ${startX} ${start.cy} C ${startX + (dx > 0 ? control : -control)} ${start.cy}, ${endX - (dx > 0 ? control : -control)} ${end.cy}, ${endX} ${end.cy}`; + } + + const startY = dy > 0 ? start.bottom : start.top; + const endY = dy > 0 ? end.top : end.bottom; + const controlDistance = Math.max(44, Math.abs(endY - startY) * 0.42); + const controlY1 = startY + (endY > startY ? controlDistance : -controlDistance); + const controlY2 = endY - (endY > startY ? 
controlDistance : -controlDistance); + + return `M ${start.cx} ${startY} C ${start.cx} ${controlY1}, ${end.cx} ${controlY2}, ${end.cx} ${endY}`; +} + +function getEdgeLabelPosition(from: FlowNode, to: FlowNode): { x: number; y: number } { + const start = getNodeBounds(from); + const end = getNodeBounds(to); + const dx = end.cx - start.cx; + const dy = end.cy - start.cy; + + if (isLoopBack(start, end)) { + const side = getLoopSide(start, end); + return { + x: getLoopRailX(start, end, side) + (side === "right" ? -24 : 24), + y: (start.cy + end.cy) / 2 - 6, + }; + } + + if (Math.abs(dy) < 10) { + return { x: (start.cx + end.cx) / 2, y: start.cy - 12 }; + } + + if (shouldUseStepRoute(start, end)) { + return { x: (start.cx + end.cx) / 2, y: getStepBusY(start, end) - 8 }; + } + + return { + x: (start.cx + end.cx) / 2 + (dx > 0 ? 18 : -18), + y: (start.bottom + end.top) / 2 - 8, + }; } function NodeShape({ node }: { node: FlowNode }) { const color = LAYER_COLORS[node.type]; - const lines = node.label.split("\n"); + const { lines, width, height } = getNodeMetrics(node); if (node.type === "decision") { - const half = DIAMOND_SIZE / 2; + const halfW = width / 2; + const halfH = height / 2; return ( 2 ? 9 : 10} fontFamily="monospace" fill="currentColor" > @@ -76,27 +245,30 @@ function NodeShape({ node }: { node: FlowNode }) { return ( - - {node.label} - + {lines.map((line, i) => ( + + {line} + + ))} ); } @@ -105,10 +277,10 @@ function NodeShape({ node }: { node: FlowNode }) { return ( {edge.label && ( + Execution flow is not available for this lesson yet. + + ); + } - const maxY = Math.max(...flow.nodes.map((n) => n.y)) + 50; + const bounds = flow.nodes.map(getNodeBounds); + const minX = Math.min(-40, ...bounds.map((b) => b.left)) - 24; + const maxX = Math.max(700, ...bounds.map((b) => b.right)) + 24; + const maxY = Math.max(...bounds.map((b) => b.bottom)) + 50; return (
@@ -219,12 +407,14 @@ export function ExecutionFlow({ version }: ExecutionFlowProps) { {flow.edges.map((edge, i) => ( - + ))} {flow.nodes.map((node, i) => ( ' ); + // Keep wide Markdown tables inside the prose column on small screens. + html = html.replace(//g, '
'); + html = html.replace(/<\/table>/g, '
'); + // Mark the first blockquote as hero callout html = html.replace( /
/, diff --git a/web/src/components/simulator/agent-loop-simulator.tsx b/web/src/components/simulator/agent-loop-simulator.tsx index 8de470c..f7fc5a4 100644 --- a/web/src/components/simulator/agent-loop-simulator.tsx +++ b/web/src/components/simulator/agent-loop-simulator.tsx @@ -21,6 +21,14 @@ const scenarioModules: Record Promise<{ default: Scenario }>> = { s10: () => import("@/data/scenarios/s10.json") as Promise<{ default: Scenario }>, s11: () => import("@/data/scenarios/s11.json") as Promise<{ default: Scenario }>, s12: () => import("@/data/scenarios/s12.json") as Promise<{ default: Scenario }>, + s13: () => import("@/data/scenarios/s13.json") as Promise<{ default: Scenario }>, + s14: () => import("@/data/scenarios/s14.json") as Promise<{ default: Scenario }>, + s15: () => import("@/data/scenarios/s15.json") as Promise<{ default: Scenario }>, + s16: () => import("@/data/scenarios/s16.json") as Promise<{ default: Scenario }>, + s17: () => import("@/data/scenarios/s17.json") as Promise<{ default: Scenario }>, + s18: () => import("@/data/scenarios/s18.json") as Promise<{ default: Scenario }>, + s19: () => import("@/data/scenarios/s19.json") as Promise<{ default: Scenario }>, + s20: () => import("@/data/scenarios/s20.json") as Promise<{ default: Scenario }>, }; interface AgentLoopSimulatorProps { @@ -33,10 +41,27 @@ export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) { const scrollRef = useRef(null); useEffect(() => { + let cancelled = false; const loader = scenarioModules[version]; - if (loader) { - loader().then((mod) => setScenario(mod.default)); + setScenario(null); + + if (!loader) { + return () => { + cancelled = true; + }; } + + loader() + .then((mod) => { + if (!cancelled) setScenario(mod.default); + }) + .catch(() => { + if (!cancelled) setScenario(null); + }); + + return () => { + cancelled = true; + }; }, [version]); const sim = useSimulator(scenario?.steps ?? 
[]); @@ -50,7 +75,16 @@ export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) { } }, [sim.visibleSteps.length]); - if (!scenario) return null; + if (!scenario) { + return ( +
+

{t("simulator")}

+
+ Simulator scenario is not available for this lesson yet. +
+
+ ); + } return (
diff --git a/web/src/components/visualizations/index.tsx b/web/src/components/visualizations/index.tsx index 5fc6222..51b7380 100644 --- a/web/src/components/visualizations/index.tsx +++ b/web/src/components/visualizations/index.tsx @@ -9,16 +9,24 @@ const visualizations: Record< > = { s01: lazy(() => import("./s01-agent-loop")), s02: lazy(() => import("./s02-tool-dispatch")), - s03: lazy(() => import("./s03-todo-write")), - s04: lazy(() => import("./s04-subagent")), - s05: lazy(() => import("./s05-skill-loading")), - s06: lazy(() => import("./s06-context-compact")), - s07: lazy(() => import("./s07-task-system")), - s08: lazy(() => import("./s08-background-tasks")), - s09: lazy(() => import("./s09-agent-teams")), - s10: lazy(() => import("./s10-team-protocols")), - s11: lazy(() => import("./s11-autonomous-agents")), - s12: lazy(() => import("./s12-worktree-task-isolation")), + s03: lazy(() => import("./s03-permission")), + s04: lazy(() => import("./s04-hooks")), + s05: lazy(() => import("./s03-todo-write")), + s06: lazy(() => import("./s04-subagent")), + s07: lazy(() => import("./s05-skill-loading")), + s08: lazy(() => import("./s06-context-compact")), + s09: lazy(() => import("./s09-memory")), + s10: lazy(() => import("./s10-system-prompt")), + s11: lazy(() => import("./s11-error-recovery")), + s12: lazy(() => import("./s07-task-system")), + s13: lazy(() => import("./s08-background-tasks")), + s14: lazy(() => import("./s14-cron-scheduler")), + s15: lazy(() => import("./s09-agent-teams")), + s16: lazy(() => import("./s10-team-protocols")), + s17: lazy(() => import("./s11-autonomous-agents")), + s18: lazy(() => import("./s12-worktree-task-isolation")), + s19: lazy(() => import("./s19-mcp-tools")), + s20: lazy(() => import("./s20-comprehensive")), }; export function SessionVisualization({ version }: { version: string }) { diff --git a/web/src/components/visualizations/s03-permission.tsx b/web/src/components/visualizations/s03-permission.tsx new file mode 100644 index 
0000000..6513493 --- /dev/null +++ b/web/src/components/visualizations/s03-permission.tsx @@ -0,0 +1,386 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { CheckCircle2, ClipboardCheck, OctagonAlert, PlayCircle, ShieldAlert, ShieldCheck, UserCheck } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +const STEPS = [ + { + title: "Three Requests, Three Routes", + desc: "Permission is a router: safe calls run, risky calls ask, forbidden calls stop.", + mode: "overview", + }, + { + title: "Allow: Safe Read Runs Immediately", + desc: "A read-only file request passes policy and reaches the handler without a user ticket.", + mode: "allow", + }, + { + title: "Ask: Risky Local Delete Becomes a Ticket", + desc: "A local delete command is not forbidden, but it must pause for explicit confirmation.", + mode: "ask", + }, + { + title: "Approved Ask: Handler Runs After Yes", + desc: "The same risky request executes only after the user approves this exact action.", + mode: "ask-approved", + }, + { + title: "Deny: Forbidden Pattern Stops Early", + desc: "A root-level sudo delete is blocked before any handler can touch the machine.", + mode: "deny", + }, + { + title: "One Permission Desk, Three Outcomes", + desc: "The harness keeps allow, ask, and deny decisions outside the model, then returns the decision to the loop.", + mode: "summary", + }, +] as const; + +const REQUESTS = [ + { + id: "allow", + tool: "read_file", + command: "README.md", + result: "allow", + detail: "read-only workspace file", + tone: "emerald", + }, + { + id: "ask", + tool: "bash", + command: "rm -rf ./tmp/build-cache", + result: "ask", + detail: "local destructive command", + tone: "amber", + }, + { + id: "deny", + tool: "bash", + command: "sudo rm -rf /", + result: "deny", + detail: "forbidden root delete", + 
tone: "red", + }, +] as const; + +function toneClass(tone: "emerald" | "amber" | "red" | "blue" | "zinc") { + if (tone === "emerald") return "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200"; + if (tone === "amber") return "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200"; + if (tone === "red") return "border-red-200 bg-red-50 text-red-800 dark:border-red-900 dark:bg-red-950/40 dark:text-red-200"; + if (tone === "blue") return "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200"; + return "border-zinc-200 bg-white text-zinc-700 dark:border-zinc-700 dark:bg-zinc-900 dark:text-zinc-200"; +} + +function Surface({ + title, + icon, + active, + children, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +type StepMode = (typeof STEPS)[number]["mode"]; +type RequestId = (typeof REQUESTS)[number]["id"]; + +function activeRequestId(mode: StepMode): RequestId | null { + if (mode === "allow") return "allow"; + if (mode === "ask" || mode === "ask-approved") return "ask"; + if (mode === "deny") return "deny"; + return null; +} + +function RequestCard({ + request, + active, + muted, +}: { + request: (typeof REQUESTS)[number]; + active: boolean; + muted: boolean; +}) { + return ( + +
+
tool request
+ + {request.tool} + +
+ + {request.command} + +
+ {request.detail} + + {request.result} + +
+
+ ); +} + +function CheckRow({ + label, + detail, + status, + active, +}: { + label: string; + detail: string; + status: "waiting" | "pass" | "allow" | "ask" | "approved" | "deny" | "skip"; + active: boolean; +}) { + const icon = + status === "deny" ? : status === "pass" || status === "allow" ? : status === "ask" ? : status === "approved" ? : ; + const tone = status === "deny" ? "red" : status === "pass" || status === "allow" || status === "approved" ? "emerald" : status === "ask" ? "amber" : "zinc"; + + return ( + +
+
+ {icon} + {label} +
+ + {status} + +
+
{detail}
+
+ ); +} + +function PermissionDesk({ mode }: { mode: StepMode }) { + if (mode === "overview" || mode === "summary") { + return ( +
+ + + +
+ ); + } + + if (mode === "allow") { + return ( +
+ + + +
+ ); + } + + if (mode === "deny") { + return ( +
+ + + +
+ ); + } + + return ( +
+ + + +
+ ); +} + +function CodeLine({ label, value }: { label: string; value: string }) { + return ( +
+
{label}
+ {value} +
+ ); +} + +function Outcome({ mode }: { mode: StepMode }) { + if (mode === "overview") { + return
select a request route
; + } + + if (mode === "allow") { + return ( + +
+ + Handler runs now +
+ + +
+ ); + } + + if (mode === "ask") { + return ( + +
+ + Approval ticket +
+
"Allow deleting local build cache?"
+
+ ); + } + + if (mode === "ask-approved") { + return ( + +
+ + Handler runs after approval +
+ + +
+ ); + } + + if (mode === "deny") { + return ( + +
+ + Blocked before handler +
+
No tool execution, no user prompt, no filesystem touch.
+
+ ); + } + + return ( + + {REQUESTS.map((request) => ( +
+
+ {request.result === "deny" ? : request.result === "ask" ? : } + {request.result} +
+
{request.detail}
+
+ ))} +
+
+ + decision returned to loop +
+
Permission stays outside the model, but the loop still receives a normal tool_result or blocked result.
+
+
+ ); +} + +export default function PermissionVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const current = STEPS[step]; + const mode = current.mode; + const activeId = activeRequestId(mode); + + return ( +
+

+ {title || "Permission Desk"} +

+ +
+
+ } active={mode === "overview" || activeId !== null}> +
+ {REQUESTS.map((request) => ( + + ))} +
+
+ + } active={mode !== "overview"}> + + + + } active={mode !== "overview"}> + + + + +
+ +
+ Beginner rule: the model proposes tools; the runtime routes each request to allow, ask, or deny before execution. +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s03-todo-write.tsx b/web/src/components/visualizations/s03-todo-write.tsx index 223327e..041d908 100644 --- a/web/src/components/visualizations/s03-todo-write.tsx +++ b/web/src/components/visualizations/s03-todo-write.tsx @@ -99,12 +99,12 @@ function KanbanColumn({ headerBg: string; }) { return ( -
+
- {title} - + {title} + {tasks.length}
@@ -147,19 +147,19 @@ function TaskCard({ task }: { task: Task }) { animate={{ opacity: 1, scale: 1 }} exit={{ opacity: 0, scale: 0.8 }} transition={{ type: "spring", stiffness: 400, damping: 30 }} - className={`rounded-md border p-2.5 ${borderStyles[task.status]}`} + className={`min-w-0 rounded-md border p-2.5 ${borderStyles[task.status]}`} > -
+
#{task.id} {task.status.replace("_", " ")}
-
+
{task.label}
@@ -264,7 +264,7 @@ export default function TodoWrite({ title }: { title?: string }) {
{/* Kanban board */} -
+
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function HookCard({ + hook, + active, +}: { + hook: (typeof HOOKS)[number]; + active: boolean; +}) { + return ( + +
+
{hook.id}
+ {active && } +
+
{hook.when}
+
+ {hook.callbacks.map((callback) => ( + + {callback} + + ))} +
+
+ ); +} + +function TurnCard({ step }: { step: number }) { + const state = + step <= 1 + ? { title: "User input", body: "Read README.md and summarize it.", icon: } + : step === 2 + ? { title: "LLM chooses tool", body: "tool_use: read_file({ path: 'README.md' })", icon: } + : step === 3 + ? { title: "Tool waits at pre-hook", body: "permission_hook + log_hook inspect the call.", icon: } + : step === 4 + ? { title: "Handler returned output", body: "large_output_hook checks result size.", icon: } + : { title: "No more tool_use", body: "summary_hook records final session stats.", icon: }; + + return ( + +
+ {state.icon} + {state.title} +
+
+ {state.body} +
+
+ ); +} + +function AuditLog({ step }: { step: number }) { + const items = [ + "[registry] four hook slots registered", + "[UserPromptSubmit] working directory logged", + "[loop] model returned read_file tool_use", + "[PreToolUse] permission allowed; tool call logged", + "[PostToolUse] output size checked", + "[Stop] session used 1 tool call", + ].slice(0, step + 1); + + return ( +
+ + {items.map((item) => ( + + {item} + + ))} + +
+ ); +} + +export default function HooksVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const current = STEPS[step]; + const activeHook = current.active; + + return ( +
+

+ {title || "Hook Workbench"} +

+ +
+
+ The loop stays boring on purpose: it calls trigger_hooks(event), and the registry decides what extra logic runs. +
+ +
+ } active={step === 0 || activeHook !== null}> +
+ {HOOKS.map((hook) => ( + + ))} +
+
+ + } active={step >= 1}> +
+ +
+
Audit log
+ +
+
+
+
+ +
+ Beginner rule: adding behavior means registering a callback, not editing the core model-tool-result loop. +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s06-context-compact.tsx b/web/src/components/visualizations/s06-context-compact.tsx index 3c4270d..f31ec54 100644 --- a/web/src/components/visualizations/s06-context-compact.tsx +++ b/web/src/components/visualizations/s06-context-compact.tsx @@ -193,6 +193,36 @@ const STEPS = [ }, ]; +const COMPRESSION_LAYERS = [ + { + label: "Micro", + full: "MICRO-COMPACT", + trigger: "old tool_result", + action: "shrink bulky outputs", + step: 3, + classes: + "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/30 dark:text-amber-200", + }, + { + label: "Auto", + full: "AUTO-COMPACT", + trigger: "token threshold", + action: "summarize the conversation", + step: 5, + classes: + "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/30 dark:text-blue-200", + }, + { + label: "Manual", + full: "/compact", + trigger: "user command", + action: "keep one compact summary", + step: 6, + classes: + "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/30 dark:text-emerald-200", + }, +]; + export default function ContextCompact({ title }: { title?: string }) { const { currentStep, @@ -222,17 +252,17 @@ export default function ContextCompact({ title }: { title?: string }) {
-
+
{/* Token Window (tall vertical bar on the left) */} -
+
Context Window
{/* Blocks stacked from bottom up */} @@ -293,14 +323,14 @@ export default function ContextCompact({ title }: { title?: string }) {
{/* Right side: state display and compression stage */} -
+
{/* Top: horizontal token bar */}
-
+
Token usage - + {state.tokenCount.toLocaleString()} / {MAX_TOKENS.toLocaleString()}
@@ -314,7 +344,7 @@ export default function ContextCompact({ title }: { title?: string }) {
{/* Message type legend */} -
+
user @@ -329,6 +359,37 @@ export default function ContextCompact({ title }: { title?: string }) {
+
+ {COMPRESSION_LAYERS.map((layer) => { + const reached = currentStep >= layer.step; + const active = state.compressionLabel === layer.full; + return ( + +
+ {layer.label} + + {reached ? "used" : "waiting"} + +
+
+
{layer.trigger}
+
{layer.action}
+
+
+ ); + })} +
+ {/* Highlight old tool_results at step 2 */} {currentStep === 2 && ( @@ -336,12 +397,12 @@ export default function ContextCompact({ title }: { title?: string }) { initial={{ opacity: 0, y: 8 }} animate={{ opacity: 1, y: 0 }} exit={{ opacity: 0 }} - className="mt-3 rounded border border-amber-300 bg-amber-50 px-3 py-2 dark:border-amber-700 dark:bg-amber-900/20" + className="mt-3 rounded-lg border border-amber-300 bg-amber-50 px-3 py-2 dark:border-amber-700 dark:bg-amber-900/20" >
tool_results are the largest blocks
-
+
File contents, command outputs, search results -- each one is thousands of tokens.
@@ -374,7 +435,7 @@ export default function ContextCompact({ title }: { title?: string }) { }`}> {state.compressionLabel}
-
-
-
- - Stage 1: Micro -- shrink old tool_results - - - automatic - -
-
-
- - Stage 2: Auto -- summarize entire conversation - - - at threshold - -
-
-
- - Stage 3: /compact -- user-triggered, deepest compression - - - manual - -
+ {COMPRESSION_LAYERS.map((layer, index) => ( +
+ + Stage {index + 1}: {layer.label} -- {layer.action} + + + {layer.trigger} + +
+ ))} )}
diff --git a/web/src/components/visualizations/s07-task-system.tsx b/web/src/components/visualizations/s07-task-system.tsx index 5ace18b..7c56933 100644 --- a/web/src/components/visualizations/s07-task-system.tsx +++ b/web/src/components/visualizations/s07-task-system.tsx @@ -1,494 +1,214 @@ "use client"; -import { useMemo } from "react"; -import { motion } from "framer-motion"; -import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { AnimatePresence, motion } from "framer-motion"; +import { CheckCircle2, ClipboardList, FileJson, LockKeyhole, PlayCircle } from "lucide-react"; import { StepControls } from "@/components/visualizations/shared/step-controls"; -import { useDarkMode, useSvgPalette } from "@/hooks/useDarkMode"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; -type TaskStatus = "pending" | "in_progress" | "completed" | "blocked"; +type Status = "blocked" | "ready" | "active" | "done"; -interface TaskNode { +interface TaskCard { id: string; - label: string; - x: number; - y: number; - deps: string[]; -} - -interface StepInfo { title: string; - description: string; + blockers: string[]; + status: Status; } -const TASKS: TaskNode[] = [ - { id: "T1", label: "T1: Setup DB", x: 80, y: 160, deps: [] }, - { id: "T2", label: "T2: API routes", x: 280, y: 80, deps: ["T1"] }, - { id: "T3", label: "T3: Auth module", x: 280, y: 240, deps: ["T1"] }, - { id: "T4", label: "T4: Integration", x: 480, y: 160, deps: ["T2", "T3"] }, - { id: "T5", label: "T5: Deploy", x: 650, y: 160, deps: ["T4"] }, +const STEPS = [ + { + title: "Tasks Become Files", + desc: "The agent writes work as task cards on disk, so the plan survives compaction and restarts.", + }, + { + title: "Find the First Ready Card", + desc: "A task with no blockers is ready immediately. 
Everything else waits visibly.", + }, + { + title: "Work One Card", + desc: "The active task is not just text in the model's head; it has a durable status.", + }, + { + title: "Completion Unlocks Dependents", + desc: "When T1 is done, the cards that depended on T1 become ready.", + }, + { + title: "Parallel Ready Work", + desc: "T2 and T3 can run independently, while T4 still waits for both.", + }, + { + title: "All Blockers Cleared", + desc: "Once T2 and T3 are done, T4 moves from waiting to active.", + }, + { + title: "Board Resolved", + desc: "Every card reaches done. The dependency idea is visible without drawing a graph.", + }, +] as const; + +const BASE_TASKS = [ + { id: "T1", title: "Set up database", blockers: [] }, + { id: "T2", title: "Add API routes", blockers: ["T1"] }, + { id: "T3", title: "Build auth module", blockers: ["T1"] }, + { id: "T4", title: "Integration pass", blockers: ["T2", "T3"] }, + { id: "T5", title: "Deploy", blockers: ["T4"] }, ]; -const NODE_W = 140; -const NODE_H = 50; - -const STEP_INFO: StepInfo[] = [ - { - title: "File-Based Tasks", - description: - "Tasks are stored in JSON files on disk. They survive context compaction -- unlike in-memory state.", - }, - { - title: "Start T1", - description: - "Tasks without dependencies can start immediately. T1 has no blockers.", - }, - { - title: "T1 Complete", - description: "Completing T1 unblocks its dependents: T2 and T3.", - }, - { - title: "Parallel Work", - description: - "T2 and T3 have no dependency on each other. Both can run simultaneously.", - }, - { - title: "Partial Unblock", - description: - "T4 depends on BOTH T2 and T3. It waits for all blockers to complete.", - }, - { - title: "Fully Unblocked", - description: "All blockers resolved. T4 can now proceed.", - }, - { - title: "Graph Resolved", - description: - "The entire dependency graph is resolved. 
File-based persistence means this works across context compressions.", - }, -]; - -function getTaskStatus(taskId: string, step: number): TaskStatus { - const statusMap: Record = { - T1: [ - "pending", - "in_progress", - "completed", - "completed", - "completed", - "completed", - "completed", - ], - T2: [ - "pending", - "pending", - "pending", - "in_progress", - "completed", - "completed", - "completed", - ], - T3: [ - "pending", - "pending", - "pending", - "in_progress", - "in_progress", - "completed", - "completed", - ], - T4: [ - "pending", - "pending", - "pending", - "pending", - "blocked", - "in_progress", - "completed", - ], - T5: [ - "pending", - "pending", - "pending", - "pending", - "pending", - "pending", - "completed", - ], +function taskStatus(id: string, step: number): Status { + const table: Record = { + T1: ["ready", "ready", "active", "done", "done", "done", "done"], + T2: ["blocked", "blocked", "blocked", "ready", "active", "done", "done"], + T3: ["blocked", "blocked", "blocked", "ready", "active", "done", "done"], + T4: ["blocked", "blocked", "blocked", "blocked", "blocked", "active", "done"], + T5: ["blocked", "blocked", "blocked", "blocked", "blocked", "blocked", "done"], }; - return statusMap[taskId]?.[step] ?? "pending"; + return table[id]?.[step] ?? 
"blocked"; } -function isEdgeActive(fromId: string, toId: string, step: number): boolean { - const fromStatus = getTaskStatus(fromId, step); - const toStatus = getTaskStatus(toId, step); +function getTasks(step: number): TaskCard[] { + return BASE_TASKS.map((task) => ({ ...task, status: taskStatus(task.id, step) })); +} + +function statusClass(status: Status): string { + if (status === "done") return "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200"; + if (status === "active") return "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200"; + if (status === "ready") return "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200"; + return "border-zinc-200 bg-zinc-50 text-zinc-600 dark:border-zinc-700 dark:bg-zinc-800 dark:text-zinc-300"; +} + +function statusIcon(status: Status) { + if (status === "done") return ; + if (status === "active") return ; + if (status === "ready") return ; + return ; +} + +function TaskCardView({ task }: { task: TaskCard }) { return ( - fromStatus === "completed" && - (toStatus === "in_progress" || toStatus === "completed") + +
+
{task.id}
+
+ {statusIcon(task.status)} + {task.status} +
+
+
{task.title}
+
+ {task.blockers.length === 0 ? ( + + no blockers + + ) : ( + task.blockers.map((blocker) => ( + + waits for {blocker} + + )) + )} +
+
); } -function getStatusColor(status: TaskStatus) { - switch (status) { - case "pending": - return { - fill: "#e2e8f0", - darkFill: "#27272a", - stroke: "#cbd5e1", - darkStroke: "#3f3f46", - text: "#475569", - darkText: "#d4d4d8", - }; - case "in_progress": - return { - fill: "#fef3c7", - darkFill: "#451a0340", - stroke: "#f59e0b", - darkStroke: "#d97706", - text: "#b45309", - darkText: "#fbbf24", - }; - case "completed": - return { - fill: "#d1fae5", - darkFill: "#06402740", - stroke: "#10b981", - darkStroke: "#059669", - text: "#047857", - darkText: "#34d399", - }; - case "blocked": - return { - fill: "#fecaca", - darkFill: "#45050540", - stroke: "#ef4444", - darkStroke: "#dc2626", - text: "#dc2626", - darkText: "#f87171", - }; - } -} - -function getStatusLabel(status: TaskStatus): string { - switch (status) { - case "pending": - return "pending"; - case "in_progress": - return "in_progress"; - case "completed": - return "done"; - case "blocked": - return "blocked"; - } -} - -function buildCurvePath( - x1: number, - y1: number, - x2: number, - y2: number -): string { - const midX = (x1 + x2) / 2; - return `M ${x1} ${y1} C ${midX} ${y1}, ${midX} ${y2}, ${x2} ${y2}`; +function Lane({ + title, + subtitle, + tasks, +}: { + title: string; + subtitle: string; + tasks: TaskCard[]; +}) { + return ( +
+
+
{title}
+
{subtitle}
+
+
+ + {tasks.length > 0 ? ( + tasks.map((task) => ) + ) : ( + + empty + + )} + +
+
+ ); } export default function TaskSystem({ title }: { title?: string }) { - const { - currentStep, - totalSteps, - next, - prev, - reset, - isPlaying, - toggleAutoPlay, - } = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 }); + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const tasks = getTasks(step); + const current = STEPS[step]; - const isDark = useDarkMode(); - const palette = useSvgPalette(); - - const edges = useMemo(() => { - const result: { - fromId: string; - toId: string; - x1: number; - y1: number; - x2: number; - y2: number; - }[] = []; - for (const task of TASKS) { - for (const depId of task.deps) { - const dep = TASKS.find((t) => t.id === depId); - if (!dep) continue; - result.push({ - fromId: dep.id, - toId: task.id, - x1: dep.x + NODE_W, - y1: dep.y + NODE_H / 2, - x2: task.x, - y2: task.y + NODE_H / 2, - }); - } - } - return result; - }, []); - - const stepInfo = STEP_INFO[currentStep]; + const blocked = tasks.filter((task) => task.status === "blocked"); + const ready = tasks.filter((task) => task.status === "ready"); + const active = tasks.filter((task) => task.status === "active"); + const done = tasks.filter((task) => task.status === "done"); return (

- {title || "Task Dependency Graph"} + {title || "Task Board Dependencies"}

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {/* Dependency edges */} - {edges.map(({ fromId, toId, x1, y1, x2, y2 }) => { - const active = isEdgeActive(fromId, toId, currentStep); - const toStatus = getTaskStatus(toId, currentStep); - const isBlocked = toStatus === "blocked"; - let markerEnd = "url(#arrowGray)"; - let strokeColor = palette.arrowFill; - if (active) { - markerEnd = "url(#arrowGreen)"; - strokeColor = "#10b981"; - } else if (isBlocked) { - markerEnd = "url(#arrowRed)"; - strokeColor = "#ef4444"; - } - - return ( - - ); - })} - - {/* Task nodes */} - {TASKS.map((task) => { - const status = getTaskStatus(task.id, currentStep); - const colors = getStatusColor(status); - const statusLabel = getStatusLabel(status); - const isActive = status === "in_progress"; - const isComplete = status === "completed"; - - let filterAttr: string | undefined; - if (isActive) filterAttr = "url(#glowAmber)"; - else if (isComplete) filterAttr = "url(#glowGreen)"; - - return ( - - - - {task.label} - - - {statusLabel} - - - ); - })} - - {/* Blocked annotation for T4 at step 4 */} - {currentStep === 4 && ( - - - - Blocked: waiting on T3 - - - )} - - - {/* File persistence indicator */} -
- - - -
- - .tasks/tasks.json - - - Persisted to disk -- survives context compaction - +
+
+ + .tasks board
- -
- - {/* Legend */} -
-
-
- - pending - -
-
-
- - in_progress - -
-
-
- - completed - -
-
-
- - blocked - +
+
{blocked.length} blocked
+
{ready.length} ready
+
{active.length} active
+
{done.length} done
+ +
+ + + + +
+ +
+ A dependency is not an arrow students must trace. It is a visible blocker badge on the card. +
+ +
- -
); } diff --git a/web/src/components/visualizations/s09-agent-teams.tsx b/web/src/components/visualizations/s09-agent-teams.tsx index bdb5887..d10d17b 100644 --- a/web/src/components/visualizations/s09-agent-teams.tsx +++ b/web/src/components/visualizations/s09-agent-teams.tsx @@ -1,392 +1,237 @@ "use client"; -import { motion, AnimatePresence } from "framer-motion"; -import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { AnimatePresence, motion } from "framer-motion"; +import { Inbox, MessageSquareText, UsersRound } from "lucide-react"; import { StepControls } from "@/components/visualizations/shared/step-controls"; -import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; -// -- Layout constants -- -const SVG_W = 560; -const SVG_H = 340; -const AGENT_R = 40; +type AgentId = "lead" | "coder" | "reviewer"; -// Agent positions: inverted triangle (Lead top-center, Coder bottom-left, Reviewer bottom-right) -const AGENTS = [ - { id: "lead", label: "Lead", cx: SVG_W / 2, cy: 70, inbox: "lead.jsonl" }, - { id: "coder", label: "Coder", cx: 140, cy: 230, inbox: "coder.jsonl" }, - { id: "reviewer", label: "Reviewer", cx: SVG_W - 140, cy: 230, inbox: "reviewer.jsonl" }, -] as const; - -// Inbox tray dimensions, positioned below each agent circle -const TRAY_W = 72; -const TRAY_H = 22; -const TRAY_OFFSET_Y = AGENT_R + 14; - -// Message block dimensions -const MSG_W = 60; -const MSG_H = 20; - -function agentById(id: string) { - return AGENTS.find((a) => a.id === id)!; +interface Mail { + id: string; + from: AgentId; + to: AgentId; + subject: string; + body: string; + appearsAt: number; + consumedAt?: number; } -function trayCenter(id: string) { - const a = agentById(id); - return { x: a.cx, y: a.cy + TRAY_OFFSET_Y + TRAY_H / 2 }; -} - -// Step configuration -const STEPS = [ - { title: "The Team", desc: "Teams use a leader-worker pattern. 
Each teammate has a file-based mailbox inbox." }, - { title: "Lead Assigns Work", desc: "Communication is async: write a message to the recipient's .jsonl inbox file." }, - { title: "Read Inbox", desc: "Teammates poll their inbox before each LLM call. New messages become context." }, - { title: "Independent Work", desc: "Each teammate runs its own agent loop independently." }, - { title: "Pass Result", desc: "Results flow through the same mailbox mechanism. All communication is via files." }, - { title: "Feedback Loop", desc: "The mailbox pattern supports any communication topology: linear, broadcast, round-robin." }, - { title: "File-Based Coordination", desc: "No shared memory, no locks. All coordination through append-only files. Simple, robust, debuggable." }, +const AGENTS: { id: AgentId; label: string; role: string }[] = [ + { id: "lead", label: "Lead", role: "splits work and reads results" }, + { id: "coder", label: "Coder", role: "implements one slice" }, + { id: "reviewer", label: "Reviewer", role: "checks the result" }, ]; -// Helper: determine which agent glows at each step -function agentGlows(agentId: string, step: number): boolean { - if (step === 1 && agentId === "lead") return true; - if (step === 2 && agentId === "coder") return true; - if (step === 3 && agentId === "coder") return true; - if (step === 4 && agentId === "coder") return true; - if (step === 5 && agentId === "reviewer") return true; - return false; +const MAIL: Mail[] = [ + { + id: "assign", + from: "lead", + to: "coder", + subject: "Build login UI", + body: "Please implement the login form and report back.", + appearsAt: 1, + consumedAt: 2, + }, + { + id: "result", + from: "coder", + to: "reviewer", + subject: "Login UI done", + body: "Files changed, ready for review.", + appearsAt: 4, + consumedAt: 5, + }, + { + id: "feedback", + from: "reviewer", + to: "lead", + subject: "Review passed", + body: "No blockers. 
One small polish note.", + appearsAt: 5, + }, +]; + +const STEPS = [ + { + title: "A Team Is Mailboxes", + desc: "Each agent has its own inbox file. The team does not need shared memory to coordinate.", + }, + { + title: "Lead Drops a Card", + desc: "Assigning work means appending a message to the coder's inbox.", + }, + { + title: "Coder Reads Before Thinking", + desc: "Before its next model call, the coder drains its inbox and turns messages into context.", + }, + { + title: "Coder Works Alone", + desc: "The coder now runs its own loop. The lead does not have to hold the full context.", + }, + { + title: "Result Becomes Mail", + desc: "The coder sends a result card to the reviewer through the same mailbox mechanism.", + }, + { + title: "Reviewer Sends Feedback", + desc: "Review feedback is just another card. The lead reads it from its inbox.", + }, + { + title: "Files Are the Coordination Layer", + desc: "The whole team is inspectable as append-only inbox files: lead.jsonl, coder.jsonl, reviewer.jsonl.", + }, +] as const; + +function visibleMail(agent: AgentId, step: number) { + return MAIL.filter((mail) => mail.to === agent && mail.appearsAt <= step && (mail.consumedAt === undefined || step < mail.consumedAt)); } -// Helper: determine which inbox tray has a message sitting in it -function trayHasMessage(agentId: string, step: number): boolean { - if (step === 2 && agentId === "coder") return true; - if (step === 4 && agentId === "reviewer") return false; - if (step === 5 && agentId === "reviewer") return true; - return false; +function agentState(agent: AgentId, step: number): "waiting" | "reading" | "working" | "reviewing" | "done" { + if (agent === "lead" && step === 1) return "working"; + if (agent === "coder" && step === 2) return "reading"; + if (agent === "coder" && (step === 3 || step === 4)) return "working"; + if (agent === "reviewer" && step === 5) return "reviewing"; + if (agent === "lead" && step >= 5) return "reading"; + if (step === 6) return 
"done"; + return "waiting"; } -// Animated message that travels from one point to another -function TravelingMessage({ - fromX, - fromY, - toX, - toY, - label, - delay = 0, -}: { - fromX: number; - fromY: number; - toX: number; - toY: number; - label: string; - delay?: number; -}) { +function stateClass(state: ReturnType) { + if (state === "working") return "border-blue-300 bg-blue-50 dark:border-blue-800 dark:bg-blue-950/30"; + if (state === "reading" || state === "reviewing") return "border-amber-300 bg-amber-50 dark:border-amber-800 dark:bg-amber-950/30"; + if (state === "done") return "border-emerald-300 bg-emerald-50 dark:border-emerald-800 dark:bg-emerald-950/30"; + return "border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-900"; +} + +function MailCard({ mail }: { mail: Mail }) { return ( - - - - {label} - - +
+ {mail.from} -> {mail.to} + +
+
{mail.subject}
+
{mail.body}
+ ); } -// Faded trace line between two agents -function TraceLine({ from, to, strokeColor }: { from: string; to: string; strokeColor: string }) { - const f = trayCenter(from); - const t = trayCenter(to); +function AgentPanel({ agent, step }: { agent: (typeof AGENTS)[number]; step: number }) { + const state = agentState(agent.id, step); + const inbox = visibleMail(agent.id, step); + return ( - +
+
+
+
{agent.label}
+
{agent.role}
+
+ + {state} + +
+ +
+
+ + {agent.id}.jsonl +
+
+ + {inbox.length > 0 ? ( + inbox.map((mail) => ) + ) : ( + + inbox empty + + )} + +
+
+
+ ); +} + +function ActivityLog({ step }: { step: number }) { + const items = [ + "team config creates lead, coder, reviewer", + "lead appends task card to coder.jsonl", + "coder drains inbox before model call", + "coder works in its own loop", + "coder appends result to reviewer.jsonl", + "reviewer appends feedback to lead.jsonl", + "all coordination remains visible on disk", + ].slice(0, step + 1); + + return ( +
+
+ + What changed +
+
+ {items.map((item) => ( + + {item} + + ))} +
+
); } export default function AgentTeams({ title }: { title?: string }) { const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); const step = vis.currentStep; - const palette = useSvgPalette(); + const current = STEPS[step]; return ( -
+

{title || "Agent Team Mailboxes"}

-
-
- {/* SVG visualization */} -
- - - - - - - - - - - {/* Step 6: trace lines */} - {step === 6 && ( - <> - - - - - )} - - {/* Agent nodes */} - {AGENTS.map((agent) => { - const glowing = agentGlows(agent.id, step); - const pulsing = step === 3 && agent.id === "coder"; - - return ( - - {/* Agent circle */} - - {/* Agent label */} - - {agent.label} - - - {/* Inbox tray (file icon style) */} - - - {agent.inbox} - - - ); - })} - - {/* Step 0: team config card */} - {step === 0 && ( - - - - team.config - - - workers: [coder, reviewer] - - - )} - - {/* Step 1: message from Lead to Coder inbox */} - - {step === 1 && ( - - )} - - - {/* Step 2: message from Coder inbox to Coder circle */} - - {step === 2 && ( - - )} - - - {/* Step 3: Coder working, result appears */} - - {step === 3 && ( - - - - result:done - - - )} - - - {/* Step 4: Coder result message travels to Reviewer inbox */} - - {step === 4 && ( - - )} - - - {/* Step 5: Reviewer reads inbox, sends feedback to Lead */} - - {step === 5 && ( - <> - - - - )} - - - {/* Step 6: filesystem tree */} - {step === 6 && ( - - - - .claude/teams/project/ - - - lead.jsonl - - - coder.jsonl - - - reviewer.jsonl - - - )} - -
+
+
+ {AGENTS.map((agent) => ( + + ))} +
- {/* Step controls */} -
- -
+
); diff --git a/web/src/components/visualizations/s09-memory.tsx b/web/src/components/visualizations/s09-memory.tsx new file mode 100644 index 0000000..b3e0f29 --- /dev/null +++ b/web/src/components/visualizations/s09-memory.tsx @@ -0,0 +1,392 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { BookOpen, CheckCircle2, FileText, Inbox, Search, Sparkles } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +type MemoryType = "feedback" | "project" | "reference"; + +interface MemoryFile { + id: string; + type: MemoryType; + title: string; + filename: string; + description: string; + body: string; + relevant?: boolean; +} + +const MEMORY_FILES: MemoryFile[] = [ + { + id: "visual-preference", + type: "feedback", + title: "Beginner visual preference", + filename: "lcc_visual_preference.md", + description: "Use concrete mental models for LCC web pages.", + body: "Prefer cards, boards, shelves, and workbenches over abstract flowcharts.", + relevant: true, + }, + { + id: "project-path", + type: "project", + title: "LCC web paths", + filename: "lcc_web_paths.md", + description: "Web app reads root lesson folders and generated JSON.", + body: "Build from web/, extract content from s01-s20 lesson directories.", + }, + { + id: "test-command", + type: "reference", + title: "Verification commands", + filename: "lcc_test_commands.md", + description: "Useful smoke checks for the course website.", + body: "Run npm run build, then browser-check /zh/s09 and /zh/s20.", + }, +]; + +const STEPS = [ + { + title: "A Fact Worth Keeping", + desc: "The user says something that should survive future sessions.", + }, + { + title: "Stamp It After the Turn", + desc: "Memory extraction happens after useful work, so the main loop stays focused.", + }, + { + title: "Write One Memory File", + desc: "The 
durable detail goes into a Markdown file with a readable title and metadata.", + }, + { + title: "Update the Catalog", + desc: "MEMORY.md is the cheap catalog: short enough to keep nearby.", + }, + { + title: "A Future Request Arrives", + desc: "Later, the agent sees a new request and the catalog, not the whole library.", + }, + { + title: "Catalog Picks One", + desc: "Selection chooses the one memory file that is relevant now.", + }, + { + title: "Build the Reading Stack", + desc: "Only the selected memory joins the current request before the model call.", + }, + { + title: "Continuity Without Clutter", + desc: "The answer reflects old context while unrelated memories stay on the shelf.", + }, +] as const; + +function typeClass(type: MemoryType): string { + if (type === "feedback") return "bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-200"; + if (type === "project") return "bg-blue-100 text-blue-800 dark:bg-blue-900/30 dark:text-blue-200"; + return "bg-emerald-100 text-emerald-800 dark:bg-emerald-900/30 dark:text-emerald-200"; +} + +function Surface({ + title, + icon, + active, + children, + className, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; + className?: string; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function QuoteCard({ children }: { children: React.ReactNode }) { + return ( +
+ {children} +
+ ); +} + +function CatalogRow({ file, visible, selected }: { file: MemoryFile; visible: boolean; selected: boolean }) { + if (!visible) return null; + return ( + +
+
+ {file.title} +
+ + {file.type} + +
+
+ {file.description} +
+
+ {file.filename} +
+
+ ); +} + +function MemoryDetail({ file, selected }: { file: MemoryFile; selected: boolean }) { + return ( + +
+
+
+ {file.title} +
+
+ {file.filename} +
+
+ {selected && ( + + + selected + + )} +
+
+ {file.body} +
+
+ ); +} + +function EmptyState({ label }: { label: string }) { + return ( +
+ {label} +
+ ); +} + +export default function MemoryVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const current = STEPS[step]; + const selectedFile = MEMORY_FILES[0]; + const catalogVisible = step >= 3; + const futureVisible = step >= 4; + const selected = step >= 5; + const injected = step >= 6; + + return ( +
+

+ {title || "Memory Library"} +

+ +
+
+ {["learn", "catalog", "recall"].map((label, index) => { + const active = + (index === 0 && step <= 2) || + (index === 1 && (step === 3 || selected)) || + (index === 2 && futureVisible); + return ( +
+ {index + 1}. {label} +
+ ); + })} +
+ +
+ } active={step <= 2}> +
+ "Please keep LCC pages concrete for beginners." + + {step >= 1 && ( + +
Memory extractor stamp
+ Save a durable preference after the useful work is done. +
+ )} +
+ + {step >= 2 && } + +
+
+ + : } active={futureVisible}> +
+ {!futureVisible && } + {futureVisible && "Continue improving the web lesson visuals."} + {selected && ( + + Catalog search selects lcc_visual_preference.md + + )} + {injected && ( +
+
+ Reading stack before LLM +
+
+
current request
+
+ selected memory detail +
+ {step >= 7 && ( +
+ answer keeps the user's preference +
+ )} +
+
+ )} +
+
+
+ + } + active={catalogVisible || selected} + className="mt-3" + > +
+
+
+ + MEMORY.md catalog +
+
+ {MEMORY_FILES.map((file, index) => ( + = 4)} + selected={selected && file.relevant === true} + /> + ))} + {!catalogVisible && } +
+
+ +
+
+ Memory file preview +
+ {step >= 2 ? ( +
+ +
+ {MEMORY_FILES.slice(1).map((file) => ( +
+
+
+ {file.title} +
+ + not loaded + +
+
+ {file.description} +
+
+ ))} +
+
+ ) : ( + + )} +
+
+
+ +
+ Beginner rule: the catalog stays cheap and readable; full memory files are borrowed only when the current request needs them. +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s10-system-prompt.tsx b/web/src/components/visualizations/s10-system-prompt.tsx new file mode 100644 index 0000000..1aff83d --- /dev/null +++ b/web/src/components/visualizations/s10-system-prompt.tsx @@ -0,0 +1,260 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { Boxes, Brain, CheckCircle2, FileText, KeyRound, Library, Rocket, Wrench } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +const STEPS = [ + { + title: "Runtime State Arrives", + desc: "The prompt is not a fixed paragraph; it starts from workspace, tools, memory, and skills.", + mode: "state", + }, + { + title: "Section Shelf Selects Owners", + desc: "Each subsystem owns one prompt section, so a bad rule has a place to debug.", + mode: "sections", + }, + { + title: "Context Key Checks the Cache", + desc: "The same runtime state produces the same deterministic cache key.", + mode: "cache-miss", + }, + { + title: "Prompt Is Assembled", + desc: "Selected sections are joined into one system prompt that the LLM can read.", + mode: "assemble", + }, + { + title: "Same Key Reuses the Prompt", + desc: "If nothing changed, the runtime skips assembly and reuses the cached prompt.", + mode: "cache-hit", + }, + { + title: "LLM Sees the Built Prompt", + desc: "The model receives a traceable product of runtime state, not a stale hardcoded string.", + mode: "llm", + }, +] as const; + +const SOURCES = [ + { id: "workspace", label: "workspace", value: "/repo", icon: , tone: "blue" }, + { id: "tools", label: "tools", value: "bash, read_file", icon: , tone: "emerald" }, + { id: "memory", label: "memory", value: "enabled", icon: , tone: "amber" }, + { id: "skills", label: "skills", value: "code-review", icon: , tone: "violet" }, +] as const; + +const SECTIONS = [ + { 
id: "identity", title: "identity", body: "You are a helpful coding agent.", owner: "core" }, + { id: "tools", title: "tools", body: "Available tools: bash, read_file.", owner: "tool registry" }, + { id: "workspace", title: "workspace", body: "Current workspace: /repo.", owner: "runtime" }, + { id: "memory", title: "memory + skills", body: "Load memory index and code-review skill.", owner: "context loader" }, +] as const; + +type StepMode = (typeof STEPS)[number]["mode"]; +type Tone = "blue" | "emerald" | "amber" | "violet" | "zinc"; + +function toneClass(tone: Tone, active = true) { + if (!active) return "border-zinc-200 bg-white text-zinc-700 dark:border-zinc-700 dark:bg-zinc-900 dark:text-zinc-200"; + if (tone === "blue") return "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200"; + if (tone === "emerald") return "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200"; + if (tone === "amber") return "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200"; + if (tone === "violet") return "border-violet-200 bg-violet-50 text-violet-800 dark:border-violet-900 dark:bg-violet-950/40 dark:text-violet-200"; + return "border-zinc-200 bg-zinc-50 text-zinc-700 dark:border-zinc-700 dark:bg-zinc-800 dark:text-zinc-200"; +} + +function Surface({ + title, + icon, + active, + children, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function SourceCard({ + source, + active, +}: { + source: (typeof SOURCES)[number]; + active: boolean; +}) { + return ( + +
+ {source.icon} + {source.label} +
+ + {source.value} + +
+ ); +} + +function SectionCard({ + section, + active, + assembled, +}: { + section: (typeof SECTIONS)[number]; + active: boolean; + assembled: boolean; +}) { + return ( + +
+
{section.title}
+ {(active || assembled) && } +
+
owner: {section.owner}
+
{section.body}
+
+ ); +} + +function CachePanel({ mode }: { mode: StepMode }) { + const isHit = mode === "cache-hit"; + const isActive = mode === "cache-miss" || mode === "cache-hit"; + + return ( +
+
+ + context key +
+ + json.dumps(context, sort_keys=True) + +
{isHit ? "cache hit: reuse prompt" : isActive ? "cache miss: assemble sections" : "waiting for state"}
+
+ ); +} + +function PromptPreview({ mode }: { mode: StepMode }) { + const assembled = mode === "assemble" || mode === "cache-hit" || mode === "llm"; + + if (!assembled) { + return ( +
+ prompt not built yet +
+ ); + } + + return ( + + {SECTIONS.map((section) => ( +
+
[{section.title}]
+
{section.body}
+
+ ))} +
+
+ + {mode === "llm" ? "sent to LLM" : "system prompt ready"} +
+
Traceable prompt text, assembled from named runtime owners.
+
+
+ ); +} + +export default function SystemPromptVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2600 }); + const current = STEPS[vis.currentStep]; + const mode = current.mode; + const sourceActive = mode === "state" || mode === "sections" || mode === "cache-miss"; + const sectionsActive = mode === "sections" || mode === "assemble"; + const promptActive = mode === "assemble" || mode === "cache-hit" || mode === "llm"; + + return ( +
+

{title || "Runtime Prompt Assembly"}

+ +
+
+ } active={sourceActive}> +
+ {SOURCES.map((source) => ( + + ))} +
+
+ + } active={sectionsActive || mode === "cache-miss" || mode === "cache-hit"}> +
+
+ {SECTIONS.map((section) => ( + + ))} +
+ +
+
+ + } active={promptActive}> + + + + +
+ +
+ Beginner rule: system prompts should be assembled from named runtime facts, then cached only when those facts are unchanged. +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s10-team-protocols.tsx b/web/src/components/visualizations/s10-team-protocols.tsx index 1114619..73dbe08 100644 --- a/web/src/components/visualizations/s10-team-protocols.tsx +++ b/web/src/components/visualizations/s10-team-protocols.tsx @@ -1,496 +1,362 @@ "use client"; -import { useState } from "react"; -import { motion, AnimatePresence } from "framer-motion"; -import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { useState, type ReactNode } from "react"; +import { AnimatePresence, motion } from "framer-motion"; +import { ArrowRight, CheckCircle2, ClipboardCheck, FileText, LockKeyhole, UserCheck } from "lucide-react"; import { StepControls } from "@/components/visualizations/shared/step-controls"; -import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; type Protocol = "shutdown" | "plan"; -// -- Layout constants for the sequence diagram -- -const SVG_W = 560; -const SVG_H = 360; -const LIFELINE_LEFT_X = 140; -const LIFELINE_RIGHT_X = 420; -const LIFELINE_TOP = 60; -const LIFELINE_BOTTOM = 330; -const ACTIVATION_W = 12; -const ARROW_Y_START = 110; -const ARROW_Y_GAP = 70; - -// Request ID shown on message tags const REQUEST_ID = "req_abc"; -// -- Shutdown protocol step definitions -- const SHUTDOWN_STEPS = [ - { title: "Structured Protocols", desc: "Protocols define structured message exchanges with correlated request IDs." }, - { title: "Shutdown Request", desc: "The leader initiates shutdown. The request_id links the request to its response." }, - { title: "Teammate Decides", desc: "The teammate can accept or reject. It's not a forced kill -- it's a polite request." }, - { title: "Approved", desc: "Same request_id in the response. Teammate exits cleanly." 
}, + { + title: "Agree on a Small Form", + desc: "A protocol is just a shared card shape: request type, request_id, and the expected answer.", + }, + { + title: "Leader Files a Request", + desc: "The leader writes a shutdown request card instead of force-stopping the teammate.", + }, + { + title: "Teammate Chooses", + desc: "The teammate can approve or reject, and the request_id keeps the answer attached to the right request.", + }, + { + title: "Clean Exit", + desc: "The approved response returns to the leader, and the teammate exits cleanly.", + }, ]; -// -- Plan approval protocol step definitions -- const PLAN_STEPS = [ - { title: "Plan Approval", desc: "Teammates in plan_mode must get approval before implementing changes." }, - { title: "Submit Plan", desc: "The teammate designs a plan and sends it to the leader for review." }, - { title: "Leader Reviews", desc: "Leader reviews and approves or rejects with feedback. Same request-response pattern." }, + { + title: "Work Is Locked", + desc: "In plan mode, implementation stays locked until a plan card is approved.", + }, + { + title: "Submit the Plan Card", + desc: "The teammate sends a concrete plan with the same request-response shape.", + }, + { + title: "Approval Unlocks Action", + desc: "The leader approves the card, then implementation can begin.", + }, ]; -// Horizontal arrow between lifelines -function SequenceArrow({ - y, - direction, - label, - tagLabel, - color, - tagBg, - tagStroke, - tagText, -}: { - y: number; - direction: "right" | "left"; - label: string; - tagLabel?: string; - color: string; - tagBg?: string; - tagStroke?: string; - tagText?: string; -}) { - const fromX = direction === "right" ? LIFELINE_LEFT_X + ACTIVATION_W / 2 : LIFELINE_RIGHT_X - ACTIVATION_W / 2; - const toX = direction === "right" ? LIFELINE_RIGHT_X - ACTIVATION_W / 2 : LIFELINE_LEFT_X + ACTIVATION_W / 2; - const arrowTip = direction === "right" ? 
toX - 6 : toX + 6; - const labelX = (fromX + toX) / 2; +const PROTOCOL_STATES: Record = { + shutdown: [ + { label: "drafted", detail: "Lead creates request_id" }, + { label: "pending", detail: "card waits in inbox" }, + { label: "deciding", detail: "teammate replies" }, + { label: "closed", detail: "Lead matches response" }, + ], + plan: [ + { label: "locked", detail: "work cannot start" }, + { label: "submitted", detail: "plan card is sent" }, + { label: "approved", detail: "implementation unlocks" }, + ], +}; +function ToggleButton({ + active, + onClick, + children, +}: { + active: boolean; + onClick: () => void; + children: ReactNode; +}) { return ( - - {/* Arrow line */} - - {/* Arrow head */} - - {/* Message label */} - - {label} - - {/* Request ID tag */} - {tagLabel && ( - - - - {tagLabel} - - + ); } -// Activation bar on a lifeline -function ActivationBar({ - x, - yStart, - yEnd, - color, +function StateRail({ + states, + currentStep, }: { - x: number; - yStart: number; - yEnd: number; - color: string; + states: { label: string; detail: string }[]; + currentStep: number; }) { return ( - +
+
+
+ Protocol state +
+
+ request_id: {REQUEST_ID} +
+
+
+ {states.map((state, index) => { + const active = index === currentStep; + const done = index < currentStep; + return ( +
+ +
{state.label}
+
+ {state.detail} +
+
+ {index < states.length - 1 && ( +
+ +
+ )} +
+ ); + })} +
+
+ ); +} + +function Desk({ + title, + icon, + active, + children, +}: { + title: string; + icon: ReactNode; + active: boolean; + children: ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function ProtocolCard({ + title, + rows, + tone = "blue", +}: { + title: string; + rows: string[]; + tone?: "blue" | "amber" | "emerald" | "zinc"; +}) { + const toneClass = { + blue: "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200", + amber: "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200", + emerald: + "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200", + zinc: "border-zinc-200 bg-zinc-50 text-zinc-700 dark:border-zinc-700 dark:bg-zinc-800 dark:text-zinc-200", + }[tone]; + + return ( + +
{title}
+
+ {rows.map((row) => ( +
+ {row} +
+ ))} +
+
+ ); +} + +function EmptyTray({ label }: { label: string }) { + return ( +
+ {label} +
); } export default function TeamProtocols({ title }: { title?: string }) { const [protocol, setProtocol] = useState("shutdown"); - - const totalSteps = protocol === "shutdown" ? SHUTDOWN_STEPS.length : PLAN_STEPS.length; const steps = protocol === "shutdown" ? SHUTDOWN_STEPS : PLAN_STEPS; - - const vis = useSteppedVisualization({ totalSteps, autoPlayInterval: 2500 }); + const vis = useSteppedVisualization({ totalSteps: steps.length, autoPlayInterval: 2500 }); const step = vis.currentStep; - const palette = useSvgPalette(); - const switchProtocol = (p: Protocol) => { - setProtocol(p); + const switchProtocol = (value: Protocol) => { + setProtocol(value); vis.reset(); }; - const leftLabel = protocol === "shutdown" ? "Leader" : "Leader"; - const rightLabel = protocol === "shutdown" ? "Teammate" : "Teammate"; + const isPlan = protocol === "plan"; return ( -
+

- {title || "FSM Team Protocols"} + {title || "Team Protocol Cards"}

-
- {/* Protocol toggle */} -
- - + +
+
+ switchProtocol("shutdown")}> + Shutdown + + switchProtocol("plan")}> + Plan Approval +
- {/* Sequence diagram SVG */} - - - - - - + - {/* Lifeline headers */} - - - {leftLabel} - - - - - {rightLabel} - - - {/* Lifeline dashed lines */} - - - - - {protocol === "shutdown" && ( - - {/* Activation bars appear as needed */} - {step >= 1 && ( - = 3 ? ARROW_Y_START + ARROW_Y_GAP * 2 + 20 : ARROW_Y_START + 30} - color="#3b82f6" +
+ } + active={(!isPlan && (step === 1 || step === 3)) || (isPlan && step === 2)} + > +
+ + {!isPlan && step >= 1 && ( + = 3 ? "zinc" : "blue"} /> )} - {step >= 1 && ( - = 3 ? ARROW_Y_START + ARROW_Y_GAP * 2 + 15 : ARROW_Y_START + ARROW_Y_GAP + 20} - color="#8b5cf6" + {!isPlan && step >= 3 && ( + )} - - {/* Step 1: shutdown_request arrow (Leader -> Teammate) */} - {step >= 1 && ( - = 2 && ( + )} + + {((!isPlan && step === 0) || (isPlan && step < 2)) && ( + + )} +
+
- {/* Step 2: decision box on teammate lifeline */} - {step >= 2 && ( - } + active={(!isPlan && step === 0) || (isPlan && step === 0)} + > +
+ +
+ The key idea is correlation, not ceremony. +
+ {isPlan && ( +
+ + implementation locked until approval +
+ )} +
+ + + : } + active={(!isPlan && step === 2) || (isPlan && step === 1)} + > +
+ + {!isPlan && step >= 2 && ( + = 3 ? "state: exited" : "state: deciding"]} + tone={step >= 3 ? "emerald" : "amber"} /> )} - - {/* Step 3: shutdown_response arrow (Teammate -> Leader) */} - {step >= 3 && ( - = 1 && ( + = 2 ? "emerald" : "blue"} /> )} - - {/* Step 3: exit annotation */} - {step >= 3 && ( - - - - - exit - - - )} - - )} - - {protocol === "plan" && ( - - {/* Activation bars */} - {step >= 1 && ( - = 2 ? ARROW_Y_START + ARROW_Y_GAP * 2 + 15 : ARROW_Y_START + 30} - color="#8b5cf6" - /> - )} - {step >= 1 && ( - = 2 ? ARROW_Y_START + ARROW_Y_GAP * 2 + 15 : ARROW_Y_START + ARROW_Y_GAP + 10} - color="#3b82f6" - /> - )} - - {/* Step 1: plan submission arrow (Teammate -> Leader) */} - {step >= 1 && ( - - )} - - {/* Step 1: plan content box */} - {step >= 1 && ( - - - - Plan: - - - 1. Add error handler - - - 2. Update tests - - - 3. Refactor module - - - )} - - {/* Step 2: approval response arrow (Leader -> Teammate) */} - {step >= 2 && ( - - )} - - {/* Step 2: checkmark */} - {step >= 2 && ( - - - - OK - - - )} - - )} - - - - {/* Step controls */} -
- + + {((!isPlan && step < 2) || (isPlan && step === 0)) && ( + + )} +
+
+ +
); diff --git a/web/src/components/visualizations/s11-autonomous-agents.tsx b/web/src/components/visualizations/s11-autonomous-agents.tsx index cfd175a..d6fa855 100644 --- a/web/src/components/visualizations/s11-autonomous-agents.tsx +++ b/web/src/components/visualizations/s11-autonomous-agents.tsx @@ -1,465 +1,276 @@ "use client"; -import { motion } from "framer-motion"; -import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { AnimatePresence, motion } from "framer-motion"; +import { CheckCircle2, ClipboardList, Hourglass, UserRoundCog } from "lucide-react"; import { StepControls } from "@/components/visualizations/shared/step-controls"; -import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; -// -- FSM states and their layout positions (diamond: idle top, poll right, claim bottom, work left) -- -type Phase = "idle" | "poll" | "claim" | "work"; +type AgentPhase = "idle" | "polling" | "claiming" | "working" | "done"; +type TaskStatus = "open" | "claimed" | "complete"; -const FSM_CX = 110; -const FSM_CY = 110; -const FSM_R = 65; -const FSM_STATE_R = 22; - -const FSM_STATES: { id: Phase; label: string; angle: number }[] = [ - { id: "idle", label: "idle", angle: -Math.PI / 2 }, - { id: "poll", label: "poll", angle: 0 }, - { id: "claim", label: "claim", angle: Math.PI / 2 }, - { id: "work", label: "work", angle: Math.PI }, -]; - -const FSM_TRANSITIONS: { from: Phase; to: Phase }[] = [ - { from: "idle", to: "poll" }, - { from: "poll", to: "claim" }, - { from: "claim", to: "work" }, - { from: "work", to: "idle" }, -]; - -function fsmPos(angle: number) { - return { x: FSM_CX + FSM_R * Math.cos(angle), y: FSM_CY + FSM_R * Math.sin(angle) }; -} - -const PHASE_COLORS: Record = { - idle: "#a1a1aa", - poll: "#f59e0b", - claim: "#3b82f6", - work: "#10b981", -}; - -// -- Task board data -- -interface TaskRow { - id: string; - name: string; - 
status: "unclaimed" | "active" | "complete"; - owner: string; -} - -const INITIAL_TASKS: TaskRow[] = [ - { id: "T1", name: "Fix auth bug", status: "unclaimed", owner: "-" }, - { id: "T2", name: "Add rate limiter", status: "unclaimed", owner: "-" }, - { id: "T3", name: "Write tests", status: "unclaimed", owner: "-" }, - { id: "T4", name: "Update API docs", status: "unclaimed", owner: "-" }, -]; - -// Agent positions around the task board (left panel) -const BOARD_CX = 140; -const BOARD_CY = 90; -const AGENT_ORBIT = 85; -const AGENT_R = 20; - -const AGENT_ANGLES = [-Math.PI / 2, Math.PI / 6, (5 * Math.PI) / 6]; - -function agentPos(index: number) { - const angle = AGENT_ANGLES[index]; - return { x: BOARD_CX + AGENT_ORBIT * Math.cos(angle), y: BOARD_CY + AGENT_ORBIT * Math.sin(angle) }; -} - -// -- Step definitions -- -const STEPS = [ - { title: "Self-Governing Agents", desc: "Autonomous agents need no coordinator. They govern themselves with an idle-poll-claim-work cycle." }, - { title: "Idle Timer", desc: "Each idle agent counts rounds. A timeout triggers self-directed task polling." }, - { title: "Poll Task Board", desc: "Timeout! The agent reads the task board looking for unclaimed work." }, - { title: "Claim Task", desc: "The agent writes its name to the task record. Atomic, no conflicts." }, - { title: "Work", desc: "The agent works on the claimed task using its own agent loop." }, - { title: "Independent Polling", desc: "Multiple agents poll and claim independently. No central coordinator needed." }, - { title: "Complete & Reset", desc: "Task done. Agent returns to idle. The cycle repeats." }, - { title: "Self-Organization", desc: "Three agents, zero coordination overhead. Polling + timeout = emergent organization." 
}, -]; - -// Per-step state for each agent interface AgentState { - phase: Phase; - timerFill: number; - color: string; - taskClaim: string | null; + id: string; + phase: AgentPhase; + timer: number; + task?: string; } -function getAgentStates(step: number): AgentState[] { - const idle: AgentState = { phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null }; +interface TaskState { + id: string; + title: string; + status: TaskStatus; + owner?: string; +} - switch (step) { - case 0: - return [ - { ...idle }, - { ...idle }, - { ...idle }, - ]; - case 1: - return [ - { phase: "idle", timerFill: 0.6, color: PHASE_COLORS.idle, taskClaim: null }, - { ...idle }, - { ...idle }, - ]; - case 2: - return [ - { phase: "poll", timerFill: 1.0, color: PHASE_COLORS.poll, taskClaim: null }, - { ...idle }, - { ...idle }, - ]; - case 3: - return [ - { phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T1" }, - { ...idle }, - { ...idle }, - ]; - case 4: - return [ - { phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T1" }, - { ...idle }, - { ...idle }, - ]; - case 5: - return [ - { phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T1" }, - { phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T2" }, - { ...idle }, - ]; - case 6: - return [ - { phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null }, - { phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T2" }, - { ...idle }, - ]; - case 7: - return [ - { phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null }, - { phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T2" }, - { phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T3" }, - ]; - default: - return [{ ...idle }, { ...idle }, { ...idle }]; +const STEPS = [ + { + title: "Quiet Agents", + desc: "Autonomous agents start by waiting. 
The important mental model is a work board, not a central dispatcher.", + }, + { + title: "Idle Timer Fills", + desc: "An agent watches its own idle timer. When it waits long enough, it decides to look for work.", + }, + { + title: "Read the Board", + desc: "The agent polls the shared task board and looks for an open card.", + }, + { + title: "Claim One Card", + desc: "Claiming writes the agent name onto one task, making ownership visible.", + }, + { + title: "Work Independently", + desc: "The claimed task moves into the agent workspace. No coordinator has to babysit it.", + }, + { + title: "Others Join In", + desc: "A second agent can claim a different card through the same simple habit.", + }, + { + title: "Finish and Free Up", + desc: "Completed work goes back to the board as done, and the agent returns to waiting.", + }, + { + title: "Self Organization", + desc: "Timers plus visible ownership let a small group organize itself without a manager loop.", + }, +] as const; + +const TASKS = [ + { id: "T1", title: "Fix auth bug" }, + { id: "T2", title: "Add rate limiter" }, + { id: "T3", title: "Write docs" }, + { id: "T4", title: "Clean tests" }, +]; + +function getAgents(step: number): AgentState[] { + if (step === 0) { + return [ + { id: "A", phase: "idle", timer: 0.1 }, + { id: "B", phase: "idle", timer: 0 }, + { id: "C", phase: "idle", timer: 0 }, + ]; } + if (step === 1) { + return [ + { id: "A", phase: "idle", timer: 0.85 }, + { id: "B", phase: "idle", timer: 0.25 }, + { id: "C", phase: "idle", timer: 0 }, + ]; + } + if (step === 2) { + return [ + { id: "A", phase: "polling", timer: 1 }, + { id: "B", phase: "idle", timer: 0.25 }, + { id: "C", phase: "idle", timer: 0 }, + ]; + } + if (step === 3) { + return [ + { id: "A", phase: "claiming", timer: 0, task: "T1" }, + { id: "B", phase: "idle", timer: 0.45 }, + { id: "C", phase: "idle", timer: 0.1 }, + ]; + } + if (step === 4) { + return [ + { id: "A", phase: "working", timer: 0, task: "T1" }, + { id: "B", phase: 
"idle", timer: 0.65 }, + { id: "C", phase: "idle", timer: 0.2 }, + ]; + } + if (step === 5) { + return [ + { id: "A", phase: "working", timer: 0, task: "T1" }, + { id: "B", phase: "claiming", timer: 0, task: "T2" }, + { id: "C", phase: "idle", timer: 0.35 }, + ]; + } + if (step === 6) { + return [ + { id: "A", phase: "done", timer: 0, task: "T1" }, + { id: "B", phase: "working", timer: 0, task: "T2" }, + { id: "C", phase: "idle", timer: 0.6 }, + ]; + } + return [ + { id: "A", phase: "idle", timer: 0.15 }, + { id: "B", phase: "working", timer: 0, task: "T2" }, + { id: "C", phase: "claiming", timer: 0, task: "T3" }, + ]; } -function getTaskStates(step: number): TaskRow[] { - const tasks = INITIAL_TASKS.map((t) => ({ ...t })); - if (step >= 3) { tasks[0].status = "active"; tasks[0].owner = "A"; } - if (step >= 5) { tasks[1].status = "active"; tasks[1].owner = "B"; } - if (step >= 6) { tasks[0].status = "complete"; } - if (step >= 7) { tasks[2].status = "active"; tasks[2].owner = "C"; } - return tasks; +function getTasks(step: number): TaskState[] { + return TASKS.map((task) => { + if (task.id === "T1" && step >= 6) { + return { ...task, status: "complete", owner: "A" }; + } + if (task.id === "T1" && step >= 3) { + return { ...task, status: "claimed", owner: "A" }; + } + if (task.id === "T2" && step >= 5) { + return { ...task, status: "claimed", owner: "B" }; + } + if (task.id === "T3" && step >= 7) { + return { ...task, status: "claimed", owner: "C" }; + } + return { ...task, status: "open" }; + }); } -function getActivePhase(step: number): Phase { - if (step <= 1) return "idle"; - if (step === 2) return "poll"; - if (step === 3) return "claim"; - if (step === 4 || step === 5) return "work"; - if (step === 6) return "idle"; - return "claim"; +function phaseClass(phase: AgentPhase): string { + if (phase === "working") return "border-emerald-300 bg-emerald-50 dark:border-emerald-800 dark:bg-emerald-950/30"; + if (phase === "claiming" || phase === "polling") return 
"border-amber-300 bg-amber-50 dark:border-amber-800 dark:bg-amber-950/30"; + if (phase === "done") return "border-blue-300 bg-blue-50 dark:border-blue-800 dark:bg-blue-950/30"; + return "border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-900"; } -// Ring timer around an agent -function TimerRing({ cx, cy, r, fill }: { cx: number; cy: number; r: number; fill: number }) { - if (fill <= 0) return null; - const circumference = 2 * Math.PI * (r + 4); - const offset = circumference * (1 - fill); - return ( - - ); +function statusClass(status: TaskStatus): string { + if (status === "complete") return "bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-300"; + if (status === "claimed") return "bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-300"; + return "bg-zinc-100 text-zinc-600 dark:bg-zinc-800 dark:text-zinc-300"; } -// FSM arrow between two states -function FSMArrow({ from, to, active, inactiveStroke }: { from: Phase; to: Phase; active: boolean; inactiveStroke: string }) { - const fState = FSM_STATES.find((s) => s.id === from)!; - const tState = FSM_STATES.find((s) => s.id === to)!; - const fPos = fsmPos(fState.angle); - const tPos = fsmPos(tState.angle); - - const dx = tPos.x - fPos.x; - const dy = tPos.y - fPos.y; - const dist = Math.sqrt(dx * dx + dy * dy); - const ux = dx / dist; - const uy = dy / dist; - - const x1 = fPos.x + ux * FSM_STATE_R; - const y1 = fPos.y + uy * FSM_STATE_R; - const x2 = tPos.x - ux * (FSM_STATE_R + 6); - const y2 = tPos.y - uy * (FSM_STATE_R + 6); - - const perpX = -uy * 12; - const perpY = ux * 12; - const cx = (x1 + x2) / 2 + perpX; - const cy = (y1 + y2) / 2 + perpY; +function AgentCard({ agent }: { agent: AgentState }) { + const timerPercent = Math.round(agent.timer * 100); return ( - - - + +
+
+ + {agent.id} + +
+
Agent {agent.id}
+
{agent.phase}
+
+
+ {agent.phase === "done" ? ( + + ) : ( + + )} +
+ +
+ +
+
+ {agent.task ? `task: ${agent.task}` : `idle timer: ${timerPercent}%`} +
+
); } export default function AutonomousAgents({ title }: { title?: string }) { const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); const step = vis.currentStep; - const palette = useSvgPalette(); - - const agentStates = getAgentStates(step); - const tasks = getTaskStates(step); - const activePhase = getActivePhase(step); - const agentNames = ["A", "B", "C"]; + const agents = getAgents(step); + const tasks = getTasks(step); + const current = STEPS[step]; return ( -
+

- {title || "Autonomous Agent Cycle"} + {title || "Autonomous Work Board"}

-
-
- {/* Left panel: spatial view with agents and task board */} -
-
Spatial View
- - {/* Task board (small table in center) */} - - - Task Board - - - {tasks.filter((t) => t.status === "unclaimed").length} unclaimed - - - {tasks.filter((t) => t.status === "complete").length} complete - - {/* Agents */} - {agentStates.map((state, i) => { - const pos = agentPos(i); - const isPulsing = state.phase === "work"; - const isPolling = state.phase === "poll"; - - return ( - - {/* Dashed line from agent to board when polling */} - {isPolling && ( - - )} - {/* Solid line from agent to board when claiming */} - {state.phase === "claim" && ( - - )} - - {/* Timer ring */} - - - {/* Agent circle */} - - - {agentNames[i]} - - - {/* Task label below agent when claiming or working */} - {state.taskClaim && ( - - {state.taskClaim} - - )} - - ); - })} - - - {/* Task table below the spatial view */} -
- - - - - - - - - - {tasks.map((task) => ( - - - - - - ))} - -
TaskStatusOwner
{task.name} - - {task.status} - - {task.owner}
+
+
+
+
+ + Agents watch their own idle timer
-
- - {/* Right panel: FSM state machine diagram */} -
-
FSM Cycle
- - - - - - - - {/* Transition arrows */} - {FSM_TRANSITIONS.map((t) => { - const isActive = - (activePhase === t.from) || - (activePhase === t.to && t.from === FSM_TRANSITIONS.find((tr) => tr.to === activePhase)?.from); - return ( - - ); - })} - - {/* State circles */} - {FSM_STATES.map((state) => { - const pos = fsmPos(state.angle); - const isActive = state.id === activePhase; - return ( - - - - {state.label} - - - ); - })} - - - {/* Legend */} -
- {FSM_STATES.map((s) => ( -
- - {s.label} -
+
+ {agents.map((agent) => ( + ))}
+ +
+
+ + Shared task board +
+
+ + {tasks.map((task) => ( + +
+ {task.id} + + {task.status} + +
+
{task.title}
+
+ owner: {task.owner ?? "-"} +
+
+ ))} +
+
+
+ Nobody assigns tasks directly; agents claim visible open cards when their timers wake them. +
+
- {/* Step controls */} -
- -
+
); diff --git a/web/src/components/visualizations/s11-error-recovery.tsx b/web/src/components/visualizations/s11-error-recovery.tsx new file mode 100644 index 0000000..d026df3 --- /dev/null +++ b/web/src/components/visualizations/s11-error-recovery.tsx @@ -0,0 +1,347 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { Activity, AlertTriangle, Gauge, History, Repeat2, RotateCcw, ShieldCheck, TimerReset, Workflow } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +const STEPS = [ + { + title: "Normal Call Still Comes First", + desc: "The runtime starts with a regular LLM call and only enters recovery when a specific failure appears.", + mode: "normal", + }, + { + title: "max_tokens Means Output Was Cut Off", + desc: "First recovery is to retry with a larger budget before adding any synthetic continuation message.", + mode: "max-tokens", + }, + { + title: "prompt_too_long Means Context Must Shrink", + desc: "The runtime performs reactive compact once, then retries the same task with a smaller message list.", + mode: "prompt-too-long", + }, + { + title: "429 Means Wait, Then Retry", + desc: "Rate limits use exponential backoff with jitter so retries do not stampede the provider.", + mode: "rate-limit", + }, + { + title: "Repeated 529 Can Switch Models", + desc: "Provider overload increments RecoveryState and can move to a fallback model after repeated failures.", + mode: "overloaded", + }, + { + title: "Recovered Calls Return to the Loop", + desc: "Each recovery path is bounded, inspectable, and eventually returns to the normal tool loop or exits cleanly.", + mode: "summary", + }, +] as const; + +const CASES = [ + { + id: "max-tokens", + label: "max_tokens", + symptom: "model stopped mid-answer", + action: "8K -> 64K, retry same request", + state: "token escalated 
once", + tone: "amber", + }, + { + id: "prompt-too-long", + label: "prompt_too_long", + symptom: "context too large", + action: "reactive_compact(messages), retry once", + state: "compact retry used", + tone: "orange", + }, + { + id: "rate-limit", + label: "429", + symptom: "rate limited", + action: "backoff + jitter, max 10 retries", + state: "retry attempt counted", + tone: "blue", + }, + { + id: "overloaded", + label: "529", + symptom: "provider overloaded", + action: "backoff; 3 consecutive -> fallback model", + state: "consecutive_529 tracked", + tone: "red", + }, +] as const; + +type StepMode = (typeof STEPS)[number]["mode"]; +type CaseId = (typeof CASES)[number]["id"]; +type Tone = "amber" | "orange" | "blue" | "red" | "emerald" | "zinc"; + +function toneClass(tone: Tone, active = true) { + if (!active) return "border-zinc-200 bg-white text-zinc-700 dark:border-zinc-700 dark:bg-zinc-900 dark:text-zinc-200"; + if (tone === "amber") return "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200"; + if (tone === "orange") return "border-orange-200 bg-orange-50 text-orange-800 dark:border-orange-900 dark:bg-orange-950/40 dark:text-orange-200"; + if (tone === "blue") return "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200"; + if (tone === "red") return "border-red-200 bg-red-50 text-red-800 dark:border-red-900 dark:bg-red-950/40 dark:text-red-200"; + if (tone === "emerald") return "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200"; + return "border-zinc-200 bg-zinc-50 text-zinc-700 dark:border-zinc-700 dark:bg-zinc-800 dark:text-zinc-200"; +} + +function activeCase(mode: StepMode): CaseId | null { + if (mode === "max-tokens") return "max-tokens"; + if (mode === "prompt-too-long") return "prompt-too-long"; + if (mode === "rate-limit") return "rate-limit"; + if (mode === "overloaded") return 
"overloaded"; + return null; +} + +function Surface({ + title, + icon, + active, + children, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function CaseCard({ + item, + active, + muted, +}: { + item: (typeof CASES)[number]; + active: boolean; + muted: boolean; +}) { + return ( + +
+
{item.label}
+ {active && } +
+
{item.symptom}
+
{item.action}
+
+ ); +} + +function RecoveryStatePanel({ mode }: { mode: StepMode }) { + const values = { + token: mode === "max-tokens" || mode === "summary" ? "64K used" : "8K", + compact: mode === "prompt-too-long" || mode === "summary" ? "used once" : "unused", + retry: mode === "rate-limit" || mode === "overloaded" || mode === "summary" ? "counting" : "0", + model: mode === "overloaded" ? "fallback ready" : "primary", + }; + + return ( +
+ {[ + ["max_tokens", values.token], + ["reactive_compact", values.compact], + ["retry_attempt", values.retry], + ["current_model", values.model], + ].map(([label, value]) => ( +
+
{label}
+
{value}
+
+ ))} +
+ ); +} + +function ActionPanel({ mode }: { mode: StepMode }) { + if (mode === "normal") { + return ( + +
+ + normal tool loop +
+
LLM succeeds, tool_use continues as usual.
+
+ ); + } + + if (mode === "max-tokens") { + return ( + +
+ + escalate output budget +
+
+ + +
+
No fake "continue" user message on the first escalation.
+
+ ); + } + + if (mode === "prompt-too-long") { + return ( + +
+ + shrink context, retry once +
+ +
If it is still too long after compact, exit cleanly instead of looping forever.
+
+ ); + } + + if (mode === "rate-limit") { + return ( + +
+ + exponential backoff +
+
+ {["0.5s", "1s", "2s"].map((delay) => ( +
{delay} + jitter
+ ))} +
+
Wait before retrying so the provider has time to recover.
+
+ ); + } + + if (mode === "overloaded") { + return ( + +
+ + fallback model path +
+ + +
+ ); + } + + return ( + + {CASES.map((item) => ( +
+
{item.label}
+
{item.state}
+
+ ))} +
+
+ + continue or exit cleanly +
+
Every path has a limit, then returns to the normal loop or stops with an explicit error.
+
+
+ ); +} + +function CodePill({ label, value }: { label: string; value: string }) { + return ( +
+
{label}
+ {value} +
+ ); +} + +export default function ErrorRecoveryVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2600 }); + const current = STEPS[vis.currentStep]; + const mode = current.mode; + const active = activeCase(mode); + const isSummary = mode === "summary"; + + return ( +
+

{title || "Error Recovery Paths"}

+ +
+
+ } active={mode !== "normal"}> +
+
+
+ + success +
+
No recovery needed; continue to tool loop.
+
+ {CASES.map((item) => ( + + ))} +
+
+ + } active={mode !== "normal"}> + + + + } active> + + + + +
+ +
+ Beginner rule: do not blindly retry; classify the failure, run the smallest recovery, and track whether that recovery was already used. +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s14-cron-scheduler.tsx b/web/src/components/visualizations/s14-cron-scheduler.tsx new file mode 100644 index 0000000..0ac47d4 --- /dev/null +++ b/web/src/components/visualizations/s14-cron-scheduler.tsx @@ -0,0 +1,248 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { Bot, CalendarDays, CheckCircle2, Clock3, Database, Inbox } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +const STEPS = [ + { + title: "Make It Repeatable", + desc: "The user turns one normal prompt into a reusable schedule card.", + active: "composer", + }, + { + title: "Store the Card", + desc: "The schedule lives in durable data, so it is not tied to the current chat turn.", + active: "ledger", + }, + { + title: "Time Keeps Moving", + desc: "A tiny scheduler watches the clock while the agent can do other work.", + active: "clock", + }, + { + title: "Copy Goes to the Queue", + desc: "When the cron expression matches, the scheduler puts a due copy in the queue.", + active: "queue", + }, + { + title: "Run as a Normal Turn", + desc: "The queue processor hands the due prompt to the same agent loop beginners already know.", + active: "inbox", + }, + { + title: "Keep the Original", + desc: "The result is recorded, and the schedule card remains ready for the next matching time.", + active: "done", + }, +] as const; + +const DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri"]; + +function Panel({ + title, + icon, + active, + children, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +function ScheduleCard({ + title, + subtitle, + tone = "blue", +}: { + title: string; + subtitle: string; + tone?: "blue" | "amber" | "emerald"; +}) { + const toneClass = { + blue: "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/40 dark:text-blue-200", + amber: "border-amber-200 bg-amber-50 text-amber-800 dark:border-amber-900 dark:bg-amber-950/40 dark:text-amber-200", + emerald: + "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/40 dark:text-emerald-200", + }[tone]; + + return ( + +
{title}
+
{subtitle}
+
+ ); +} + +export default function CronSchedulerVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const current = STEPS[step]; + + return ( +
+

+ {title || "Cron Scheduler"} +

+ +
+
+
+
+ + Weekly clock +
+ = 2 ? { scale: [1, 1.08, 1] } : { scale: 1 }} + transition={{ duration: 1.1, repeat: step >= 2 && step <= 4 ? Infinity : 0 }} + className="rounded-md bg-white px-2 py-1 font-mono text-xs text-zinc-600 shadow-sm dark:bg-zinc-900 dark:text-zinc-300" + > + {step < 2 ? "08:59" : "09:00"} + +
+
+ {DAYS.map((day, index) => ( +
= 2 && index === 2 + ? "border-amber-300 bg-amber-100 text-amber-800 dark:border-amber-800 dark:bg-amber-950/50 dark:text-amber-200" + : "border-zinc-200 bg-white text-zinc-500 dark:border-zinc-700 dark:bg-zinc-900 dark:text-zinc-400" + )} + > + {day} +
+ ))} +
+
+ +
+ } + active={current.active === "ledger" || current.active === "done"} + > +
+ {step === 0 && ( + + )} + + {step >= 1 && ( + + )} + +
+ {step >= 1 ? "stored schedules stay here" : "no saved schedule yet"} +
+
+
+ + } + active={current.active === "clock" || current.active === "queue"} + > +
+
+ watcher: {step >= 2 ? "running" : "waiting"} +
+ + {step >= 3 && step <= 4 && ( + + )} + + {step < 3 && ( +
+ queue is empty +
+ )} + {step === 5 && ( + + )} +
+
+ + } + active={current.active === "inbox" || current.active === "done"} + > +
+ + {step >= 4 && ( + = 5 ? "result appended" : "runs like a normal prompt"} + tone={step >= 5 ? "emerald" : "blue"} + /> + )} + +
+ {step >= 5 ? : } + {step >= 5 ? "review summary saved" : "agent loop available"} +
+
+
+
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s19-mcp-tools.tsx b/web/src/components/visualizations/s19-mcp-tools.tsx new file mode 100644 index 0000000..11503a1 --- /dev/null +++ b/web/src/components/visualizations/s19-mcp-tools.tsx @@ -0,0 +1,268 @@ +"use client"; + +import { AnimatePresence, motion } from "framer-motion"; +import { Cable, CheckCircle2, PlugZap, Search, Server, Wrench } from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +const STEPS = [ + { + title: "Need a New Tool", + desc: "The agent starts with built-in tools, then notices this task needs an outside capability.", + active: "need", + }, + { + title: "Plug In a Server", + desc: "MCP is easiest to picture as plugging a named toolbox into the agent workbench.", + active: "server", + }, + { + title: "Read the Tool Labels", + desc: "The server advertises schemas, so the agent can see what each tool expects.", + active: "discover", + }, + { + title: "Name the Tools Clearly", + desc: "Each external tool gets a namespaced label, which avoids collisions with built-ins.", + active: "belt", + }, + { + title: "Use It Like Any Tool", + desc: "Once on the tool belt, the MCP tool follows the same call-and-result rhythm.", + active: "call", + }, + { + title: "Result Comes Back", + desc: "The returned data is just another tool result for the next model turn.", + active: "result", + }, +] as const; + +const BUILT_INS = ["read_file", "edit_file", "bash"]; +const SERVER_TOOLS = [ + { raw: "search", namespaced: "mcp__docs__search" }, + { raw: "fetch", namespaced: "mcp__docs__fetch" }, + { raw: "list_sections", namespaced: "mcp__docs__list_sections" }, +]; + +function ToolChip({ + label, + active, + external, +}: { + label: string; + active?: boolean; + external?: boolean; +}) { + return ( + + {label} + + ); +} + +function Shelf({ + title, + 
icon, + active, + children, +}: { + title: string; + icon: React.ReactNode; + active: boolean; + children: React.ReactNode; +}) { + return ( +
+
+ + {icon} + + {title} +
+ {children} +
+ ); +} + +export default function McpToolsVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const step = vis.currentStep; + const current = STEPS[step]; + const connected = step >= 1; + const discovered = step >= 2; + const namespaced = step >= 3; + const called = step >= 4; + const returned = step >= 5; + + return ( +
+

+ {title || "MCP Tool Bridge"} +

+ +
+
+ } + active={current.active === "need"} + > +
+ {BUILT_INS.map((tool) => ( + + ))} +
+ limited to local skills +
+
+
+ +
+ } + active={current.active === "server" || current.active === "discover"} + > +
+
+ + docs-server +
+ + {connected ? "connected" : "offline"} + +
+
+ + {discovered ? ( + SERVER_TOOLS.map((tool) => ( + + )) + ) : ( + + schemas hidden until connected + + )} + +
+
+ + } + active={current.active === "belt" || current.active === "call"} + > +
+ + {namespaced ? ( + SERVER_TOOLS.slice(0, 2).map((tool, index) => ( + + )) + ) : ( + + no MCP tools on the belt + + )} + +
+
+
+ + : } + active={current.active === "call" || current.active === "result"} + > +
+ + {called ? "mcp__docs__search({ query })" : "waiting for a tool call"} + + + {returned && ( + + tool_result: 3 relevant docs found + + )} + +
+
+
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/s20-comprehensive.tsx b/web/src/components/visualizations/s20-comprehensive.tsx new file mode 100644 index 0000000..1b50a7a --- /dev/null +++ b/web/src/components/visualizations/s20-comprehensive.tsx @@ -0,0 +1,413 @@ +"use client"; + +import { type ReactNode } from "react"; +import { AnimatePresence, motion } from "framer-motion"; +import { + Archive, + Blocks, + Bot, + CheckCircle2, + Clock3, + FileText, + GitBranch, + Inbox, + Network, + ShieldCheck, + Sparkles, + Wrench, +} from "lucide-react"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { cn } from "@/lib/utils"; + +type StageId = + | "intake" + | "guardrails" + | "route" + | "execute" + | "external" + | "recover" + | "append"; + +const STAGES: { + id: StageId; + label: string; + detail: string; + icon: ReactNode; +}[] = [ + { + id: "intake", + label: "Intake", + detail: "request, memory, background notes", + icon: , + }, + { + id: "guardrails", + label: "Guardrails", + detail: "permissions, hooks, policy", + icon: , + }, + { + id: "route", + label: "Route", + detail: "choose the right work surface", + icon: , + }, + { + id: "execute", + label: "Execute", + detail: "local tools, teams, worktrees", + icon: , + }, + { + id: "external", + label: "External", + detail: "MCP toolboxes return results", + icon: , + }, + { + id: "recover", + label: "Recover", + detail: "retry, compact, repair state", + icon: , + }, + { + id: "append", + label: "Append", + detail: "one transcript stays authoritative", + icon: , + }, +]; + +const SURFACES = [ + { label: "background", icon: , text: "slow commands can finish later" }, + { label: "team", icon: , text: "teammates work through mailboxes" }, + { label: "worktree", icon: , text: "risky edits stay isolated" }, + { label: "MCP", icon: , text: "external tools are normalized" }, +]; + +const STEPS: { + 
title: string; + desc: string; + stage: StageId; + used: StageId[]; + packet: { + request: string; + carried: string[]; + decision: string; + result: string; + }; + transcript: string[]; +}[] = [ + { + title: "A Turn Starts as a Packet", + desc: "The comprehensive agent first gathers everything the model should see, instead of scattering context across hidden places.", + stage: "intake", + used: ["intake"], + packet: { + request: "Fix the web lesson visuals and verify the pages.", + carried: ["recent messages", "relevant memory", "background notes"], + decision: "build one model-visible input packet", + result: "ready for a model call", + }, + transcript: ["user request enters", "memory and notes are attached"], + }, + { + title: "Guardrails Check the Packet", + desc: "Permissions and hooks are not separate side quests; they are the inspection gate before work happens.", + stage: "guardrails", + used: ["intake", "guardrails"], + packet: { + request: "Edit files, run build, open browser.", + carried: ["permission mode", "hook output", "workspace rules"], + decision: "allowed work continues; risky work asks first", + result: "safe action envelope", + }, + transcript: ["policy checked", "allowed actions are visible"], + }, + { + title: "The Agent Picks Work Surfaces", + desc: "The model does not need every mechanism at once. 
It chooses the smallest surface that matches the job.", + stage: "route", + used: ["route", "execute", "external"], + packet: { + request: "Search code, patch UI, verify rendered pages.", + carried: ["available tools", "team status", "MCP registry"], + decision: "local edit first, external tools only when needed", + result: "work split into clear lanes", + }, + transcript: ["route: code search", "route: browser check", "route: no teammate needed"], + }, + { + title: "Work Runs in Bounded Places", + desc: "Tools, teammates, and worktrees all produce small result cards, so parallel work does not become one unreadable chat log.", + stage: "execute", + used: ["execute", "route"], + packet: { + request: "Apply the patch and run the build.", + carried: ["tool call", "worktree lane", "expected output"], + decision: "execute, then return summarized results", + result: "local evidence collected", + }, + transcript: ["patch applied", "build output summarized"], + }, + { + title: "External Results Re-enter the Same Lane", + desc: "MCP tools expand capability, but they still come back as ordinary tool results the agent can reason over.", + stage: "external", + used: ["external", "execute"], + packet: { + request: "Use an external source or tool if local context is missing.", + carried: ["MCP tool name", "structured arguments", "returned artifact"], + decision: "normalize external output before the next model step", + result: "outside work is no longer special", + }, + transcript: ["MCP result received", "result card appended"], + }, + { + title: "Recovery Keeps the Turn Understandable", + desc: "Long context, command errors, and retries are handled as named recovery moves, not as mysterious branches.", + stage: "recover", + used: ["recover", "intake"], + packet: { + request: "If context or execution gets messy, repair before continuing.", + carried: ["error text", "retry count", "compact summary"], + decision: "retry once, compact old detail, keep the reason visible", + 
result: "the turn remains legible", + }, + transcript: ["error classified", "recovery note added", "work resumes"], + }, + { + title: "Everything Writes Back to One Transcript", + desc: "The big lesson is boring in the best way: all mechanisms eventually append evidence to the same source of truth.", + stage: "append", + used: ["append", "intake"], + packet: { + request: "Report what changed and what was verified.", + carried: ["tool evidence", "browser checks", "remaining risks"], + decision: "answer from the transcript, not from memory alone", + result: "next turn has a clean starting point", + }, + transcript: ["tests pass", "visual checks recorded", "final answer drafted"], + }, +]; + +function StageNode({ + stage, + index, + currentIndex, +}: { + stage: (typeof STAGES)[number]; + index: number; + currentIndex: number; +}) { + const active = index === currentIndex; + const done = index < currentIndex; + + return ( + +
+ + {done ? : stage.icon} + +
+
+ {index + 1}. {stage.label} +
+
{stage.detail}
+
+
+
+ ); +} + +function PacketLine({ + label, + value, + tone = "zinc", +}: { + label: string; + value: string; + tone?: "zinc" | "blue" | "emerald"; +}) { + const toneClass = { + zinc: "border-zinc-200 bg-white text-zinc-700 dark:border-zinc-700 dark:bg-zinc-900 dark:text-zinc-200", + blue: "border-blue-200 bg-blue-50 text-blue-800 dark:border-blue-900 dark:bg-blue-950/35 dark:text-blue-200", + emerald: + "border-emerald-200 bg-emerald-50 text-emerald-800 dark:border-emerald-900 dark:bg-emerald-950/35 dark:text-emerald-200", + }[tone]; + + return ( + +
{label}
+
{value}
+
+ ); +} + +export default function ComprehensiveVisualization({ title }: { title?: string }) { + const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2800 }); + const step = STEPS[vis.currentStep]; + const currentStageIndex = STAGES.findIndex((stage) => stage.id === step.stage); + + return ( +
+

+ {title || "Comprehensive Agent Turn"} +

+ +
+
+
+
+ + One-turn journey +
+
+ {STAGES.map((stage, index) => ( + + ))} +
+
+ +
+
+
+
+ + Turn packet +
+ + step {vis.currentStep + 1}/{STEPS.length} + +
+ + + + + +
+
+
+ carried context +
+
+ {step.packet.carried.map((item) => ( + + {item} + + ))} +
+
+ +
+ + +
+
+
+ +
+
+ + Source-of-truth transcript +
+
+ + {step.transcript.map((item) => ( + + {item} + + ))} + +
+
+
+
+ +
+ {SURFACES.map((surface) => ( +
+
+ + {surface.icon} + + {surface.label} +
+
+ {surface.text} +
+
+ ))} +
+ + +
+
+ ); +} diff --git a/web/src/components/visualizations/shared/mechanism-flow.tsx b/web/src/components/visualizations/shared/mechanism-flow.tsx new file mode 100644 index 0000000..6276abe --- /dev/null +++ b/web/src/components/visualizations/shared/mechanism-flow.tsx @@ -0,0 +1,301 @@ +"use client"; + +import { motion } from "framer-motion"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { useSvgPalette } from "@/hooks/useDarkMode"; +import { StepControls } from "./step-controls"; +import { cn } from "@/lib/utils"; + +type NodeKind = "start" | "process" | "decision" | "store" | "external" | "end"; + +export interface MechanismNode { + id: string; + label: string; + x: number; + y: number; + kind?: NodeKind; + appearsAt?: number; +} + +export interface MechanismEdge { + id: string; + from: string; + to: string; + label?: string; + appearsAt?: number; +} + +export interface MechanismStep { + title: string; + description: string; + focus?: string[]; +} + +interface MechanismFlowProps { + title?: string; + fallbackTitle: string; + nodes: MechanismNode[]; + edges: MechanismEdge[]; + steps: MechanismStep[]; + viewBox?: string; + footer?: string[]; +} + +const NODE_WIDTH = 118; +const NODE_HEIGHT = 42; +const DIAMOND_SIZE = 54; + +const KIND_COLORS: Record = { + start: "#3b82f6", + process: "#10b981", + decision: "#f59e0b", + store: "#8b5cf6", + external: "#ef4444", + end: "#64748b", +}; + +function bounds(node: MechanismNode) { + const halfW = node.kind === "decision" ? DIAMOND_SIZE / 2 : NODE_WIDTH / 2; + const halfH = node.kind === "decision" ? 
DIAMOND_SIZE / 2 : NODE_HEIGHT / 2; + return { + left: node.x - halfW, + right: node.x + halfW, + top: node.y - halfH, + bottom: node.y + halfH, + }; +} + +function edgePath(from: MechanismNode, to: MechanismNode) { + const a = bounds(from); + const b = bounds(to); + + if (Math.abs(from.x - to.x) < 12) { + return `M ${from.x} ${a.bottom} L ${to.x} ${b.top}`; + } + + if (Math.abs(from.y - to.y) < 12) { + const startX = to.x > from.x ? a.right : a.left; + const endX = to.x > from.x ? b.left : b.right; + const midX = (startX + endX) / 2; + return `M ${startX} ${from.y} C ${midX} ${from.y}, ${midX} ${to.y}, ${endX} ${to.y}`; + } + + const startY = to.y > from.y ? a.bottom : a.top; + const endY = to.y > from.y ? b.top : b.bottom; + const control = Math.max(36, Math.abs(endY - startY) * 0.45); + const c1 = startY + (endY > startY ? control : -control); + const c2 = endY - (endY > startY ? control : -control); + return `M ${from.x} ${startY} C ${from.x} ${c1}, ${to.x} ${c2}, ${to.x} ${endY}`; +} + +function labelPosition(from: MechanismNode, to: MechanismNode) { + return { + x: (from.x + to.x) / 2, + y: (from.y + to.y) / 2 - 10, + }; +} + +function FlowNode({ + node, + active, + visible, +}: { + node: MechanismNode; + active: boolean; + visible: boolean; +}) { + const kind = node.kind ?? "process"; + const color = KIND_COLORS[kind]; + const lines = node.label.split("\n"); + + if (kind === "decision") { + const half = DIAMOND_SIZE / 2; + return ( + + + {lines.map((line, i) => ( + + {line} + + ))} + + ); + } + + return ( + + + {lines.map((line, i) => ( + + {line} + + ))} + + ); +} + +export function MechanismFlow({ + title, + fallbackTitle, + nodes, + edges, + steps, + viewBox = "0 0 720 360", + footer, +}: MechanismFlowProps) { + const vis = useSteppedVisualization({ totalSteps: steps.length, autoPlayInterval: 2300 }); + const step = steps[vis.currentStep]; + const palette = useSvgPalette(); + const focused = new Set(step.focus ?? []); + + return ( +
+

+ {title || fallbackTitle} +

+ +
+ + + + + + + + {edges.map((edge) => { + const from = nodes.find((node) => node.id === edge.from); + const to = nodes.find((node) => node.id === edge.to); + if (!from || !to) return null; + const active = focused.has(edge.id) || focused.has(edge.from) || focused.has(edge.to); + const visible = active || vis.currentStep >= (edge.appearsAt ?? 0); + const label = labelPosition(from, to); + + return ( + + + {edge.label && visible && ( + + {edge.label} + + )} + + ); + })} + + {nodes.map((node) => { + const active = focused.has(node.id); + const visible = active || vis.currentStep >= (node.appearsAt ?? 0); + return ( + + ); + })} + + + {footer && ( +
+ {footer.map((item) => ( + + {item} + + ))} +
+ )} +
+ + +
+ ); +} diff --git a/web/src/data/annotations/s03.json b/web/src/data/annotations/s03.json index b8e408a..7ceb11f 100644 --- a/web/src/data/annotations/s03.json +++ b/web/src/data/annotations/s03.json @@ -2,45 +2,45 @@ "version": "s03", "decisions": [ { - "id": "visible-planning", - "title": "Making Plans Visible via TodoWrite", - "description": "Instead of letting the model plan silently in its chain-of-thought, we force plans to be externalized through the TodoWrite tool. Each plan item has a status (pending, in_progress, completed) that gets tracked explicitly. This has three benefits: (1) users can see what the agent intends to do before it does it, (2) developers can debug agent behavior by inspecting the plan state, (3) the agent itself can refer back to its plan in later turns when earlier context has scrolled away.", - "alternatives": "The model could plan internally via chain-of-thought reasoning (as it does in v0/v1). Internal planning works but is invisible and ephemeral -- once the thinking scrolls out of context, the plan is lost. Claude's extended thinking is another option, but it's not inspectable by the user or by downstream tools.", + "id": "permission-before-execution", + "title": "Permission Runs Before Tool Execution", + "description": "The permission check is inserted between the model's tool call and the handler. 
The model can ask for an action, but the harness decides whether the action may touch the real workspace.", + "alternatives": "Putting permission inside each tool duplicates policy and makes it easier for a new tool to forget the check.", "zh": { - "title": "通过 TodoWrite 让计划可见", - "description": "我们不让模型在思维链中默默规划,而是强制通过 TodoWrite 工具将计划外化。每个计划项都有可追踪的状态(pending、in_progress、completed)。这有三个好处:(1) 用户可以在执行前看到 agent 打算做什么;(2) 开发者可以通过检查计划状态来调试 agent 行为;(3) agent 自身可以在后续轮次中引用计划,即使早期上下文已经滚出窗口。" + "title": "权限检查发生在工具执行前", + "description": "权限检查插在模型 tool call 和 handler 之间。模型可以请求动作,但是否允许触碰真实工作区由 harness 决定。" }, "ja": { - "title": "TodoWrite による計画の可視化", - "description": "モデルが思考の連鎖の中で黙って計画するのではなく、TodoWrite ツールを通じて計画を外部化することを強制します。各計画項目には追跡可能なステータス(pending、in_progress、completed)があります。利点は3つ:(1) ユーザーがエージェントの意図を実行前に確認できる、(2) 開発者が計画状態を検査してデバッグできる、(3) エージェント自身が以前のコンテキストがスクロールアウトした後でも計画を参照できる。" + "title": "権限確認はツール実行前に行う", + "description": "権限確認はモデルの tool call と handler の間に入ります。モデルは操作を要求できますが、実際のワークスペースに触れてよいかは harness が決めます。" } }, { - "id": "single-in-progress", - "title": "Only One Task Can Be In-Progress", - "description": "The TodoWrite tool enforces that at most one task has status 'in_progress' at any time. If the model tries to start a second task, it must first complete or abandon the current one. This constraint prevents a subtle failure mode: models that try to 'multitask' by interleaving work on multiple items tend to lose track of state and produce half-finished results. Sequential focus produces higher quality than parallel thrashing.", - "alternatives": "Allowing multiple in-progress items would let the agent context-switch between tasks, which seems more flexible. In practice, LLMs handle context-switching poorly -- they lose track of which task they were working on and mix up details between tasks. 
The single-focus constraint is a guardrail that improves output quality.", + "id": "three-gate-model", + "title": "Three Gates Keep Policy Explainable", + "description": "Hard denies, rule matches, and user approval are separate gates. That makes it clear whether a command was impossible, risky, or simply waiting for confirmation.", + "alternatives": "A single allow/deny function is shorter, but it hides why a command stopped.", "zh": { - "title": "同一时间只允许一个任务进行中", - "description": "TodoWrite 工具强制要求任何时候最多只能有一个任务处于 in_progress 状态。如果模型想开始第二个任务,必须先完成或放弃当前任务。这个约束防止了一种隐蔽的失败模式:试图通过交替处理多个项目来'多任务'的模型,往往会丢失状态并产出半成品。顺序执行的专注度远高于并行切换。" + "title": "三道门让策略可解释", + "description": "硬拒绝、规则匹配、用户确认是分开的关卡。这样能看清命令是绝对禁止、有风险,还是只是在等确认。" }, "ja": { - "title": "同時に進行中にできるタスクは1つだけ", - "description": "TodoWrite ツールは、同時に 'in_progress' 状態のタスクを最大1つに制限します。モデルが2つ目のタスクを開始しようとする場合、まず現在のタスクを完了または中断する必要があります。この制約は微妙な失敗モードを防ぎます:複数の項目を交互に処理して「マルチタスク」しようとするモデルは、状態を見失い中途半端な結果を生みがちです。逐次的な集中は並行的な切り替えよりも高品質な出力を生み出します。" + "title": "三つのゲートでポリシーを説明可能にする", + "description": "ハード拒否、ルール照合、ユーザー承認を分けます。コマンドが不可能なのか、危険なのか、確認待ちなのかが明確になります。" } }, { - "id": "max-twenty-items", - "title": "Maximum of 20 Plan Items", - "description": "TodoWrite caps the plan at 20 items. This is a deliberate constraint against over-planning. Models tend to decompose tasks into increasingly fine-grained steps when unconstrained, producing 50-item plans where each step is trivial. Long plans are fragile: if step 15 fails, the remaining 35 steps may all be invalid. Short plans (under 20 items) stay at the right abstraction level and are easier to adapt when reality diverges from the plan.", - "alternatives": "No cap would give the model full flexibility, but in practice leads to absurdly detailed plans. A dynamic cap (proportional to task complexity) would be smarter but adds complexity. 
The fixed cap of 20 is a simple heuristic that works well empirically -- most real coding tasks can be expressed in 5-15 meaningful steps.", + "id": "continue-with-tool-result", + "title": "Blocked Calls Still Produce Loop State", + "description": "A blocked tool call must still leave the loop in a coherent state. The user and model need to know why execution did not happen before choosing the next action.", + "alternatives": "Silently skipping a blocked tool is simpler, but the model may repeat the same unsafe request.", "zh": { - "title": "计划项上限为 20 条", - "description": "TodoWrite 将计划项限制在 20 条以内。这是对过度规划的刻意约束。不加限制时,模型倾向于将任务分解成越来越细粒度的步骤,产出 50 条的计划,每一步都微不足道。冗长的计划很脆弱:如果第 15 步失败,剩下的 35 步可能全部作废。20 条以内的短计划保持在正确的抽象层级,更容易在现实偏离计划时做出调整。" + "title": "被拦截的调用也要留下循环状态", + "description": "被拦截的工具调用仍然要让循环状态保持一致。用户和模型都需要知道为什么没有执行,才能决定下一步。" }, "ja": { - "title": "計画項目の上限は20個", - "description": "TodoWrite は計画を20項目に制限します。これは過度な計画に対する意図的な制約です。制約がないとモデルはタスクをどんどん細かいステップに分解し、各ステップが些末な50項目の計画を作りがちです。長い計画は脆弱です:ステップ15が失敗すると残りの35ステップは全て無効になりかねません。20項目以内の短い計画は適切な抽象度を保ち、現実が計画から逸脱した際の適応が容易です。" + "title": "ブロックされた呼び出しもループ状態を残す", + "description": "ブロックされたツール呼び出しでも、ループの状態は一貫している必要があります。実行されなかった理由が分かって初めて次の行動を選べます。" } } ] diff --git a/web/src/data/annotations/s04.json b/web/src/data/annotations/s04.json index 08ad115..1f6389b 100644 --- a/web/src/data/annotations/s04.json +++ b/web/src/data/annotations/s04.json @@ -2,45 +2,45 @@ "version": "s04", "decisions": [ { - "id": "context-isolation", - "title": "Subagents Get Fresh Context, Not Shared History", - "description": "When a parent agent spawns a subagent via the Task tool, the subagent starts with a clean message history containing only the system prompt and the delegated task description. It does NOT inherit the parent's conversation. This is context isolation: the subagent can focus entirely on its specific subtask without being distracted by hundreds of messages from the parent's broader conversation. 
The result is returned to the parent as a single tool_result, collapsing potentially dozens of subagent turns into one concise answer.", - "alternatives": "Sharing the parent's full context would give the subagent more information, but it would also flood the subagent with irrelevant details. Context window is finite -- filling it with parent history leaves less room for the subagent's own work. Fork-based approaches (copy the parent context) are a middle ground but still waste tokens on irrelevant history.", + "id": "hooks-around-loop", + "title": "Hooks Wrap the Loop Instead of Rewriting It", + "description": "UserPromptSubmit, PreToolUse, PostToolUse, and Stop hooks attach behavior at lifecycle points while the model-tool-result loop stays unchanged.", + "alternatives": "Inlining logging and permission into the loop is quicker at first, but every new cross-cutting concern would make the loop harder to read.", "zh": { - "title": "子代理获得全新上下文,而非共享历史", - "description": "当父代理通过 Task 工具创建子代理时,子代理从全新的消息历史开始,只包含系统提示词和委派的任务描述,不继承父代理的对话。这就是上下文隔离:子代理可以完全专注于特定子任务,不会被父代理长达数百条消息的对话干扰。结果作为单条 tool_result 返回给父代理,将子代理可能数十轮的交互压缩为一个简洁的回答。" + "title": "Hook 包裹循环,而不是改写循环", + "description": "UserPromptSubmit、PreToolUse、PostToolUse、Stop 等 hook 挂在生命周期节点上,模型-工具-结果循环本身保持不变。" }, "ja": { - "title": "サブエージェントは共有履歴ではなく新しいコンテキストを取得", - "description": "親エージェントが Task ツールでサブエージェントを生成すると、サブエージェントはシステムプロンプトと委任されたタスク説明のみを含むクリーンなメッセージ履歴から開始します。親の会話は引き継ぎません。これがコンテキスト分離です:サブエージェントは親の広範な会話の何百ものメッセージに気を取られることなく、特定のサブタスクに完全に集中できます。結果は単一の tool_result として親に返され、サブエージェントの数十ターンが1つの簡潔な回答に凝縮されます。" + "title": "フックはループを書き換えず包み込む", + "description": "UserPromptSubmit、PreToolUse、PostToolUse、Stop の各フックはライフサイクル地点に処理を追加し、model-tool-result ループ自体は保ちます。" } }, { - "id": "tool-filtering", - "title": "Explore Agents Cannot Write Files", - "description": "When spawning a subagent with the 'Explore' type, it receives only read-only tools: bash (with restrictions), read_file, and search tools. 
It cannot call write_file or edit_file. This implements the principle of least privilege: an agent tasked with 'find all usages of function X' doesn't need write access. Removing write tools eliminates the risk of accidental file modification during exploration, and it also narrows the tool space so the model makes better decisions with fewer options.", - "alternatives": "Giving all subagents full tool access is simpler to implement but violates least privilege. A permission-request system (subagent asks parent for write access) adds complexity and latency. Static tool filtering by role is the pragmatic middle ground -- simple to implement, effective at preventing accidents.", + "id": "pretooluse-can-block", + "title": "PreToolUse Is the Policy Boundary", + "description": "PreToolUse hooks run after the model chooses a tool but before the handler executes. That is the natural point for permission, logging, and safety policy.", + "alternatives": "Checking after execution can only report damage; checking before execution can prevent it.", "zh": { - "title": "Explore 代理不能写入文件", - "description": "创建 Explore 类型的子代理时,它只获得只读工具:bash(有限制)、read_file 和搜索工具,不能调用 write_file 或 edit_file。这实现了最小权限原则:一个被委派'查找函数 X 所有使用位置'的代理不需要写权限。移除写工具消除了探索过程中误修改文件的风险,同时缩小了工具空间,让模型在更少的选项中做出更好的决策。" + "title": "PreToolUse 是策略边界", + "description": "PreToolUse 在模型选择工具之后、handler 执行之前运行。这是权限、日志和安全策略最自然的插入点。" }, "ja": { - "title": "Explore エージェントはファイルを書き込めない", - "description": "Explore タイプのサブエージェントを生成すると、読み取り専用ツールのみが提供されます:bash(制限付き)、read_file、検索ツール。write_file や edit_file は使えません。これは最小権限の原則の実装です:「関数 X の全使用箇所を見つける」タスクに書き込み権限は不要です。書き込みツールを除外することで探索中の誤ったファイル変更リスクを排除し、ツール空間を狭めてモデルがより良い判断を下せるようにします。" + "title": "PreToolUse はポリシー境界", + "description": "PreToolUse はモデルがツールを選んだ後、handler 実行前に動きます。権限、ログ、安全ポリシーを入れる自然な場所です。" } }, { - "id": "no-recursive-task", - "title": "Subagents Cannot Spawn Their Own Subagents", - "description": "The Task tool is not included in the subagent's tool set. 
A subagent must complete its work directly -- it cannot delegate further. This prevents infinite delegation loops: without this constraint, an agent could spawn a subagent that spawns another subagent, each one re-delegating the same task in slightly different words, consuming tokens without making progress. One level of delegation handles the vast majority of use cases. If a task is too complex for a single subagent, the parent should decompose it differently.", - "alternatives": "Allowing recursive delegation (bounded by depth) would handle deeply nested tasks but adds complexity and the risk of runaway token consumption. In practice, single-level delegation covers most real-world coding tasks. Multi-level delegation is addressed in later versions (v6+) through persistent team structures instead of recursive spawning.", + "id": "posttooluse-observability", + "title": "PostToolUse Keeps Results Observable", + "description": "PostToolUse hooks can inspect output size, record traces, or transform metadata without changing every tool handler.", + "alternatives": "Each tool could log its own result, but cross-tool behavior would drift.", "zh": { - "title": "子代理不能再创建子代理", - "description": "Task 工具不包含在子代理的工具集中。子代理必须直接完成工作,不能继续委派。这防止了无限委派循环:没有这个约束,一个代理可能创建子代理,子代理又创建子代理,每一层都用略微不同的措辞重新委派同一任务,消耗 token 却毫无进展。一层委派足以处理绝大多数场景。如果任务对单个子代理来说太复杂,应该由父代理重新分解。" + "title": "PostToolUse 让结果可观察", + "description": "PostToolUse 可以检查输出大小、记录 trace 或转换元数据,而不需要修改每个工具 handler。" }, "ja": { - "title": "サブエージェントは自身のサブエージェントを生成できない", - "description": "Task ツールはサブエージェントのツールセットに含まれません。サブエージェントは作業を直接完了しなければならず、さらなる委任はできません。これにより無限委任ループを防止します:この制約がなければ、エージェントがサブエージェントを生成し、そのサブエージェントがさらにサブエージェントを生成し、それぞれが微妙に異なる言葉で同じタスクを再委任してトークンを消費するだけで進捗しない可能性があります。一段階の委任で大多数のユースケースに対応できます。" + "title": "PostToolUse が結果を観測可能にする", + "description": "PostToolUse は出力サイズ確認、trace 記録、メタデータ変換を各 handler へ散らさずに行えます。" } } ] diff --git a/web/src/data/annotations/s05.json b/web/src/data/annotations/s05.json index 10aa15c..c29fe92 
100644 --- a/web/src/data/annotations/s05.json +++ b/web/src/data/annotations/s05.json @@ -2,45 +2,45 @@ "version": "s05", "decisions": [ { - "id": "tool-result-injection", - "title": "Skills Inject via tool_result, Not System Prompt", - "description": "When the agent invokes the Skill tool, the skill's content (a SKILL.md file) is returned as a tool_result in a user message, not injected into the system prompt. This is a deliberate caching optimization: the system prompt remains static across turns, which means API providers can cache it (Anthropic's prompt caching, OpenAI's system message caching). If skill content were in the system prompt, it would change every time a new skill is loaded, invalidating the cache. By putting dynamic content in tool_result, we keep the expensive system prompt cacheable while still getting skill knowledge into context.", - "alternatives": "Injecting skills into the system prompt is simpler and gives skills higher priority in the model's attention. But it breaks prompt caching (every skill load creates a new system prompt variant) and bloats the system prompt over time as skills accumulate. The tool_result approach keeps things cache-friendly at the cost of slightly lower attention priority.", + "id": "todo-as-tool", + "title": "TodoWrite Is a Tool, Not a Hidden Planner", + "description": "Planning enters the same tool dispatch path as bash or read_file. 
This keeps the planning state visible and lets the model decide when to update it.", + "alternatives": "An automatic planner could create todos behind the scenes, but the user would not see when the model's intent changed.", "zh": { - "title": "Skill 通过 tool_result 注入,而非系统提示词", - "description": "当 agent 调用 Skill 工具时,Skill 内容(SKILL.md 文件)作为 tool_result 在用户消息中返回,而非注入系统提示词。这是一个刻意的缓存优化:系统提示词在各轮次间保持静态,API 提供商可以缓存它(Anthropic 的 prompt caching、OpenAI 的 system message caching)。如果 Skill 内容在系统提示词中,每次加载新 Skill 都会使缓存失效。将动态内容放在 tool_result 中,既保持了昂贵的系统提示词可缓存,又让 Skill 知识进入了上下文。" + "title": "TodoWrite 是工具,不是隐藏规划器", + "description": "规划通过和 bash、read_file 相同的工具分发路径进入系统。这样计划状态可见,模型也能决定何时更新。" }, "ja": { - "title": "スキルはシステムプロンプトではなく tool_result で注入", - "description": "エージェントが Skill ツールを呼び出すと、スキルの内容(SKILL.md ファイル)はシステムプロンプトへの注入ではなく、ユーザーメッセージ内の tool_result として返されます。これは意図的なキャッシュ最適化です:システムプロンプトはターン間で静的に保たれるため、API プロバイダーがキャッシュできます(Anthropic のプロンプトキャッシュ、OpenAI のシステムメッセージキャッシュ)。スキル内容がシステムプロンプト内にあると、新しいスキルをロードするたびにキャッシュが無効化されます。動的コンテンツを tool_result に配置することで、高コストなシステムプロンプトのキャッシュ可能性を維持しつつ、スキル知識をコンテキストに取り込めます。" + "title": "TodoWrite は隠れたプランナーではなくツール", + "description": "計画は bash や read_file と同じツールディスパッチ経路に入ります。計画状態が可視化され、モデルが更新タイミングを選べます。" } }, { - "id": "lazy-loading", - "title": "On-Demand Skill Loading Instead of Upfront", - "description": "Skills are not loaded at startup. The agent starts with only the skill names and descriptions (from frontmatter). When the agent decides it needs a specific skill, it calls the Skill tool, which loads the full SKILL.md body into context. This keeps the initial prompt small and focused. An agent solving a Python bug doesn't need the Kubernetes deployment skill loaded -- that would waste context window space and potentially confuse the model with irrelevant instructions.", - "alternatives": "Loading all skills upfront guarantees the model always has all knowledge available, but wastes tokens on irrelevant skills and may hit context limits. 
A recommendation system (model suggests skills, human approves) adds latency. Lazy loading lets the model self-serve the knowledge it needs, when it needs it.", + "id": "nag-reminder", + "title": "A Lightweight Reminder Prevents Plan Drift", + "description": "After several rounds without todo updates, the runtime injects a reminder. The reminder nudges the model without taking over the plan.", + "alternatives": "Forcing a todo update every turn would be noisy and slow down simple tasks.", "zh": { - "title": "按需加载 Skill 而非预加载", - "description": "Skill 不会在启动时加载。Agent 初始只拥有 Skill 名称和描述(来自 frontmatter)。当 agent 判断需要特定 Skill 时,调用 Skill 工具将完整的 SKILL.md 内容加载到上下文中。这保持了初始提示词的精简。一个正在修复 Python bug 的 agent 不需要加载 Kubernetes 部署 Skill——那会浪费上下文窗口空间,还可能用无关指令干扰模型。" + "title": "轻量提醒防止计划漂移", + "description": "多个回合没有更新 todo 后,运行时会注入提醒。它只提醒模型,不接管计划。" }, "ja": { - "title": "起動時ではなくオンデマンドでスキルを読み込み", - "description": "スキルは起動時に読み込まれません。エージェントは最初、スキルの名前と説明(フロントマターから)のみを持ちます。エージェントが特定のスキルが必要だと判断すると、Skill ツールを呼び出して完全な SKILL.md の内容をコンテキストに読み込みます。これにより初期プロンプトを小さく保ちます。Python のバグを修正しているエージェントに Kubernetes デプロイのスキルは不要です――コンテキストウィンドウの無駄遣いであり、無関係な指示でモデルを混乱させかねません。" + "title": "軽量リマインダーで計画の漂流を防ぐ", + "description": "数ターン todo 更新がない場合、ランタイムはリマインダーを注入します。計画を奪わず、モデルに更新を促します。" } }, { - "id": "frontmatter-body-split", - "title": "YAML Frontmatter + Markdown Body in SKILL.md", - "description": "Each SKILL.md file has two parts: YAML frontmatter (name, description, globs) and a markdown body (the actual instructions). The frontmatter serves as metadata for the skill registry -- it's what gets listed when the agent asks 'what skills are available?' The body is the payload that gets loaded on demand. This separation means you can list 100 skills (reading only frontmatter, a few bytes each) without loading 100 full instruction sets (potentially thousands of tokens each).", - "alternatives": "A separate metadata file (skill.yaml + skill.md) would work but doubles the number of files. 
Embedding metadata in the markdown (as headings or comments) requires parsing the full file to extract metadata. Frontmatter is a well-established convention (Jekyll, Hugo, Astro) that keeps metadata and content co-located but separately parseable.", + "id": "in-memory-current-work", + "title": "Current Todos Stay Session-Local", + "description": "TodoWrite is for the current session's working plan, not durable project management. Later task systems handle persistent work.", + "alternatives": "Persisting every todo immediately would blur the difference between a scratch plan and a durable task graph.", "zh": { - "title": "SKILL.md 采用 YAML Frontmatter + Markdown 正文", - "description": "每个 SKILL.md 文件有两部分:YAML frontmatter(名称、描述、globs)和 markdown 正文(实际指令)。Frontmatter 作为 Skill 注册表的元数据——当 agent 问'有哪些可用 Skill'时,展示的就是这些信息。正文是按需加载的有效负载。这种分离意味着可以列出 100 个 Skill(每个只读几字节的 frontmatter)而不必加载 100 套完整指令集(每套可能数千 token)。" + "title": "当前 Todo 保持会话内状态", + "description": "TodoWrite 面向当前会话的工作计划,不是持久项目管理。后面的任务系统负责持久工作。" }, "ja": { - "title": "SKILL.md で YAML フロントマター + Markdown 本文", - "description": "各 SKILL.md ファイルは2つの部分で構成されます:YAML フロントマター(名前、説明、globs)と Markdown 本文(実際の指示)。フロントマターはスキルレジストリのメタデータとして機能し、エージェントが「どんなスキルが利用可能か」と問い合わせた際に一覧表示されます。本文はオンデマンドで読み込まれるペイロードです。この分離により、100個のスキル一覧表示(各数バイトのフロントマターのみ読み取り)が100個の完全な指示セット(各数千トークン)のロードなしに可能になります。" + "title": "現在の todo はセッション内に留める", + "description": "TodoWrite は現在セッションの作業計画であり、永続的なプロジェクト管理ではありません。永続作業は後のタスクシステムが扱います。" } } ] diff --git a/web/src/data/annotations/s06.json b/web/src/data/annotations/s06.json index fae1f73..bbeb555 100644 --- a/web/src/data/annotations/s06.json +++ b/web/src/data/annotations/s06.json @@ -2,59 +2,45 @@ "version": "s06", "decisions": [ { - "id": "three-layer-compression", - "title": "Three-Layer Compression Strategy", - "description": "Context management uses three distinct layers, each with different cost/benefit profiles. 
(1) Microcompact runs every turn and is nearly free: it truncates tool_result blocks from older messages, stripping verbose command output that's no longer needed. (2) Auto_compact triggers when token count exceeds a threshold: it calls the LLM to generate a conversation summary, which is expensive but dramatically reduces context size. (3) Manual compact is user-triggered for explicit 'start fresh' moments. Layering these means the cheap operation runs constantly (keeping context tidy) while the expensive operation runs rarely (only when actually needed).", - "alternatives": "A single compression strategy (e.g., always summarize at 80% capacity) would be simpler but wasteful -- most of the time, microcompact alone keeps things manageable. A sliding window (drop oldest N messages) is cheap but loses important context. The three-layer approach gives the best token efficiency: cheap cleanup constantly, expensive summarization rarely.", + "id": "fresh-subagent-context", + "title": "Subagents Start with Fresh Messages", + "description": "The child agent receives only the delegated prompt. 
This isolates exploratory work and prevents the parent context from filling with every intermediate tool result.", + "alternatives": "Sharing the full parent history gives more context, but it defeats the purpose of delegation as context isolation.", "zh": { - "title": "三层压缩策略", - "description": "上下文管理使用三个独立的层次,各有不同的成本收益比。(1) 微压缩每轮都运行,几乎零成本:它截断旧消息中的 tool_result 块,去除不再需要的冗长命令输出。(2) 自动压缩在 token 数超过阈值时触发:调用 LLM 生成对话摘要,代价高但能大幅缩减上下文。(3) 手动压缩由用户触发,用于明确的'重新开始'场景。分层意味着低成本操作持续运行(保持上下文整洁),而高成本操作很少触发(仅在真正需要时)。" + "title": "子代理从全新 Messages 开始", + "description": "子代理只收到被委派的 prompt。这样探索性工作被隔离,父上下文不会塞满每个中间工具结果。" }, "ja": { - "title": "3層圧縮戦略", - "description": "コンテキスト管理は、異なるコスト・効果プロファイルを持つ3つの層を使用します。(1) マイクロコンパクトは毎ターン実行されほぼ無コスト:古いメッセージの tool_result ブロックを切り詰め、不要な冗長出力を除去します。(2) 自動コンパクトはトークン数が閾値を超えると発動:LLM を呼び出して会話の要約を生成し、コストは高いがコンテキストサイズを劇的に削減します。(3) 手動コンパクトはユーザーが明示的に「最初からやり直し」する時に使用します。この階層化により、安価な操作が常に実行され(コンテキストを整頓)、高価な操作はめったに実行されません(本当に必要な時のみ)。" + "title": "サブエージェントは新しい messages で始まる", + "description": "子エージェントは委任された prompt だけを受け取ります。探索作業を隔離し、親コンテキストが中間 tool result で膨らむのを防ぎます。" } }, { - "id": "min-savings-threshold", - "title": "MIN_SAVINGS = 20,000 Tokens Before Compressing", - "description": "Auto_compact only triggers when the estimated savings (current tokens minus estimated summary size) exceed 20,000 tokens. Compression is not free: the summary itself consumes tokens, plus there's the API call cost to generate it. If the conversation is only 25,000 tokens, compressing might save 5,000 tokens but cost an API call and produce a summary that's less coherent than the original. The 20K threshold ensures compression only happens when the savings meaningfully exceed the overhead.", - "alternatives": "A percentage-based threshold (compress when context is 80% full) adapts to different context window sizes but doesn't account for the fixed cost of generating a summary. A fixed threshold of 10K would compress more aggressively but often isn't worth it. 
The 20K value was chosen empirically: it's the point where compression savings consistently outweigh the quality loss from summarization.", + "id": "summary-only-return", + "title": "Only the Summary Returns to the Parent", + "description": "The parent receives a compact final answer, not the child's full transcript. That gives delegation a predictable context cost.", + "alternatives": "Returning the full transcript can help debugging, but it makes large subagent runs expensive to continue.", "zh": { - "title": "最小节省量 = 20,000 Token 才触发压缩", - "description": "自动压缩仅在估算节省量(当前 token 数减去预估摘要大小)超过 20,000 token 时才触发。压缩不是免费的:摘要本身会消耗 token,还有生成摘要的 API 调用成本。如果对话只有 25,000 token,压缩可能节省 5,000 token,但需要一次 API 调用,且产出的摘要可能不如原文连贯。20K 的阈值确保只在节省量明显超过开销时才进行压缩。" + "title": "只有摘要返回父循环", + "description": "父循环收到的是压缩后的最终答案,而不是子代理的完整 transcript。这样委派的上下文成本可预测。" }, "ja": { - "title": "圧縮前に MIN_SAVINGS = 20,000 トークンが必要", - "description": "自動コンパクトは推定節約量(現在のトークン数マイナス推定要約サイズ)が20,000トークンを超えた場合にのみ発動します。圧縮は無料ではありません:要約自体がトークンを消費し、さらに生成のための API コール費用がかかります。会話が25,000トークンしかない場合、圧縮で5,000トークン節約できても、API コールが必要で元の会話より一貫性の低い要約になる可能性があります。20K の閾値は、節約量がオーバーヘッドを確実に上回る場合にのみ圧縮を実行することを保証します。" + "title": "親に戻るのは要約だけ", + "description": "親が受け取るのは子の完全な transcript ではなく、圧縮された最終回答です。委任のコンテキストコストを予測可能にします。" } }, { - "id": "summary-replaces-all", - "title": "Summary Replaces ALL Messages, Not Partial History", - "description": "When auto_compact fires, it generates a summary and replaces the ENTIRE message history with that summary. It does not keep the last N messages alongside the summary. This avoids a subtle coherence problem: if you keep recent messages plus a summary of older ones, the model sees two representations of overlapping content. The summary might say 'we decided to use approach X' while a recent message still shows the deliberation process, creating contradictory signals. 
A clean summary is a single coherent narrative.", - "alternatives": "Keeping the last 5-10 messages alongside the summary preserves recent detail and gives the model more to work with. But it creates the overlap problem described above, and makes the total context size less predictable. Some systems use a 'sliding window + summary' approach which works but requires careful tuning of the overlap region.", + "id": "no-recursive-task-tool", + "title": "Subagents Cannot Spawn Subagents", + "description": "The child tool set omits task, preventing recursive delegation from exploding. The lesson keeps isolation visible before adding richer team behavior later.", + "alternatives": "Allowing recursion is powerful, but much harder to bound and explain in a teaching runtime.", "zh": { - "title": "摘要替换全部消息,而非保留部分历史", - "description": "自动压缩触发时,生成摘要并替换全部消息历史,不会在摘要旁保留最近的 N 条消息。这避免了一个微妙的连贯性问题:如果同时保留近期消息和旧消息的摘要,模型会看到重叠内容的两种表示。摘要可能说'我们决定使用方案 X',而近期消息仍在展示讨论过程,产生矛盾信号。干净的摘要是一个连贯的单一叙述。" + "title": "子代理不能再创建子代理", + "description": "子代理工具集中不包含 task,避免递归委派失控。课程先把隔离讲清楚,再在后续章节加入更复杂团队行为。" }, "ja": { - "title": "要約が部分的な履歴ではなく全メッセージを置換", - "description": "自動コンパクトが発動すると、要約を生成してメッセージ履歴の全体をその要約で置換します。要約と並べて直近 N 件のメッセージを保持することはしません。これにより微妙な一貫性の問題を回避します:直近のメッセージと古いメッセージの要約を併存させると、モデルは重複するコンテンツの2つの表現を見ることになります。要約が「アプローチ X を使うことに決めた」と言う一方で、直近のメッセージにはまだ検討過程が表示されているかもしれず、矛盾するシグナルを生じます。クリーンな要約は単一の一貫した物語です。" - } - }, - { - "id": "transcript-archival", - "title": "Full Conversation Archived to JSONL on Disk", - "description": "Even though context is compressed in memory, the full uncompressed conversation is appended to a JSONL file on disk. Every message, every tool call, every result -- nothing is lost. This means compression is a lossy operation on the in-memory context but a lossless operation on the permanent record. Post-hoc analysis (debugging agent behavior, computing token usage, training data extraction) can always work from the complete transcript. 
The JSONL format is append-only, making it safe for concurrent writes and easy to stream-process.", - "alternatives": "Not archiving saves disk space but makes debugging hard -- when the agent makes a mistake, you can't see what it was 'thinking' 200 messages ago because that context was compressed away. Database storage (SQLite) would provide queryability but adds a dependency. JSONL is the simplest format that supports append-only writes and line-by-line processing.", - "zh": { - "title": "完整对话以 JSONL 格式归档到磁盘", - "description": "尽管上下文在内存中被压缩,完整的未压缩对话仍会追加到磁盘上的 JSONL 文件中。每条消息、每次工具调用、每个结果都不会丢失。压缩对内存上下文是有损操作,但对永久记录是无损的。事后分析(调试 agent 行为、计算 token 用量、提取训练数据)始终可以基于完整记录进行。JSONL 格式仅追加写入,对并发写入安全,易于流式处理。" - }, - "ja": { - "title": "完全な会話を JSONL としてディスクに保存", - "description": "メモリ上でコンテキストが圧縮されても、完全な非圧縮会話はディスク上の JSONL ファイルに追記されます。全てのメッセージ、全てのツール呼び出し、全ての結果――何も失われません。圧縮はインメモリコンテキストに対しては不可逆ですが、永続記録に対しては可逆です。事後分析(エージェントの挙動デバッグ、トークン使用量の計算、学習データの抽出)は常に完全な記録から行えます。JSONL フォーマットは追記専用で、並行書き込みに安全であり行単位の処理が容易です。" + "title": "サブエージェントはさらにサブエージェントを作れない", + "description": "子のツールセットから task を外し、再帰的委任の爆発を防ぎます。まず隔離を明確にし、後の章でより豊かなチーム動作を扱います。" } } ] diff --git a/web/src/data/annotations/s07.json b/web/src/data/annotations/s07.json index c519094..24a8759 100644 --- a/web/src/data/annotations/s07.json +++ b/web/src/data/annotations/s07.json @@ -2,59 +2,45 @@ "version": "s07", "decisions": [ { - "id": "file-based-persistence", - "title": "Tasks Stored as JSON Files, Not In-Memory", - "description": "Tasks are persisted as JSON files in a .tasks/ directory on the filesystem instead of being held in memory. This has three critical benefits: (1) Tasks survive process crashes -- if the agent dies mid-task, the task board is still on disk when it restarts. (2) Multiple agents can read and write to the same task directory, enabling multi-agent coordination without shared memory. (3) Humans can inspect and manually edit task files for debugging. 
The filesystem becomes the shared database.", - "alternatives": "In-memory storage (like v2's TodoWrite) is simpler and faster but loses state on crash and doesn't work across multiple agent processes. A proper database (SQLite, Redis) would provide ACID guarantees and better concurrency, but adds a dependency and operational complexity. Files are the zero-dependency persistence layer that works everywhere.", + "id": "catalog-first", + "title": "Inject a Skill Catalog First", + "description": "The system prompt includes only skill names and short descriptions. The model can discover capabilities without paying for every full SKILL.md on every turn.", + "alternatives": "Loading all skills upfront is simple, but it wastes context and makes unrelated instructions compete.", "zh": { - "title": "任务存储为 JSON 文件,而非内存", - "description": "任务以 JSON 文件形式持久化在 .tasks/ 目录中,而非保存在内存里。这有三个关键好处:(1) 任务在进程崩溃后仍然存在——如果 agent 在任务中途崩溃,重启后任务板仍在磁盘上;(2) 多个 agent 可以读写同一任务目录,无需共享内存即可实现多代理协调;(3) 人类可以查看和手动编辑任务文件来调试。文件系统就是共享数据库。" + "title": "先注入技能目录", + "description": "系统提示词只包含技能名和简短描述。模型能发现能力,但不用每轮都为完整 SKILL.md 付出上下文成本。" }, "ja": { - "title": "タスクをメモリではなく JSON ファイルとして保存", - "description": "タスクはメモリ内ではなく .tasks/ ディレクトリに JSON ファイルとして永続化されます。3つの重要な利点があります:(1) プロセスのクラッシュ後もタスクが存続する――エージェントがタスク途中でクラッシュしても、再起動時にタスクボードはディスク上に残っています。(2) 複数のエージェントが同じタスクディレクトリを読み書きでき、共有メモリなしにマルチエージェント連携が可能になります。(3) 人間がデバッグのためにタスクファイルを検査・手動編集できます。ファイルシステムが共有データベースになります。" + "title": "まずスキルカタログを注入する", + "description": "システムプロンプトにはスキル名と短い説明だけを入れます。モデルは能力を発見できますが、毎ターン全 SKILL.md を読む必要はありません。" } }, { - "id": "dependency-graph", - "title": "Tasks Have blocks/blockedBy Dependency Fields", - "description": "Each task can declare which other tasks it blocks (downstream dependents) and which tasks block it (upstream dependencies). An agent will not start a task that has unresolved blockedBy dependencies. 
This is essential for multi-agent coordination: when Agent A is writing the database schema and Agent B needs to write queries against it, Agent B's task is blockedBy Agent A's task. Without dependencies, both agents might start simultaneously and Agent B would work against a schema that doesn't exist yet.", - "alternatives": "Simple priority ordering (high/medium/low) doesn't capture 'task B literally cannot start until task A finishes.' A centralized coordinator that assigns tasks in order would work but creates a single point of failure and bottleneck. Declarative dependencies let each agent independently determine what it can work on by reading the task files.", + "id": "full-skill-via-tool-result", + "title": "Full Skills Arrive Through tool_result", + "description": "load_skill returns full instructions as a tool result. That makes skill loading an explicit event in the transcript and keeps it compatible with the normal loop.", + "alternatives": "Mutating the system prompt mid-turn would hide when the model gained new instructions.", "zh": { - "title": "任务具有 blocks/blockedBy 依赖字段", - "description": "每个任务可以声明它阻塞哪些任务(下游依赖)以及它被哪些任务阻塞(上游依赖)。Agent 不会开始有未解决 blockedBy 依赖的任务。这对多代理协调至关重要:当 Agent A 在编写数据库 schema、Agent B 需要写查询时,Agent B 的任务被 Agent A 的任务阻塞。没有依赖关系,两个 agent 可能同时开始,而 Agent B 会针对一个尚不存在的 schema 工作。" + "title": "完整技能通过 tool_result 进入", + "description": "load_skill 把完整说明作为 tool result 返回。这样技能加载是 transcript 中的显式事件,也兼容普通循环。" }, "ja": { - "title": "タスクに blocks/blockedBy 依存関係フィールド", - "description": "各タスクは、自分がブロックするタスク(下流の依存先)と、自分をブロックするタスク(上流の依存元)を宣言できます。エージェントは未解決の blockedBy 依存がある タスクを開始しません。これはマルチエージェント連携に不可欠です:エージェント A がデータベーススキーマを書いていてエージェント B がそれに対するクエリを書く必要がある場合、B のタスクは A のタスクにブロックされます。依存関係がなければ両エージェントが同時に開始し、B はまだ存在しないスキーマに対して作業することになります。" + "title": "完全なスキルは tool_result で届く", + "description": "load_skill は完全な指示を tool result として返します。スキル読み込みが transcript 上の明示的なイベントになり、通常ループと互換です。" } }, { - "id": "task-default-todo-coexistence", - "title": "Task as Course 
Default, Todo Still Useful", - "description": "TaskManager extends the Todo mental model and becomes the default workflow from s07 onward in this course. Both track work items with statuses, but TaskManager adds file persistence (survives crashes), dependency tracking (blocks/blockedBy), ownership fields, and multi-process coordination. Todo remains useful for short, linear, one-shot tracking where heavyweight coordination is unnecessary.", - "alternatives": "Using only Todo keeps the model minimal but weak for long-running or collaborative work. Using only Task everywhere maximizes consistency but can feel heavy for tiny one-off tasks.", + "id": "filesystem-skills", + "title": "Skills Are Files, Not Code Plugins", + "description": "A skill is a directory with SKILL.md. Keeping skills as files makes authoring, reviewing, and loading knowledge straightforward.", + "alternatives": "Executable plugins can do more, but this chapter is about instruction loading rather than new runtime code.", "zh": { - "title": "Task 为课程主线,Todo 仍有适用场景", - "description": "TaskManager 延续了 Todo 的心智模型,并在本课程 s07 之后成为默认主线。两者都管理带状态的任务项,但 TaskManager 增加了文件持久化(崩溃后可恢复)、依赖追踪(blocks/blockedBy)、owner 字段与多进程协作能力。Todo 仍适合短、线性、一次性的轻量跟踪。" + "title": "技能是文件,不是代码插件", + "description": "一个 skill 就是包含 SKILL.md 的目录。把技能保持为文件,便于编写、审查和加载知识。" }, "ja": { - "title": "Task を主線にしつつ Todo も併存", - "description": "TaskManager は Todo のメンタルモデルを拡張し、本コースでは s07 以降のデフォルトになる。どちらもステータス付き作業項目を扱うが、TaskManager にはファイル永続化(クラッシュ耐性)、依存関係追跡(blocks/blockedBy)、owner、マルチプロセス協調がある。Todo は短く直線的な単発作業では引き続き有効。" - } - }, - { - "id": "task-write-discipline", - "title": "Durability Needs Write Discipline", - "description": "File persistence reduces context loss, but it does not remove concurrent-write risks by itself. Before writing task state, reload the JSON, validate expected status/dependency fields, and then save atomically. 
This prevents one agent from silently overwriting another agent's transition.", - "alternatives": "Blind overwrite writes are simpler but can corrupt coordination state under parallel execution. A database with optimistic locking would enforce stronger safety, but the course keeps file-based state for zero-dependency teaching.", - "zh": { - "title": "持久化仍需要写入纪律", - "description": "文件持久化能降低上下文丢失,但不会自动消除并发写入风险。写任务状态前应先重读 JSON、校验 `status/blockedBy` 是否符合预期,再原子写回,避免不同 agent 悄悄覆盖彼此状态。" - }, - "ja": { - "title": "耐久性には書き込み規律が必要", - "description": "ファイル永続化だけでは並行書き込み競合は防げない。更新前に JSON を再読込し、`status/blockedBy` を検証して原子的に保存することで、他エージェントの遷移上書きを防ぐ。" + "title": "スキルはコードプラグインではなくファイル", + "description": "スキルは SKILL.md を持つディレクトリです。ファイルとして扱うことで、作成、レビュー、知識読み込みが分かりやすくなります。" } } ] diff --git a/web/src/data/annotations/s08.json b/web/src/data/annotations/s08.json index 121fb51..e5d6a0f 100644 --- a/web/src/data/annotations/s08.json +++ b/web/src/data/annotations/s08.json @@ -2,45 +2,45 @@ "version": "s08", "decisions": [ { - "id": "notification-bus", - "title": "threading.Queue as the Notification Bus", - "description": "Background task results are delivered via a threading.Queue instead of direct callbacks. The background thread puts a notification on the queue when its work completes. The main agent loop polls the queue before each LLM call. This decoupling is important: the background thread doesn't need to know anything about the main loop's state or timing. It just drops a message on the queue and moves on. The main loop picks it up at its own pace -- never mid-API-call, never mid-tool-execution. No race conditions, no callback hell.", - "alternatives": "Direct callbacks (background thread calls a function in the main thread) would deliver results faster but create thread-safety issues -- the callback might fire while the main thread is in the middle of building a request. Event-driven systems (asyncio, event emitters) work but add complexity. 
A queue is the simplest thread-safe communication primitive.", + "id": "cheap-first-compaction", + "title": "Compaction Runs Cheap Passes First", + "description": "Budgeting large tool results, snipping middle history, and replacing old tool outputs run before any LLM summary. Most turns should avoid an extra model call.", + "alternatives": "Summarizing immediately is conceptually simple, but expensive and often unnecessary.", "zh": { - "title": "用 threading.Queue 作为通知总线", - "description": "后台任务结果通过 threading.Queue 传递,而非直接回调。后台线程在工作完成时向队列放入通知,主 agent 循环在每次 LLM 调用前轮询队列。这种解耦很重要:后台线程无需了解主循环的状态或时序,只需往队列放入消息然后继续。主循环按自己的节奏取出消息——永远不会在 API 调用中途或工具执行中途。没有竞争条件,没有回调地狱。" + "title": "压缩先跑低成本步骤", + "description": "大工具结果预算、中段裁剪、旧工具输出占位会先于任何 LLM 摘要运行。多数回合不应该额外调用模型。" }, "ja": { - "title": "threading.Queue を通知バスとして使用", - "description": "バックグラウンドタスクの結果は直接コールバックではなく threading.Queue を通じて配信されます。バックグラウンドスレッドは作業完了時にキューに通知を投入します。メインのエージェントループは各 LLM 呼び出しの前にキューをポーリングします。この疎結合が重要です:バックグラウンドスレッドはメインループの状態やタイミングを一切知る必要がありません。キューにメッセージを入れて先に進むだけです。メインループは自分のペースで取り出します――API 呼び出しの途中でもツール実行の途中でもありません。レースコンディションもコールバック地獄もありません。" + "title": "圧縮は低コストな処理から行う", + "description": "大きな tool result の退避、中間履歴の切り詰め、古い出力の置換を LLM 要約より先に行います。多くのターンで追加モデル呼び出しを避けられます。" } }, { - "id": "daemon-threads", - "title": "Background Tasks Run as Daemon Threads", - "description": "Background task threads are created with daemon=True. In Python, daemon threads are killed automatically when the main thread exits. This prevents a common problem: if the main agent completes its work and exits, but a background thread is still running (waiting on a long API call, stuck in a loop), the process would hang indefinitely. With daemon threads, exit is clean -- the main thread finishes, all daemon threads die, process exits. 
No zombie processes, no cleanup code needed.", - "alternatives": "Non-daemon threads with explicit cleanup (join with timeout, then terminate) give more control over shutdown but require careful lifecycle management. Process-based parallelism (multiprocessing) provides stronger isolation but higher overhead. Daemon threads are the pragmatic choice: minimal code, correct behavior in the common case.", + "id": "large-output-side-store", + "title": "Large Outputs Move to a Side Store", + "description": "The transcript keeps a pointer while the full output is persisted elsewhere. This preserves recoverability without forcing the model to reread huge logs.", + "alternatives": "Dropping output loses evidence; keeping all output burns context.", "zh": { - "title": "后台任务以守护线程运行", - "description": "后台任务线程以 daemon=True 创建。在 Python 中,守护线程在主线程退出时自动被终止。这防止了一个常见问题:如果主 agent 完成工作并退出,但后台线程仍在运行(等待一个长时间 API 调用或陷入循环),进程会无限挂起。使用守护线程,退出是干净的——主线程结束,所有守护线程自动终止,进程退出。没有僵尸进程,不需要清理代码。" + "title": "大输出转移到旁路存储", + "description": "Transcript 保留指针,完整输出持久化到别处。这样既能恢复证据,又不会强迫模型反复读取巨量日志。" }, "ja": { - "title": "バックグラウンドタスクはデーモンスレッドとして実行", - "description": "バックグラウンドタスクのスレッドは daemon=True で作成されます。Python ではデーモンスレッドはメインスレッドの終了時に自動的に終了されます。これにより一般的な問題を防ぎます:メインエージェントが作業を完了して終了しても、バックグラウンドスレッドがまだ実行中(長い API 呼び出しを待機、ループに陥っている)だとプロセスが無限にハングします。デーモンスレッドならクリーンに終了できます――メインスレッドが終了すると全デーモンスレッドが自動終了し、プロセスが終了します。ゾンビプロセスもクリーンアップコードも不要です。" + "title": "大きな出力はサイドストアへ移す", + "description": "transcript にはポインタだけを残し、完全な出力は別に保存します。証拠を失わず、モデルが巨大ログを読み直す必要もありません。" } }, { - "id": "attachment-format", - "title": "Structured Notification Format with Type Tags", - "description": "Notifications from background tasks use a structured format: {\"type\": \"attachment\", \"attachment\": {status, result, ...}} instead of plain text strings. 
The type tag lets the main loop handle different notification types differently: an 'attachment' might be injected into the conversation as a tool_result, while a 'status_update' might just update a progress indicator. Machine-readable notifications also enable programmatic filtering (show only errors, suppress progress updates) and UI rendering (display status as a progress bar, not raw text).", - "alternatives": "Plain text notifications are simpler but lose structure. The main loop would have to parse free-form text to determine what happened, which is fragile. A class hierarchy (StatusNotification, ResultNotification, ErrorNotification) is more Pythonic but less portable -- JSON structures work the same way regardless of language or serialization format.", + "id": "reactive-compact", + "title": "Reactive Compact Handles API Rejection", + "description": "Even after estimates, the API may reject a prompt as too long. reactive_compact is the emergency path that compacts and retries.", + "alternatives": "Treating prompt_too_long as fatal would make long sessions brittle.", "zh": { - "title": "带类型标签的结构化通知格式", - "description": "后台任务的通知使用结构化格式:{\"type\": \"attachment\", \"attachment\": {status, result, ...}},而非纯文本字符串。类型标签让主循环可以区别处理不同通知类型:attachment 可能作为 tool_result 注入对话,而 status_update 可能只更新进度指示器。机器可读的通知还支持程序化过滤(只显示错误、抑制进度更新)和 UI 渲染(将状态显示为进度条而非原始文本)。" + "title": "Reactive Compact 处理 API 拒绝", + "description": "即使有估算,API 仍可能拒绝过长 prompt。reactive_compact 是压缩并重试的应急路径。" }, "ja": { - "title": "型タグ付き構造化通知フォーマット", - "description": "バックグラウンドタスクからの通知は構造化フォーマットを使用します:プレーンテキストではなく {\"type\": \"attachment\", \"attachment\": {status, result, ...}} です。型タグによりメインループは異なる通知タイプを異なる方法で処理できます:attachment は会話に tool_result として注入され、status_update は進捗インジケーターの更新のみを行うかもしれません。機械可読な通知はプログラム的なフィルタリング(エラーのみ表示、進捗更新の抑制)や UI レンダリング(ステータスを生テキストではなくプログレスバーとして表示)も可能にします。" + "title": "Reactive compact が API 拒否を処理する", + "description": "見積もり後でも API が prompt_too_long を返すことがあります。reactive_compact 
は圧縮して再試行する緊急経路です。" } } ] diff --git a/web/src/data/annotations/s09.json b/web/src/data/annotations/s09.json index 8c04537..b8caa27 100644 --- a/web/src/data/annotations/s09.json +++ b/web/src/data/annotations/s09.json @@ -2,45 +2,45 @@ "version": "s09", "decisions": [ { - "id": "teammate-vs-subagent", - "title": "Persistent Teammates vs One-Shot Subagents", - "description": "In s04, subagents are ephemeral: spawn, do one task, return result, die. Their knowledge dies with them. In s09, teammates are persistent threads with identity (name, role) and config files. A teammate can complete task A, then be assigned task B, carrying forward everything it learned. Persistent teammates accumulate project knowledge, understand established patterns, and don't need to re-read the same files for every task.", - "alternatives": "One-shot subagents (s04 style) are simpler and provide perfect context isolation -- no risk of one task's context polluting another. But the re-learning cost is high: every new task starts from zero. A middle ground (subagents with shared memory/knowledge base) was considered but adds complexity without the full benefit of persistent identity and state.", + "id": "memory-index", + "title": "A Small Index Keeps Memory Cheap", + "description": "MEMORY.md stays short and always present. 
Full memory files are loaded only when the index suggests they are relevant.", + "alternatives": "Injecting all memories every turn would turn long-term knowledge into context bloat.", "zh": { - "title": "持久化队友 vs 一次性 Subagent", - "description": "在 s04 中,Subagent 是临时的:创建、执行一个任务、返回结果、销毁。它们的知识随之消亡。在 s09 中,队友是具有身份(名称、角色)和配置文件的持久化线程。队友可以完成任务 A,然后被分配任务 B,并携带之前学到的所有知识。持久化队友积累项目知识,理解已建立的模式,不需要为每个任务重新阅读相同的文件。" + "title": "小索引让记忆保持低成本", + "description": "MEMORY.md 保持短小并始终存在。只有当索引显示相关时,才加载完整记忆文件。" }, "ja": { - "title": "永続的なチームメイト vs 使い捨てサブエージェント", - "description": "s04 ではサブエージェントは一時的です:生成、1つのタスクを実行、結果を返却、消滅。その知識も一緒に消えます。s09 ではチームメイトはアイデンティティ(名前、役割)と設定ファイルを持つ永続的なスレッドです。チームメイトはタスク A を完了した後、学んだ全てを引き継いでタスク B に割り当てられます。永続的なチームメイトはプロジェクトの知識を蓄積し、確立されたパターンを理解し、タスクごとに同じファイルを再読する必要がありません。" + "title": "小さな索引でメモリを低コストに保つ", + "description": "MEMORY.md は短く常に存在します。索引が関連性を示す場合だけ、完全なメモリファイルを読み込みます。" } }, { - "id": "file-based-team-config", - "title": "Team Config Persisted to .teams/{name}/config.json", - "description": "Team structure (member names, roles, agent IDs) is stored in a JSON config file, not in any agent's memory. Any agent can discover its teammates by reading the config file -- no need for a discovery service or shared memory. If an agent crashes and restarts, it reads the config to find out who else is on the team. This is consistent with the s07 philosophy: the filesystem is the coordination layer.", - "alternatives": "In-memory team registries are faster but don't survive process restarts and require a central process to maintain. Service discovery (like DNS or a discovery server) is more robust at scale but overkill for a local multi-agent system. File-based config is the simplest approach that works across independent processes.", + "id": "extract-after-turn", + "title": "Memory Extraction Happens After the Turn", + "description": "The agent first solves the user's task, then extracts durable facts from the original messages. 
This avoids interrupting the working loop with memory bookkeeping.", + "alternatives": "Writing memory during every tool step would make the loop noisy and increase accidental persistence.", "zh": { - "title": "团队配置持久化到 .teams/{name}/config.json", - "description": "团队结构(成员名称、角色、agent ID)存储在 JSON 配置文件中,而非任何 agent 的内存中。任何 agent 都可以通过读取配置文件发现队友——无需发现服务或共享内存。如果 agent 崩溃并重启,它读取配置即可知道团队中还有谁。这与 s07 的理念一致:文件系统就是协调层。配置文件人类可读,便于手动添加或移除团队成员、调试团队配置问题。" + "title": "记忆提取发生在回合之后", + "description": "Agent 先完成用户任务,再从原始消息中提取持久事实。这样不会用记忆账务打断工作循环。" }, "ja": { - "title": "チーム設定を .teams/{name}/config.json に永続化", - "description": "チーム構成(メンバー名、役割、エージェント ID)はエージェントのメモリではなく JSON 設定ファイルに保存されます。どのエージェントも設定ファイルを読むことでチームメイトを発見できます――ディスカバリーサービスや共有メモリは不要です。エージェントがクラッシュして再起動した場合、設定を読んで他のチームメンバーを把握します。これは s07 の思想と一貫しています:ファイルシステムが連携レイヤーです。" + "title": "メモリ抽出はターン後に行う", + "description": "エージェントはまずユーザーのタスクを解き、その後で元メッセージから永続的な事実を抽出します。作業ループをメモリ管理で中断しません。" } }, { - "id": "tool-filtering-by-role", - "title": "Teammates Get Subset of Tools, Lead Gets All", - "description": "The team lead receives ALL_TOOLS (including spawn, send, read_inbox, etc.) while teammates receive TEAMMATE_TOOLS (a reduced set focused on task execution). This enforces a clear separation of concerns: teammates focus on doing work (coding, testing, researching), while the lead focuses on coordination (creating tasks, assigning work, managing communication). Giving teammates coordination tools would let them create their own sub-teams or reassign tasks, undermining the lead's ability to maintain a coherent plan.", - "alternatives": "Giving all agents identical tools is simpler and more egalitarian, but in practice leads to coordination chaos -- multiple agents trying to manage each other, creating conflicting task assignments. 
Static role-based filtering is predictable and easy to reason about.", + "id": "dream-consolidation", + "title": "Consolidation Is Periodic, Not Constant", + "description": "Dream-style consolidation can merge and prune memories periodically. The runtime separates everyday retrieval from heavier memory maintenance.", + "alternatives": "Consolidating on every turn is costly and risks rewriting memories before they prove useful.", "zh": { - "title": "队友获得工具子集,组长获得全部工具", - "description": "团队组长获得 ALL_TOOLS(包括 spawn、send、read_inbox 等),而队友获得 TEAMMATE_TOOLS(专注于任务执行的精简工具集)。这强制了清晰的职责分离:队友专注于做事(编码、测试、研究),组长专注于协调(创建任务、分配工作、管理沟通)。给队友协调工具会让他们创建自己的子团队或重新分配任务,破坏组长维持连贯计划的能力。" + "title": "整合是周期性的,不是每回合都做", + "description": "Dream 式整合可以周期性合并和修剪记忆。运行时把日常检索和更重的记忆维护分开。" }, "ja": { - "title": "チームメイトはツールのサブセット、リーダーは全ツール", - "description": "チームリーダーは ALL_TOOLS(spawn、send、read_inbox など含む)を受け取り、チームメイトは TEAMMATE_TOOLS(タスク実行に特化した縮小セット)を受け取ります。これにより明確な関心の分離が強制されます:チームメイトは作業(コーディング、テスト、調査)に集中し、リーダーは調整(タスク作成、作業割り当て、コミュニケーション管理)に集中します。" + "title": "統合は常時ではなく定期的に行う", + "description": "Dream 型の統合はメモリの結合や剪定を定期的に行います。日常の検索と重いメモリ保守を分離します。" } } ] diff --git a/web/src/data/annotations/s10.json b/web/src/data/annotations/s10.json index 548b299..e0fde3c 100644 --- a/web/src/data/annotations/s10.json +++ b/web/src/data/annotations/s10.json @@ -2,45 +2,45 @@ "version": "s10", "decisions": [ { - "id": "jsonl-inbox", - "title": "JSONL Inbox Files Instead of Shared Memory", - "description": "Each teammate has its own inbox file (a JSONL file in the team directory). Sending a message means appending a JSON line to the recipient's inbox file. Reading messages means reading the inbox file and tracking which line was last read. JSONL is append-only by nature, which means concurrent writers don't corrupt each other's data (appends to different file positions). This works across processes without any shared memory, mutex, or IPC mechanism. 
It's also crash-safe: if the writer crashes mid-append, the worst case is one partial line that the reader can skip.", - "alternatives": "Shared memory (Python multiprocessing.Queue) would be faster but doesn't work if agents are separate processes launched independently. A message broker (Redis, RabbitMQ) provides robust pub/sub but adds infrastructure dependencies. Unix domain sockets would work but are harder to debug (no human-readable message log). JSONL files are the simplest approach that provides persistence, cross-process communication, and debuggability.", + "id": "prompt-from-state", + "title": "The System Prompt Is Built from Runtime State", + "description": "Prompt sections are selected from real state: workspace, available tools, memory, and skills. The prompt becomes a product of the runtime rather than a single hardcoded string.", + "alternatives": "A static prompt is easier to inspect, but it goes stale as capabilities change.", "zh": { - "title": "JSONL 收件箱文件而非共享内存", - "description": "每个队友都有自己的收件箱文件(团队目录中的 JSONL 文件)。发送消息意味着向接收者的收件箱文件追加一行 JSON。读取消息意味着读取收件箱文件并追踪上次读到的行。JSONL 天然是仅追加的,这意味着并发写入不会破坏彼此的数据(追加到不同的文件位置)。这在无需共享内存、互斥锁或 IPC 机制的情况下跨进程工作。它也是崩溃安全的:如果写入者在追加中途崩溃,最坏情况是一行不完整的数据,读取者可以跳过。" + "title": "系统提示词由运行时状态构建", + "description": "Prompt section 来自真实状态:workspace、可用工具、memory 和 skills。提示词是运行时的产物,而不是单个硬编码字符串。" }, "ja": { - "title": "共有メモリではなく JSONL インボックスファイル", - "description": "各チームメイトはチームディレクトリ内に独自のインボックスファイル(JSONL ファイル)を持ちます。メッセージの送信は受信者のインボックスファイルに JSON 行を追記することです。メッセージの読み取りはインボックスファイルを読んで最後に読んだ行を追跡することです。JSONL は本質的に追記専用で、並行ライターが互いのデータを破壊しません(異なるファイル位置への追記)。共有メモリ、ミューテックス、IPC メカニズムなしにプロセス間で動作します。" + "title": "システムプロンプトはランタイム状態から作る", + "description": "workspace、利用可能ツール、memory、skills などの実状態から prompt section を選びます。プロンプトは単一の固定文字列ではなくランタイムの産物です。" } }, { - "id": "five-message-types", - "title": "Exactly Five Message Types Cover All Coordination Patterns", - "description": "The messaging system supports exactly five types: (1) 'message' for point-to-point 
communication between two agents, (2) 'broadcast' for team-wide announcements, (3) 'shutdown_request' for graceful termination, (4) 'shutdown_response' for acknowledging shutdown, (5) 'plan_approval_response' for the lead to approve or reject a teammate's plan. These five types map to the fundamental coordination patterns: direct communication, broadcast, lifecycle management, and approval workflows.", - "alternatives": "A single generic message type with metadata fields would be more flexible but makes it harder to enforce protocol correctness. Many more types (10+) would provide finer-grained semantics but increase the model's decision burden. Five types is the sweet spot where every type has a clear, distinct purpose.", + "id": "deterministic-cache-key", + "title": "A Deterministic Cache Avoids Reassembly", + "description": "The context object is serialized with stable ordering. If the key has not changed, the prompt can be reused safely.", + "alternatives": "Rebuilding every turn is simple, but hides when the prompt actually changed.", "zh": { - "title": "恰好五种消息类型覆盖所有协调模式", - "description": "消息系统恰好支持五种类型:(1) message 用于两个 agent 间的点对点通信;(2) broadcast 用于全团队公告;(3) shutdown_request 用于优雅终止;(4) shutdown_response 用于确认终止;(5) plan_approval_response 用于组长批准或拒绝队友的计划。这五种类型映射到基本协调模式:直接通信、广播、生命周期管理和审批流程。" + "title": "确定性缓存避免重复组装", + "description": "Context 对象用稳定顺序序列化。如果 key 没变,提示词就可以安全复用。" }, "ja": { - "title": "正確に5つのメッセージタイプで全連携パターンをカバー", - "description": "メッセージングシステムは正確に5つのタイプをサポートします:(1) message は2つのエージェント間のポイントツーポイント通信、(2) broadcast はチーム全体への通知、(3) shutdown_request はグレースフルな終了要求、(4) shutdown_response はシャットダウンの確認応答、(5) plan_approval_response はリーダーによるチームメイトの計画の承認・却下。" + "title": "決定的キャッシュで再組み立てを避ける", + "description": "context オブジェクトを安定した順序でシリアライズします。key が変わらなければプロンプトを安全に再利用できます。" } }, { - "id": "inbox-before-api-call", - "title": "Check Inbox Before Every LLM Call", - "description": "Teammates check their inbox file at the top of every agent loop iteration, before calling the 
LLM API. This ensures maximum responsiveness to incoming messages: a shutdown request is seen within one loop iteration (typically seconds), not after the current task completes (potentially minutes). The inbox check is cheap (read a small file, check if new lines exist) compared to the LLM call (seconds of latency, thousands of tokens). This placement also means incoming messages can influence the next LLM call -- a message saying 'stop working on X, switch to Y' takes effect immediately.", - "alternatives": "Checking inbox after each tool execution would be more responsive but adds overhead to every tool call, which is more frequent than LLM calls. A separate watcher thread could monitor the inbox continuously but adds threading complexity. Checking once per LLM call is the pragmatic sweet spot: responsive enough for coordination, cheap enough to not impact performance.", + "id": "sections-have-owners", + "title": "Prompt Sections Have Owners", + "description": "Identity, tools, workspace, and memory are separate sections. 
This makes it easier to debug which subsystem injected a bad instruction.", + "alternatives": "Concatenating arbitrary strings works until the prompt grows and no one knows where a rule came from.", "zh": { - "title": "每次 LLM 调用前检查收件箱", - "description": "队友在每次 agent 循环迭代的顶部、调用 LLM API 之前检查收件箱文件。这确保了对传入消息的最大响应性:一个终止请求会在一个循环迭代内被看到(通常几秒钟),而非在当前任务完成后(可能数分钟)。收件箱检查成本很低(读取小文件,检查是否有新行),相比 LLM 调用(秒级延迟,数千 token)微不足道。这个位置还意味着传入消息可以影响下一次 LLM 调用——一条'停止 X,转去做 Y'的消息会立即生效。" + "title": "Prompt Section 有明确归属", + "description": "identity、tools、workspace、memory 是分开的 section。这样更容易定位哪一层注入了错误指令。" }, "ja": { - "title": "毎回の LLM 呼び出し前にインボックスを確認", - "description": "チームメイトはエージェントループの各イテレーションの冒頭、LLM API を呼び出す前にインボックスファイルを確認します。これにより受信メッセージへの応答性を最大化します:シャットダウンリクエストは1ループイテレーション以内(通常数秒)で確認され、現在のタスク完了後(数分かかる可能性)ではありません。" + "title": "prompt section には所有者がある", + "description": "identity、tools、workspace、memory を別 section にします。どのサブシステムが悪い指示を入れたかを追いやすくなります。" } } ] diff --git a/web/src/data/annotations/s11.json b/web/src/data/annotations/s11.json index 8595728..46cc8c1 100644 --- a/web/src/data/annotations/s11.json +++ b/web/src/data/annotations/s11.json @@ -2,45 +2,45 @@ "version": "s11", "decisions": [ { - "id": "polling-not-events", - "title": "Polling for Unclaimed Tasks Instead of Event-Driven Notification", - "description": "Autonomous teammates poll the shared task board every ~1 second to find unclaimed tasks, rather than waiting for event-driven notifications. Polling is fundamentally simpler than pub/sub: there's no subscription management, no event routing, no missed-event bugs. With file-based persistence, polling is just 'read the directory listing' -- a cheap operation that works regardless of how many agents are running. 
The 1-second interval balances responsiveness (new tasks are discovered quickly) against filesystem overhead (not hammering the disk with reads).", - "alternatives": "Event-driven notification (file watchers via inotify/fsevents, or a pub/sub channel) would reduce latency from seconds to milliseconds. But file watchers are platform-specific and unreliable across network filesystems. A message broker would work but adds infrastructure. For a system where tasks take minutes to complete, discovering new tasks in 1 second instead of 10 milliseconds makes no practical difference.", + "id": "classified-recovery", + "title": "Different Failures Need Different Recovery Paths", + "description": "max_tokens, prompt_too_long, and provider overload mean different things. The runtime classifies the error before retrying.", + "alternatives": "Blind retry is easy, but wastes time and can repeat a failure that needs compaction or token escalation.", "zh": { - "title": "轮询未认领任务而非事件驱动通知", - "description": "自主队友每隔约 1 秒轮询共享任务板以寻找未认领的任务,而非等待事件驱动的通知。轮询从根本上比发布/订阅更简单:没有订阅管理、没有事件路由、没有事件丢失的 bug。在基于文件的持久化下,轮询就是'读取目录列表'——一个低成本操作,无论有多少 agent 在运行都能正常工作。1 秒的间隔平衡了响应性(新任务被快速发现)和文件系统开销(不会过度读取磁盘)。" + "title": "不同失败需要不同恢复路径", + "description": "max_tokens、prompt_too_long 和供应商过载含义不同。运行时会先分类错误,再决定如何重试。" }, "ja": { - "title": "イベント駆動通知ではなくポーリングで未割り当てタスクを発見", - "description": "自律的なチームメイトはイベント駆動の通知を待つのではなく、約1秒ごとに共有タスクボードをポーリングして未割り当てタスクを探します。ポーリングはパブ/サブより根本的にシンプルです:サブスクリプション管理、イベントルーティング、イベント欠落バグがありません。ファイルベースの永続化では、ポーリングは「ディレクトリ一覧を読む」だけで、実行中のエージェント数に関係なく動作する安価な操作です。" + "title": "失敗ごとに異なる回復経路が必要", + "description": "max_tokens、prompt_too_long、プロバイダ過負荷は意味が違います。ランタイムは再試行前にエラーを分類します。" } }, { - "id": "idle-timeout", - "title": "60-Second Idle Timeout Before Self-Termination", - "description": "When an autonomous teammate has no tasks to work on and no messages in its inbox, it waits up to 60 seconds before giving up and shutting down. 
This prevents zombie teammates that wait forever for work that never comes -- a real problem when the lead forgets to send a shutdown request, or when all remaining tasks are blocked on external events. The 60-second window is long enough that a brief gap between task completions and new task creation won't cause premature shutdown, but short enough that unused teammates don't waste resources.", - "alternatives": "No timeout (wait forever) risks zombie processes. A very short timeout (5s) causes premature exits when the lead is simply thinking or typing. A heartbeat system (lead periodically pings teammates to keep them alive) works but adds protocol complexity. The 60-second fixed timeout is a good default that balances false-positive exits against resource waste.", + "id": "recovery-state", + "title": "RecoveryState Prevents Infinite Retries", + "description": "The runtime tracks token escalation, compact retries, consecutive 529s, and fallback model use. Recovery becomes bounded and inspectable.", + "alternatives": "A while-retry loop can accidentally retry forever or hide which mitigation has already run.", "zh": { - "title": "空闲 60 秒后自动终止", - "description": "当自主队友没有任务可做且收件箱中没有消息时,它最多等待 60 秒后放弃并关闭。这防止了永远等待不会到来的工作的僵尸队友——这在组长忘记发送关闭请求、或所有剩余任务都被外部事件阻塞时是真实存在的问题。60 秒窗口足够长,不会因为任务完成到新任务创建之间的短暂间隔而导致过早关闭;又足够短,不会让闲置队友浪费资源。" + "title": "RecoveryState 防止无限重试", + "description": "运行时记录 token 升级、compact retry、连续 529、fallback model 等状态。恢复因此有边界、可检查。" }, "ja": { - "title": "60秒のアイドルタイムアウトで自動終了", - "description": "自律的なチームメイトが作業するタスクもインボックスのメッセージもない場合、最大60秒待ってから諦めてシャットダウンします。これにより永遠に来ない仕事を待ち続けるゾンビチームメイトを防ぎます。60秒のウィンドウはタスク完了から新タスク作成までの短い間隔で早期シャットダウンが起きない十分な長さであり、かつ未使用のチームメイトがリソースを浪費しない十分な短さです。" + "title": "RecoveryState が無限リトライを防ぐ", + "description": "token 拡張、compact retry、連続 529、fallback model の利用を追跡します。回復処理に境界と可観測性を与えます。" } }, { - "id": "identity-after-compression", - "title": "Re-Inject Teammate Identity After Context Compression", - "description": "When auto_compact compresses 
the conversation, the resulting summary loses crucial metadata: the teammate's name, which team it belongs to, and its agent_id. Without this information, the teammate can't claim tasks (tasks are owned by name), can't check its inbox (inbox files are keyed by agent_id), and can't identify itself in messages. So after every auto_compact, the system re-injects a structured identity block into the conversation: 'You are [name] on team [team], your agent_id is [id], your inbox is at [path].' This is the minimum context needed for the teammate to remain functional after memory loss.", - "alternatives": "Putting identity in the system prompt (which survives compression) would avoid this problem, but violates the cache-friendly static-system-prompt design from s05. Embedding identity in the summary prompt ('when summarizing, always include your name and team') is unreliable -- the LLM might omit it. Explicit post-compression injection is deterministic and guaranteed to work.", + "id": "no-fake-continuation", + "title": "Do Not Fake User Messages During Token Recovery", + "description": "The first max_tokens escalation retries without appending a synthetic user prompt. 
The transcript should reflect real events, not internal recovery tricks.", + "alternatives": "Always appending 'continue' is tempting, but it pollutes conversation history and may change model behavior.", "zh": { - "title": "上下文压缩后重新注入队友身份", - "description": "自动压缩对话时,生成的摘要会丢失关键元数据:队友的名称、所属团队和 agent_id。没有这些信息,队友无法认领任务(任务按名称归属)、无法检查收件箱(收件箱文件以 agent_id 为键)、也无法在消息中表明身份。因此每次自动压缩后,系统会向对话中重新注入一个结构化的身份块:'你是 [team] 团队的 [name],你的 agent_id 是 [id],你的收件箱在 [path]。'这是队友在记忆丢失后保持功能所需的最小上下文。" + "title": "Token 恢复时不伪造用户消息", + "description": "第一次 max_tokens 升级会直接重试,不追加合成 user prompt。Transcript 应反映真实事件,而不是内部恢复技巧。" }, "ja": { - "title": "コンテキスト圧縮後にチームメイトのアイデンティティを再注入", - "description": "自動コンパクトが会話を圧縮すると、生成された要約は重要なメタデータを失います:チームメイトの名前、所属チーム、agent_id。この情報がなければチームメイトはタスクを申告できず(タスクは名前で所有)、インボックスを確認できず(インボックスファイルは agent_id をキーとする)、メッセージで自分を識別できません。そのため自動コンパクトの後、システムは構造化されたアイデンティティブロックを会話に再注入します。これはメモリ喪失後もチームメイトが機能し続けるために必要な最小限のコンテキストです。" + "title": "トークン回復で偽のユーザーメッセージを入れない", + "description": "最初の max_tokens 拡張では合成 user prompt を追加せず再試行します。transcript は内部の回復処理ではなく実際の出来事を反映すべきです。" } } ] diff --git a/web/src/data/annotations/s12.json b/web/src/data/annotations/s12.json index 3f2cb7f..7f673be 100644 --- a/web/src/data/annotations/s12.json +++ b/web/src/data/annotations/s12.json @@ -2,87 +2,45 @@ "version": "s12", "decisions": [ { - "id": "shared-board-isolated-lanes", - "title": "Shared Task Board + Isolated Execution Lanes", - "description": "The task board remains shared and centralized in `.tasks/`, while file edits happen in per-task worktree directories. This separation preserves global visibility (who owns what, what is done) without forcing everyone to edit inside one mutable directory. Coordination stays simple because there is one board, and execution stays safe because each lane is isolated.", - "alternatives": "A single shared workspace is simpler but causes edit collisions and mixed git state. 
Fully independent task stores per lane avoid collisions but lose team-level visibility and make planning harder.", + "id": "tasks-as-files", + "title": "Tasks Are Durable JSON Files", + "description": "Each task is persisted under .tasks/ with id, subject, description, status, owner, and blockedBy. The task board survives context compaction and process restarts.", + "alternatives": "In-memory tasks are easier to code, but vanish exactly when long-running coordination needs them most.", "zh": { - "title": "共享任务板 + 隔离执行通道", - "description": "任务板继续集中在 `.tasks/`,而文件改动发生在按任务划分的 worktree 目录中。这样既保留了全局可见性(谁在做什么、完成到哪),又避免所有人同时写同一目录导致冲突。协调层简单(一个任务板),执行层安全(多条隔离通道)。" + "title": "任务是持久 JSON 文件", + "description": "每个任务都持久化在 .tasks/ 下,包含 id、subject、description、status、owner、blockedBy。任务板能跨上下文压缩和进程重启保留。" }, "ja": { - "title": "共有タスクボード + 分離実行レーン", - "description": "タスクボードは `.tasks/` に集約しつつ、実際の編集はタスクごとの worktree ディレクトリで行う。これにより全体の可視性(担当と進捗)を維持しながら、単一ディレクトリでの衝突を回避できる。調整は1つのボードで単純化され、実行はレーン分離で安全になる。" + "title": "タスクは永続 JSON ファイル", + "description": "各タスクは .tasks/ に id、subject、description、status、owner、blockedBy を持って保存されます。タスクボードはコンテキスト圧縮や再起動を越えて残ります。" } }, { - "id": "index-file-lifecycle", - "title": "Explicit Worktree Lifecycle Index", - "description": "`.worktrees/index.json` records each worktree's name, path, branch, task_id, and status. This makes lifecycle state inspectable and recoverable even after context compression or process restarts. The index also provides a deterministic source for list/status/remove operations.", - "alternatives": "Relying only on `git worktree list` removes local bookkeeping but loses task binding metadata and custom lifecycle states. Keeping all state only in memory is simpler in code but breaks recoverability.", + "id": "blockedby-dependencies", + "title": "blockedBy Encodes Ordering", + "description": "A task can only be claimed when all blockedBy dependencies are completed. 
Missing dependencies are treated as blocked to fail closed.", + "alternatives": "Letting the model remember ordering is fragile and hard for teammates to share.", "zh": { - "title": "显式 worktree 生命周期索引", - "description": "`.worktrees/index.json` 记录每个 worktree 的名称、路径、分支、task_id 与状态。即使上下文压缩或进程重启,这些生命周期状态仍可检查和恢复。它也为 list/status/remove 提供了确定性的本地数据源。" + "title": "blockedBy 编码任务顺序", + "description": "只有所有 blockedBy 依赖都完成时,任务才能被 claim。缺失依赖也被视为阻塞,采用 fail closed。" }, "ja": { - "title": "明示的な worktree ライフサイクル索引", - "description": "`.worktrees/index.json` に name/path/branch/task_id/status を記録することで、コンテキスト圧縮やプロセス再起動後も状態を追跡できる。list/status/remove の挙動もこの索引を基準に決定できる。" + "title": "blockedBy が順序を表現する", + "description": "blockedBy の依存がすべて完了した時だけタスクを claim できます。存在しない依存もブロック扱いにして fail closed にします。" } }, { - "id": "lane-cwd-routing-and-reentry-guard", - "title": "Lane-Scoped CWD Routing + Re-entry Guard", - "description": "Commands are routed to a worktree's directory via `worktree_run(name, command)` using the `cwd` parameter. A re-entry guard prevents accidentally running inside an already-active worktree context, keeping lifecycle ownership unambiguous.", - "alternatives": "Global cwd mutation is easy to implement but can leak context across parallel work. 
Allowing silent re-entry makes lifecycle ownership ambiguous and complicates teardown behavior.", + "id": "claim-complete-lifecycle", + "title": "Claim and Complete Make Work Observable", + "description": "claim_task records an owner and in_progress state; complete_task marks completion and reports downstream tasks that became unblocked.", + "alternatives": "A simple checklist can say done, but it cannot safely coordinate ownership or dependencies.", "zh": { - "title": "按通道 cwd 路由 + 禁止重入", - "description": "命令通过 `worktree_run(name, command)` 使用 `cwd` 参数路由到 worktree 目录。重入保护避免了在已激活的 worktree 上下文中意外二次进入,保持生命周期归属清晰。" + "title": "Claim 和 Complete 让工作可观察", + "description": "claim_task 记录 owner 和 in_progress 状态;complete_task 标记完成,并报告被解锁的下游任务。" }, "ja": { - "title": "レーン単位 cwd ルーティング + 再入防止", - "description": "`worktree_run(name, command)` で `cwd` パラメータを使いコマンドを worktree ディレクトリへ転送する。再入ガードにより active な worktree への二重入場を防ぎ、ライフサイクルの帰属を明確に保つ。" - } - }, - { - "id": "event-stream-observability", - "title": "Append-Only Lifecycle Event Stream", - "description": "Lifecycle events are appended to `.worktrees/events.jsonl` (`worktree.create.*`, `worktree.remove.*`, `task.completed`). This turns hidden transitions into queryable records and makes failures explicit (`*.failed`) instead of silent.", - "alternatives": "Relying only on console logs is lighter but fragile during long sessions and hard to audit. 
A full event bus infrastructure is powerful but heavier than needed for this teaching baseline.", - "zh": { - "title": "追加式生命周期事件流", - "description": "生命周期事件写入 `.worktrees/events.jsonl`(如 `worktree.create.*`、`worktree.remove.*`、`task.completed`)。这样状态迁移可查询、可追踪,失败也会以 `*.failed` 显式暴露,而不是静默丢失。" - }, - "ja": { - "title": "追記型ライフサイクルイベント", - "description": "ライフサイクルイベントを `.worktrees/events.jsonl` に追記する(`worktree.create.*`、`worktree.remove.*`、`task.completed` など)。遷移が可観測になり、失敗も `*.failed` として明示できる。" - } - }, - { - "id": "task-worktree-closeout", - "title": "Close Task and Workspace Together", - "description": "`worktree_remove(..., complete_task=true)` allows a single closeout step: remove the isolated directory and mark the bound task completed. Closeout remains an explicit tool-driven transition (`worktree_keep` / `worktree_remove`) rather than hidden automatic cleanup. This reduces dangling state where a task says done but its temporary lane remains active (or the reverse).", - "alternatives": "Keeping closeout fully manual gives flexibility but increases operational drift. Fully automatic removal on every completion risks deleting a workspace before final review.", - "zh": { - "title": "任务与工作区一起收尾", - "description": "`worktree_remove(..., complete_task=true)` 允许在一个动作里完成收尾:删除隔离目录并把绑定任务标记为 completed。收尾保持为显式工具驱动迁移(`worktree_keep` / `worktree_remove`),而不是隐藏的自动清理。这样可减少状态悬挂(任务已完成但临时工作区仍活跃,或反过来)。" - }, - "ja": { - "title": "タスクとワークスペースを同時にクローズ", - "description": "`worktree_remove(..., complete_task=true)` により、分離ディレクトリ削除とタスク完了更新を1ステップで実行できる。クローズ処理は `worktree_keep` / `worktree_remove` の明示ツール遷移として扱い、暗黙の自動清掃にはしない。" - } - }, - { - "id": "event-stream-side-channel", - "title": "Event Stream Is Observability Side-Channel", - "description": "Lifecycle events improve auditability, but the source of truth remains task/worktree state files. 
Events should be read as transition traces, not as a replacement state machine.", - "alternatives": "Using logs alone hides structured transitions; using events as the only state source risks drift when replay/repair semantics are undefined.", - "zh": { - "title": "事件流是观测旁路,不是状态机替身", - "description": "生命周期事件提升可审计性,但真实状态源仍是任务/工作区状态文件。事件更适合做迁移轨迹,而不是替代主状态机。" - }, - "ja": { - "title": "イベントは観測サイドチャネルであり状態機械の代替ではない", - "description": "ライフサイクルイベントは監査性を高めるが、真の状態源は task/worktree 状態ファイルのまま。イベントは遷移トレースとして扱い、主状態機械の代替にしない。" + "title": "claim と complete が作業を観測可能にする", + "description": "claim_task は owner と in_progress を記録し、complete_task は完了を記録して解放された下流タスクを報告します。" } } ] diff --git a/web/src/data/annotations/s13.json b/web/src/data/annotations/s13.json new file mode 100644 index 0000000..65adc0c --- /dev/null +++ b/web/src/data/annotations/s13.json @@ -0,0 +1,47 @@ +{ + "version": "s13", + "decisions": [ + { + "id": "explicit-background-boundary", + "title": "Background Work Is an Execution Mode, Not a New Tool", + "description": "The lesson keeps the familiar tool surface and adds a background execution flag around slow operations. That makes the new mechanism visible: the same bash call can either block the loop or be moved to a thread. 
The agent learns that responsiveness is a runtime concern, not a reason to invent a separate tool for every slow task.", + "alternatives": "A dedicated background_bash tool would be simpler to route, but it would hide the more general idea that any slow operation can be scheduled asynchronously.", + "zh": { + "title": "后台任务是执行模式,而不是新工具", + "description": "课程保留原有工具表面,只在慢操作外增加后台执行标记。这样能清楚看到:同一个 bash 调用既可以阻塞主循环,也可以放入线程。Agent 学到的是响应性属于运行时问题,而不是每个慢任务都要发明一个新工具。" + }, + "ja": { + "title": "バックグラウンド処理は新ツールではなく実行モード", + "description": "このレッスンでは既存のツール面を保ち、遅い操作にバックグラウンド実行フラグを加えます。同じ bash 呼び出しがループをブロックすることも、スレッドへ移すこともできる点が見えます。応答性はランタイムの責務であり、遅いタスクごとに新しいツールを作る必要はありません。" + } + }, + { + "id": "notification-reentry", + "title": "Completed Threads Re-enter as Notifications", + "description": "Background results are injected as task notifications instead of pretending to be immediate tool results. This preserves the chronology of the conversation: the model first sees that work started, and later sees that a task completed.", + "alternatives": "The thread could mutate the last tool result in place, but that would make the transcript impossible to reason about and hard to replay.", + "zh": { + "title": "线程完成后以通知形式回到循环", + "description": "后台结果会作为任务通知注入,而不是伪装成立即返回的 tool result。这样保留了对话时间线:模型先看到任务已启动,之后再看到任务完成。" + }, + "ja": { + "title": "完了したスレッドは通知として戻る", + "description": "バックグラウンド結果は即時の tool result ではなくタスク通知として注入されます。モデルはまず作業開始を見て、その後に完了を知るため、会話の時系列が保たれます。" + } + }, + { + "id": "shared-result-store", + "title": "A Small Shared Store Keeps Threads Observable", + "description": "The implementation tracks background task state and results in explicit dictionaries. 
That keeps the code teachable while still exposing the hard parts of concurrency: ids, lifecycle state, and safe collection.", + "alternatives": "A full queue or job database would be more production-ready, but it would obscure the minimal moving parts needed to understand threaded agent work.", + "zh": { + "title": "小型共享存储让线程可观察", + "description": "实现用显式字典记录后台任务状态和结果。这样代码仍然易学,同时暴露并发中的关键问题:任务 id、生命周期状态和结果收集。" + }, + "ja": { + "title": "小さな共有ストアでスレッドを観測可能にする", + "description": "実装は辞書でバックグラウンドタスクの状態と結果を追跡します。コードを学びやすく保ちながら、id、ライフサイクル、安全な収集という並行処理の要点を示します。" + } + } + ] +} diff --git a/web/src/data/annotations/s14.json b/web/src/data/annotations/s14.json new file mode 100644 index 0000000..65f07ca --- /dev/null +++ b/web/src/data/annotations/s14.json @@ -0,0 +1,47 @@ +{ + "version": "s14", + "decisions": [ + { + "id": "scheduler-outside-agent-loop", + "title": "The Scheduler Runs Outside the Agent Loop", + "description": "Cron matching is handled by a daemon loop rather than by asking the LLM to remember future times. This separates timekeeping from reasoning and makes recurring work reliable even when no user is actively chatting.", + "alternatives": "The agent could poll schedules inside each conversation turn, but missed turns would mean missed jobs.", + "zh": { + "title": "调度器运行在 Agent 循环之外", + "description": "Cron 匹配由独立守护循环处理,而不是让 LLM 记住未来时间。这把计时和推理分开,使定期任务在没有用户对话时也能可靠触发。" + }, + "ja": { + "title": "スケジューラはエージェントループの外で動く", + "description": "cron の照合は LLM に未来時刻を覚えさせるのではなく、デーモンループで処理します。時間管理と推論を分離し、ユーザーが会話していない時でも定期処理を確実にします。" + } + }, + { + "id": "queue-decouples-time-from-work", + "title": "A Queue Decouples Due Time from Execution", + "description": "When a schedule matches, the scheduler enqueues work and lets a queue processor invoke the agent loop. 
That keeps cron matching fast and prevents long agent runs from blocking future schedule checks.", + "alternatives": "The scheduler could call the agent directly, but a slow job would stall the scheduler itself.", + "zh": { + "title": "队列把到期判断和任务执行解耦", + "description": "当 schedule 匹配时,调度器只把任务放入队列,由队列处理器调用 agent_loop。这样 cron 匹配保持快速,长时间运行的 agent 任务不会阻塞后续调度检查。" + }, + "ja": { + "title": "キューが期限判定と実行を分離する", + "description": "スケジュールが一致すると、スケジューラは作業をキューへ入れ、キュープロセッサが agent_loop を呼び出します。cron 照合は速く保たれ、長いエージェント実行が次の確認を妨げません。" + } + }, + { + "id": "durable-schedules", + "title": "Schedules Are Durable Data", + "description": "Cron jobs are stored in a small JSON file so they survive process restarts. The lesson treats scheduled work as data that can be listed, cancelled, and inspected, not as hidden timers.", + "alternatives": "In-memory timers are shorter to implement, but they disappear on restart and are difficult to audit.", + "zh": { + "title": "计划任务是持久数据", + "description": "Cron job 存储在小型 JSON 文件中,因此进程重启后仍然存在。课程把计划任务视为可列出、可取消、可检查的数据,而不是隐藏的计时器。" + }, + "ja": { + "title": "スケジュールは永続データ", + "description": "cron ジョブは小さな JSON ファイルに保存され、プロセス再起動後も残ります。予定された作業を、一覧化、取り消し、検査できるデータとして扱います。" + } + } + ] +} diff --git a/web/src/data/annotations/s15.json b/web/src/data/annotations/s15.json new file mode 100644 index 0000000..bbca4e5 --- /dev/null +++ b/web/src/data/annotations/s15.json @@ -0,0 +1,47 @@ +{ + "version": "s15", + "decisions": [ + { + "id": "lead-agent-owns-coordination", + "title": "The Lead Owns Coordination", + "description": "The lead agent decides when to spawn teammates, what to send them, and how to interpret replies. 
Teammates can work independently, but the user-facing conversation stays anchored in one lead loop.", + "alternatives": "A peer-to-peer team would be more flexible, but much harder to explain because no single loop owns the answer.", + "zh": { + "title": "由 Lead Agent 负责协调", + "description": "Lead agent 决定何时创建队友、发送什么任务、如何解释回复。队友可以独立工作,但面向用户的对话始终锚定在一个 lead 循环中。" + }, + "ja": { + "title": "調整はリードエージェントが担う", + "description": "リードエージェントがチームメイトの生成、送信内容、返信の解釈を決めます。チームメイトは独立して作業できますが、ユーザー向けの会話は一つのリードループに固定されます。" + } + }, + { + "id": "file-backed-mailboxes", + "title": "Mailboxes Make Team Communication Inspectable", + "description": "MessageBus writes JSONL mailboxes so every handoff is visible on disk. This avoids magical shared memory and gives learners a concrete artifact for debugging team behavior.", + "alternatives": "In-memory channels are faster, but they hide the communication history and disappear when the process stops.", + "zh": { + "title": "邮箱文件让团队通信可检查", + "description": "MessageBus 使用 JSONL 邮箱记录每次交接。这样避免了神秘的共享内存,也给学习者一个能直接调试团队行为的具体文件。" + }, + "ja": { + "title": "メールボックスでチーム通信を検査可能にする", + "description": "MessageBus は JSONL メールボックスへ各ハンドオフを書き込みます。見えない共有メモリを避け、チーム動作をデバッグできる具体的な成果物を提供します。" + } + }, + { + "id": "scoped-teammate-tools", + "title": "Teammates Use Scoped Tool Sets", + "description": "A teammate loop receives a narrower prompt and tool set than the lead. 
That keeps delegation focused and prevents a helper agent from accidentally taking over orchestration.", + "alternatives": "Giving every teammate the full tool pool is simpler, but it blurs roles and makes failures harder to attribute.", + "zh": { + "title": "队友使用受限工具集", + "description": "队友循环拿到比 lead 更窄的提示词和工具集。这样委派更聚焦,也避免 helper agent 意外接管整体协调。" + }, + "ja": { + "title": "チームメイトには範囲を絞ったツールセットを与える", + "description": "チームメイトループにはリードより狭いプロンプトとツールセットを渡します。委任を集中させ、補助エージェントが誤って全体調整を奪うことを防ぎます。" + } + } + ] +} diff --git a/web/src/data/annotations/s16.json b/web/src/data/annotations/s16.json new file mode 100644 index 0000000..cdc8acb --- /dev/null +++ b/web/src/data/annotations/s16.json @@ -0,0 +1,47 @@ +{ + "version": "s16", + "decisions": [ + { + "id": "typed-protocol-messages", + "title": "Typed Messages Replace Informal Chat", + "description": "Plan requests and shutdown requests are encoded as protocol messages with explicit kinds. The teammate can branch on message type instead of guessing intent from free-form text.", + "alternatives": "Plain natural-language messages are easier to write, but brittle once the team has multiple request types.", + "zh": { + "title": "用类型化协议消息替代随意聊天", + "description": "计划请求和关闭请求会编码成带有明确 kind 的协议消息。队友可以根据消息类型分支处理,而不是从自由文本中猜意图。" + }, + "ja": { + "title": "非公式チャットを型付きプロトコルメッセージに置き換える", + "description": "計画要求とシャットダウン要求は明示的な kind を持つプロトコルメッセージとして表現されます。チームメイトは自由文から意図を推測せず、型で分岐できます。" + } + }, + { + "id": "request-id-correlation", + "title": "Request IDs Close the Loop", + "description": "Each protocol request creates a pending record with a request_id. 
Responses must carry the same id, which lets the lead match replies even when multiple teammates are active.", + "alternatives": "Matching by latest message works in demos, but fails as soon as two requests overlap.", + "zh": { + "title": "Request ID 闭合协议循环", + "description": "每个协议请求都会创建带 request_id 的 pending 记录。响应必须携带同一个 id,因此即使多个队友同时工作,lead 也能匹配对应回复。" + }, + "ja": { + "title": "request_id がループを閉じる", + "description": "各プロトコル要求は request_id 付きの pending レコードを作ります。応答も同じ id を持つため、複数のチームメイトが動いていてもリードは対応する返信を照合できます。" + } + }, + { + "id": "idle-protocol-handling", + "title": "Protocol Handling Runs During Idle Time", + "description": "Teammates can consume protocol messages while idle, so the lead can request plans or shutdowns without waiting for a separate user turn. This makes team control part of the runtime lifecycle.", + "alternatives": "Only checking protocols during active work would delay control messages and make shutdown unreliable.", + "zh": { + "title": "空闲期也处理协议", + "description": "队友在空闲状态也会消费协议消息,因此 lead 可以请求计划或关闭,而不必等待另一个用户回合。这让团队控制成为运行时生命周期的一部分。" + }, + "ja": { + "title": "アイドル中にもプロトコルを処理する", + "description": "チームメイトはアイドル時にもプロトコルメッセージを消費します。リードは別のユーザーターンを待たずに計画や終了を要求でき、チーム制御がランタイムのライフサイクルに組み込まれます。" + } + } + ] +} diff --git a/web/src/data/annotations/s17.json b/web/src/data/annotations/s17.json new file mode 100644 index 0000000..26c79b0 --- /dev/null +++ b/web/src/data/annotations/s17.json @@ -0,0 +1,47 @@ +{ + "version": "s17", + "decisions": [ + { + "id": "idle-poll-loop", + "title": "Autonomy Starts from Idle Polling", + "description": "The agent becomes autonomous by doing useful checks while idle: scanning tasks, reading inbox messages, and deciding whether to claim work. 
No new magic planner is introduced.", + "alternatives": "A central scheduler could assign every task, but this lesson focuses on local autonomy inside each teammate loop.", + "zh": { + "title": "自治从空闲轮询开始", + "description": "Agent 通过在空闲时做有用检查获得自治能力:扫描任务、读取 inbox、判断是否 claim 工作。这里没有引入新的神秘规划器。" + }, + "ja": { + "title": "自律性はアイドルポーリングから始まる", + "description": "エージェントはアイドル時にタスク走査、受信箱確認、作業の claim 判断を行うことで自律的になります。新しい魔法のプランナーは導入しません。" + } + }, + { + "id": "claim-before-work", + "title": "Claim Before Work Prevents Collisions", + "description": "A teammate must claim a task before entering WORK state. Ownership checks make autonomous pickup safe when multiple agents poll the same task board.", + "alternatives": "Agents could simply pick any open task, but two agents might duplicate work or overwrite each other's result.", + "zh": { + "title": "先 Claim 再工作,避免冲突", + "description": "队友必须先 claim 任务,再进入 WORK 状态。多个 agent 轮询同一个任务板时,所有权检查让自治领取任务更安全。" + }, + "ja": { + "title": "作業前に claim して衝突を防ぐ", + "description": "チームメイトは WORK 状態へ入る前にタスクを claim します。複数のエージェントが同じタスクボードをポーリングしても、所有権チェックにより安全に取得できます。" + } + }, + { + "id": "identity-reinjection", + "title": "Identity Is Re-injected on Each Autonomous Turn", + "description": "Autonomous agents need a stable sense of who they are and what they are allowed to do. 
Re-injecting identity keeps a teammate from drifting into the lead's responsibilities.", + "alternatives": "A one-time identity prompt is shorter, but long-running loops are prone to context drift.", + "zh": { + "title": "每个自治回合都重新注入身份", + "description": "自治 agent 需要稳定知道自己是谁、允许做什么。重新注入身份可以防止队友逐渐漂移到 lead 的职责上。" + }, + "ja": { + "title": "各自律ターンでアイデンティティを再注入する", + "description": "自律エージェントには、自分が誰で何を許可されているかという安定した認識が必要です。アイデンティティを再注入することで、チームメイトがリードの責務へ漂うことを防ぎます。" + } + } + ] +} diff --git a/web/src/data/annotations/s18.json b/web/src/data/annotations/s18.json new file mode 100644 index 0000000..d3c8312 --- /dev/null +++ b/web/src/data/annotations/s18.json @@ -0,0 +1,47 @@ +{ + "version": "s18", + "decisions": [ + { + "id": "worktree-name-validation", + "title": "Worktree Names Are Validated Before Git Runs", + "description": "The tool validates names before creating branches or directories. That keeps a teaching implementation from normalizing unsafe user input into shell or filesystem operations.", + "alternatives": "Passing names directly to git is shorter, but it turns a collaboration feature into an injection hazard.", + "zh": { + "title": "运行 Git 前先校验 Worktree 名称", + "description": "工具在创建分支或目录前先校验名称。这样教学实现不会把不安全的用户输入直接传入 shell 或文件系统操作。" + }, + "ja": { + "title": "git 実行前に worktree 名を検証する", + "description": "ブランチやディレクトリ作成前に名前を検証します。学習用実装が危険なユーザー入力を shell やファイルシステム操作へ流し込むことを防ぎます。" + } + }, + { + "id": "task-bound-worktree", + "title": "The Task Record Owns the Worktree Binding", + "description": "A task stores its assigned worktree so future commands know where to run. 
The binding is explicit data, not a hidden convention based on naming or current working directory.", + "alternatives": "Deriving the worktree path from branch names is convenient, but brittle when tasks are renamed or moved.", + "zh": { + "title": "任务记录持有 Worktree 绑定关系", + "description": "任务会记录自己分配到的 worktree,因此后续命令知道应该在哪里运行。这个绑定是显式数据,而不是依赖命名或当前目录的隐藏约定。" + }, + "ja": { + "title": "タスクレコードが worktree の紐付けを持つ", + "description": "タスクは割り当てられた worktree を保持し、後続コマンドは実行場所を把握できます。この紐付けは命名や現在ディレクトリに依存する暗黙の規約ではなく、明示的なデータです。" + } + }, + { + "id": "lifecycle-event-stream", + "title": "Lifecycle Events Stay Separate from Tool Results", + "description": "Creation, status, keep, and removal events are emitted to a side-channel log. That makes worktree state observable without overloading the conversational transcript.", + "alternatives": "Only returning tool results is simpler, but later debugging needs a durable audit trail of worktree lifecycle changes.", + "zh": { + "title": "生命周期事件与工具结果分离", + "description": "创建、状态、保留和移除事件会写入旁路日志。这样 worktree 状态可观察,同时不会把对话 transcript 塞满运行时事件。" + }, + "ja": { + "title": "ライフサイクルイベントをツール結果から分離する", + "description": "作成、状態、保持、削除のイベントはサイドチャネルログへ出力します。会話 transcript をランタイムイベントで埋めずに worktree 状態を観測できます。" + } + } + ] +} diff --git a/web/src/data/annotations/s19.json b/web/src/data/annotations/s19.json new file mode 100644 index 0000000..eba7975 --- /dev/null +++ b/web/src/data/annotations/s19.json @@ -0,0 +1,47 @@ +{ + "version": "s19", + "decisions": [ + { + "id": "normalized-mcp-namespace", + "title": "MCP Tools Use a Normalized Namespace", + "description": "Discovered tools are exposed as mcp__server__tool. 
The prefix makes the source explicit and avoids collisions with built-in tools or tools from another server.", + "alternatives": "Using the raw tool name is shorter, but search from two servers could overwrite each other.", + "zh": { + "title": "MCP 工具使用规范化命名空间", + "description": "发现到的工具会暴露为 mcp__server__tool。前缀让工具来源明确,也避免和内置工具或其他服务器工具冲突。" + }, + "ja": { + "title": "MCP ツールは正規化された名前空間を使う", + "description": "発見されたツールは mcp__server__tool として公開されます。接頭辞により出所が明確になり、組み込みツールや別サーバーのツールとの衝突を避けます。" + } + }, + { + "id": "dynamic-tool-pool", + "title": "Tool Discovery Updates the Active Tool Pool", + "description": "After connecting to a server, the runtime assembles a new tool pool for the next LLM call. The model can only use MCP tools after discovery has made them visible.", + "alternatives": "Preloading every possible MCP tool would create a huge prompt and expose capabilities the user did not request.", + "zh": { + "title": "工具发现会更新活动工具池", + "description": "连接服务器后,运行时会为下一次 LLM 调用组装新的工具池。模型只有在发现阶段让 MCP 工具可见之后,才能调用它们。" + }, + "ja": { + "title": "ツール発見がアクティブなツールプールを更新する", + "description": "サーバー接続後、ランタイムは次の LLM 呼び出し用に新しいツールプールを組み立てます。MCP ツールは発見で可視化された後にのみモデルが利用できます。" + } + }, + { + "id": "external-results-append-like-tools", + "title": "External Results Reuse the Tool Result Path", + "description": "MCP responses are appended to the conversation like ordinary tool results. 
This keeps the agent loop unchanged while still letting external systems participate.", + "alternatives": "A separate external-response channel would make MCP feel special and require extra loop logic.", + "zh": { + "title": "外部结果复用 Tool Result 路径", + "description": "MCP 响应会像普通 tool result 一样追加到对话中。这样 agent 循环无需改变,同时外部系统仍然可以参与。" + }, + "ja": { + "title": "外部結果は tool result 経路を再利用する", + "description": "MCP の応答は通常の tool result と同じように会話へ追加されます。エージェントループを変えずに外部システムを参加させられます。" + } + } + ] +} diff --git a/web/src/data/annotations/s20.json b/web/src/data/annotations/s20.json new file mode 100644 index 0000000..9bd319f --- /dev/null +++ b/web/src/data/annotations/s20.json @@ -0,0 +1,47 @@ +{ + "version": "s20", + "decisions": [ + { + "id": "composition-over-new-loop", + "title": "The Final Agent Composes Previous Layers", + "description": "The comprehensive agent does not replace the loop with a new architecture. It composes memory, tasks, skills, background work, teams, worktrees, and MCP around the same core model-tool-result cycle.", + "alternatives": "A new orchestration framework would look more impressive, but it would hide the continuity across the course.", + "zh": { + "title": "最终 Agent 是组合既有层,而不是换掉循环", + "description": "综合 Agent 没有用新架构替换循环,而是把 memory、task、skill、后台任务、团队、worktree、MCP 组合到同一个模型-工具-结果循环周围。" + }, + "ja": { + "title": "最終エージェントは既存レイヤーの合成", + "description": "総合エージェントはループを新しい構造で置き換えません。memory、task、skill、バックグラウンド処理、チーム、worktree、MCP を同じ model-tool-result サイクルの周囲に合成します。" + } + }, + { + "id": "single-source-of-runtime-truth", + "title": "Runtime State Has Named Sources", + "description": "Context assembly pulls from named sources such as memory, task graph, skills, tool registry, and policy. 
This keeps a large agent debuggable because each piece of prompt context has an owner.", + "alternatives": "Dumping everything into one prompt string is shorter, but it becomes impossible to tell which subsystem caused a bad decision.", + "zh": { + "title": "运行时状态来自具名来源", + "description": "上下文组装从 memory、task graph、skills、tool registry、policy 等具名来源读取。大型 agent 因此仍可调试,因为每块 prompt context 都有清晰归属。" + }, + "ja": { + "title": "ランタイム状態には名前付きの出所がある", + "description": "コンテキスト組み立ては memory、task graph、skills、tool registry、policy などの名前付きソースから取得します。各 prompt context に所有者があるため、大きなエージェントでもデバッグ可能です。" + } + }, + { + "id": "recovery-is-first-class", + "title": "Recovery Is Part of the Main Flow", + "description": "Compaction, error recovery, and asynchronous result collection are treated as normal loop behavior. The final lesson shows that production agents spend as much effort recovering and resuming as they do calling tools.", + "alternatives": "Recovery could be left as error handling around the edges, but then the architecture would understate what real long-running agents need.", + "zh": { + "title": "恢复能力是一等流程", + "description": "压缩、错误恢复、异步结果收集都被视为正常循环行为。最终课展示了生产级 agent 在恢复和续跑上投入的工程量,并不低于调用工具本身。" + }, + "ja": { + "title": "リカバリは主要フローの一部", + "description": "圧縮、エラー回復、非同期結果収集を通常のループ動作として扱います。実運用の長時間エージェントでは、ツール呼び出しと同じくらい回復と再開が重要であることを示します。" + } + } + ] +} diff --git a/web/src/data/execution-flows.ts b/web/src/data/execution-flows.ts index 72ce54d..e72276c 100644 --- a/web/src/data/execution-flows.ts +++ b/web/src/data/execution-flows.ts @@ -308,8 +308,497 @@ export const EXECUTION_FLOWS: Record = { { from: "append", to: "llm" }, ], }, + s13: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 }, + { id: "bg_check", label: "background?", type: "decision", x: COL_LEFT, y: 280 }, + { id: 
"spawn", label: "Spawn Thread", type: "subprocess", x: 70, y: 370 }, + { id: "placeholder", label: "Return Placeholder", type: "process", x: 70, y: 450 }, + { id: "notify", label: "Notification\nQueue", type: "process", x: COL_RIGHT, y: 500 }, + { id: "collect", label: "Collect Results", type: "process", x: COL_RIGHT, y: 590 }, + { id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 110, y: 370 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 690 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "bg_check", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "bg_check", to: "spawn", label: "bg" }, + { from: "bg_check", to: "exec", label: "fg" }, + { from: "spawn", to: "placeholder" }, + { from: "spawn", to: "notify", label: "done" }, + { from: "notify", to: "collect" }, + { from: "placeholder", to: "append" }, + { from: "exec", to: "append" }, + { from: "collect", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s14: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "tool_check", label: "cron tool?", type: "decision", x: COL_CENTER, y: 190 }, + { id: "schedule", label: "schedule_cron", type: "subprocess", x: COL_LEFT, y: 280 }, + { id: "store", label: "Durable Store\n.scheduled_tasks", type: "process", x: COL_LEFT, y: 360 }, + { id: "scheduler", label: "Scheduler Loop", type: "process", x: COL_CENTER, y: 450 }, + { id: "match", label: "cron_matches?", type: "decision", x: COL_CENTER, y: 540 }, + { id: "queue", label: "Cron Queue", type: "process", x: COL_RIGHT, y: 540 }, + { id: "processor", label: "Queue Processor", type: "process", x: COL_RIGHT, y: 630 }, + { id: "agent", label: "Agent Loop", type: "process", x: COL_CENTER, y: 
720 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "schedule", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "schedule", to: "store" }, + { from: "store", to: "scheduler" }, + { from: "scheduler", to: "match" }, + { from: "match", to: "queue", label: "due" }, + { from: "queue", to: "processor" }, + { from: "processor", to: "agent" }, + { from: "agent", to: "llm" }, + ], + }, + s15: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "lead", label: "Lead LLM", type: "process", x: COL_CENTER, y: 110 }, + { id: "team_tool", label: "team tool?", type: "decision", x: COL_CENTER, y: 200 }, + { id: "spawn", label: "Spawn Teammate", type: "subprocess", x: COL_LEFT, y: 300 }, + { id: "send", label: "Send Message", type: "subprocess", x: COL_CENTER, y: 300 }, + { id: "bus", label: "MessageBus\n.mailboxes", type: "process", x: COL_CENTER, y: 400 }, + { id: "teammate", label: "Teammate Loop", type: "process", x: COL_RIGHT, y: 500 }, + { id: "tools", label: "Scoped Tools", type: "subprocess", x: COL_RIGHT, y: 590 }, + { id: "inbox", label: "Lead Inbox", type: "process", x: COL_CENTER, y: 700 }, + { id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 700 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 300 }, + ], + edges: [ + { from: "start", to: "lead" }, + { from: "lead", to: "team_tool" }, + { from: "team_tool", to: "spawn", label: "spawn" }, + { from: "team_tool", to: "send", label: "send" }, + { from: "team_tool", to: "end", label: "no" }, + { from: "spawn", to: "bus", label: "register" }, + { from: "send", to: "bus" }, + { from: "bus", to: "teammate" }, + { from: "teammate", to: "tools" }, + { from: "tools", to: "bus", label: "reply" }, + { from: "bus", to: "inbox" }, + { from: "inbox", to: "append" }, + { from: "append", to: 
"lead" }, + ], + }, + s16: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "lead", label: "Lead LLM", type: "process", x: COL_CENTER, y: 110 }, + { id: "protocol", label: "protocol?", type: "decision", x: COL_CENTER, y: 200 }, + { id: "request", label: "request_plan /\nrequest_shutdown", type: "subprocess", x: COL_LEFT, y: 300 }, + { id: "pending", label: "Pending Requests\nrequest_id", type: "process", x: COL_LEFT, y: 390 }, + { id: "dispatch", label: "Dispatch Message", type: "process", x: COL_CENTER, y: 470 }, + { id: "teammate", label: "Teammate Handler", type: "process", x: COL_RIGHT, y: 470 }, + { id: "response", label: "submit_plan /\nack shutdown", type: "subprocess", x: COL_RIGHT, y: 560 }, + { id: "match", label: "match_response?", type: "decision", x: COL_CENTER, y: 640 }, + { id: "append", label: "Append Protocol\nResult", type: "process", x: COL_CENTER, y: 730 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 300 }, + ], + edges: [ + { from: "start", to: "lead" }, + { from: "lead", to: "protocol" }, + { from: "protocol", to: "request", label: "yes" }, + { from: "protocol", to: "end", label: "no" }, + { from: "request", to: "pending" }, + { from: "pending", to: "dispatch" }, + { from: "dispatch", to: "teammate" }, + { from: "teammate", to: "response" }, + { from: "response", to: "match" }, + { from: "match", to: "append", label: "matched" }, + { from: "append", to: "lead" }, + ], + }, + s17: { + nodes: [ + { id: "start", label: "System Tick", type: "start", x: COL_CENTER, y: 30 }, + { id: "idle", label: "Idle Poll", type: "process", x: COL_CENTER, y: 110 }, + { id: "scan", label: "Scan Tasks", type: "subprocess", x: COL_CENTER, y: 190 }, + { id: "claimable", label: "claimable?", type: "decision", x: COL_CENTER, y: 280 }, + { id: "claim", label: "claim_task\n(owner check)", type: "subprocess", x: COL_LEFT, y: 380 }, + { id: "work", label: "WORK State", type: "process", x: COL_LEFT, y: 470 
}, + { id: "complete", label: "complete_task", type: "subprocess", x: COL_LEFT, y: 560 }, + { id: "inbox", label: "Check Inbox", type: "process", x: COL_RIGHT, y: 380 }, + { id: "shutdown", label: "Shutdown?", type: "decision", x: COL_RIGHT, y: 470 }, + { id: "done", label: "IDLE / SHUTDOWN", type: "end", x: COL_RIGHT, y: 560 }, + ], + edges: [ + { from: "start", to: "idle" }, + { from: "idle", to: "scan" }, + { from: "scan", to: "claimable" }, + { from: "claimable", to: "claim", label: "yes" }, + { from: "claimable", to: "inbox", label: "no" }, + { from: "claim", to: "work" }, + { from: "work", to: "complete" }, + { from: "complete", to: "idle" }, + { from: "inbox", to: "shutdown" }, + { from: "shutdown", to: "done", label: "yes" }, + { from: "shutdown", to: "idle", label: "no" }, + ], + }, + s18: { + nodes: [ + { id: "start", label: "Task Selected", type: "start", x: COL_CENTER, y: 30 }, + { id: "create", label: "create_worktree", type: "subprocess", x: COL_CENTER, y: 110 }, + { id: "validate", label: "Validate Name", type: "process", x: COL_CENTER, y: 190 }, + { id: "git", label: "git worktree add", type: "subprocess", x: COL_LEFT, y: 290 }, + { id: "bind", label: "Bind Task\nworktree field", type: "process", x: COL_LEFT, y: 380 }, + { id: "run", label: "Run in Isolated\nDirectory", type: "subprocess", x: COL_CENTER, y: 470 }, + { id: "events", label: "Lifecycle Events\n.events.jsonl", type: "process", x: COL_RIGHT, y: 190 }, + { id: "close", label: "keep / remove", type: "decision", x: COL_CENTER, y: 560 }, + { id: "cleanup", label: "remove_worktree", type: "subprocess", x: COL_LEFT, y: 650 }, + { id: "keep", label: "keep_worktree", type: "process", x: COL_RIGHT, y: 650 }, + { id: "end", label: "Task Result", type: "end", x: COL_CENTER, y: 740 }, + ], + edges: [ + { from: "start", to: "create" }, + { from: "create", to: "validate" }, + { from: "validate", to: "git" }, + { from: "git", to: "bind" }, + { from: "bind", to: "run" }, + { from: "create", to: 
"events", label: "emit" }, + { from: "run", to: "events", label: "status" }, + { from: "run", to: "close" }, + { from: "close", to: "cleanup", label: "remove" }, + { from: "close", to: "keep", label: "keep" }, + { from: "cleanup", to: "end" }, + { from: "keep", to: "end" }, + ], + }, + s19: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "connect", label: "connect_mcp?", type: "decision", x: COL_CENTER, y: 200 }, + { id: "client", label: "MCP Client", type: "process", x: COL_LEFT, y: 300 }, + { id: "discover", label: "Discover Tools", type: "subprocess", x: COL_LEFT, y: 390 }, + { id: "pool", label: "Assemble Tool Pool\nmcp__server__tool", type: "process", x: COL_CENTER, y: 480 }, + { id: "call", label: "MCP Tool Call", type: "subprocess", x: COL_RIGHT, y: 390 }, + { id: "server", label: "External Server", type: "process", x: COL_RIGHT, y: 480 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 580 }, + { id: "end", label: "Output", type: "end", x: 520, y: 200 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "connect" }, + { from: "connect", to: "client", label: "connect" }, + { from: "connect", to: "call", label: "use" }, + { from: "connect", to: "end", label: "done" }, + { from: "client", to: "discover" }, + { from: "discover", to: "pool" }, + { from: "pool", to: "llm" }, + { from: "call", to: "server" }, + { from: "server", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s20: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "context", label: "Assemble Context\nmemory + tasks", type: "process", x: COL_CENTER, y: 115 }, + { id: "policy", label: "Policy + Hooks", type: "process", x: COL_LEFT, y: 210 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 300 }, + { id: "router", label: "Route Tool", type: 
"decision", x: COL_CENTER, y: 390 }, + { id: "builtin", label: "Built-in Tools", type: "subprocess", x: 85, y: 500 }, + { id: "team", label: "Teams /\nProtocols", type: "subprocess", x: 230, y: 500 }, + { id: "async", label: "Background /\nCron", type: "subprocess", x: 370, y: 500 }, + { id: "external", label: "Worktree /\nMCP", type: "subprocess", x: 515, y: 500 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 610 }, + { id: "recover", label: "Recover /\nCompact", type: "process", x: COL_LEFT, y: 700 }, + { id: "end", label: "Final Output", type: "end", x: COL_RIGHT, y: 390 }, + ], + edges: [ + { from: "start", to: "context" }, + { from: "context", to: "policy" }, + { from: "policy", to: "llm" }, + { from: "context", to: "llm" }, + { from: "llm", to: "router" }, + { from: "router", to: "builtin", label: "local" }, + { from: "router", to: "team", label: "team" }, + { from: "router", to: "async", label: "async" }, + { from: "router", to: "external", label: "ext" }, + { from: "router", to: "end", label: "done" }, + { from: "builtin", to: "append" }, + { from: "team", to: "append" }, + { from: "async", to: "append" }, + { from: "external", to: "append" }, + { from: "append", to: "recover" }, + { from: "recover", to: "context" }, + ], + }, +}; + +const CURRENT_FLOW_OVERRIDES: Record = { + s03: { + nodes: [ + { id: "start", label: "Tool Call", type: "start", x: COL_CENTER, y: 30 }, + { id: "hard", label: "Hard Deny?", type: "decision", x: COL_CENTER, y: 120 }, + { id: "rules", label: "Rule Match?", type: "decision", x: COL_CENTER, y: 220 }, + { id: "ask", label: "Ask User", type: "subprocess", x: COL_LEFT, y: 320 }, + { id: "allow", label: "Approved?", type: "decision", x: COL_LEFT, y: 410 }, + { id: "exec", label: "Execute Tool", type: "process", x: COL_CENTER, y: 520 }, + { id: "blocked_policy", label: "Blocked", type: "end", x: COL_RIGHT, y: 120 }, + { id: "blocked_user", label: "Blocked", type: "end", x: COL_RIGHT, y: 410 }, + { id: 
"append", label: "Append Result", type: "process", x: COL_CENTER, y: 610 }, + ], + edges: [ + { from: "start", to: "hard" }, + { from: "hard", to: "blocked_policy", label: "deny" }, + { from: "hard", to: "rules", label: "ok" }, + { from: "rules", to: "ask", label: "needs approval" }, + { from: "rules", to: "exec", label: "allow" }, + { from: "ask", to: "allow" }, + { from: "allow", to: "blocked_user", label: "no" }, + { from: "allow", to: "exec", label: "yes" }, + { from: "exec", to: "append" }, + ], + }, + s04: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "user_hook", label: "UserPromptSubmit\nHooks", type: "subprocess", x: COL_CENTER, y: 120 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 220 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 310 }, + { id: "pre", label: "PreToolUse\nHooks", type: "subprocess", x: COL_LEFT, y: 410 }, + { id: "blocked", label: "Blocked?", type: "decision", x: COL_LEFT, y: 500 }, + { id: "exec", label: "Tool Handler", type: "process", x: COL_CENTER, y: 600 }, + { id: "post", label: "PostToolUse\nHooks", type: "subprocess", x: COL_CENTER, y: 690 }, + { id: "stop", label: "Stop Hooks", type: "subprocess", x: COL_RIGHT, y: 410 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 500 }, + ], + edges: [ + { from: "start", to: "user_hook" }, + { from: "user_hook", to: "llm" }, + { from: "llm", to: "tool_check" }, + { from: "tool_check", to: "pre", label: "yes" }, + { from: "tool_check", to: "stop", label: "no" }, + { from: "pre", to: "blocked" }, + { from: "blocked", to: "end", label: "yes" }, + { from: "blocked", to: "exec", label: "no" }, + { from: "exec", to: "post" }, + { from: "post", to: "llm" }, + { from: "stop", to: "end" }, + ], + }, + s05: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 120 }, + { id: 
"tool", label: "tool_use?", type: "decision", x: COL_CENTER, y: 210 }, + { id: "todo", label: "todo_write?", type: "decision", x: COL_LEFT, y: 310 }, + { id: "update", label: "Update\ncurrent_todos", type: "process", x: COL_LEFT, y: 410 }, + { id: "other", label: "Run Tool", type: "subprocess", x: COL_CENTER, y: 410 }, + { id: "reminder", label: "3 rounds?\nInject Reminder", type: "process", x: COL_RIGHT, y: 500 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 590 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 310 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool" }, + { from: "tool", to: "todo", label: "yes" }, + { from: "tool", to: "end", label: "no" }, + { from: "todo", to: "update", label: "todo" }, + { from: "todo", to: "other", label: "other" }, + { from: "update", to: "append" }, + { from: "other", to: "append" }, + { from: "append", to: "reminder" }, + { from: "reminder", to: "llm" }, + ], + }, + s06: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "parent", label: "Parent LLM", type: "process", x: COL_CENTER, y: 120 }, + { id: "task_check", label: "task tool?", type: "decision", x: COL_CENTER, y: 220 }, + { id: "spawn", label: "Spawn Subagent\nfresh messages[]", type: "subprocess", x: COL_LEFT, y: 330 }, + { id: "subloop", label: "Subagent Loop\nmax 30 turns", type: "process", x: COL_LEFT, y: 430 }, + { id: "summary", label: "Return Summary\nOnly", type: "process", x: COL_LEFT, y: 530 }, + { id: "tool", label: "Run Parent Tool", type: "subprocess", x: COL_RIGHT, y: 330 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 630 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 220 }, + ], + edges: [ + { from: "start", to: "parent" }, + { from: "parent", to: "task_check" }, + { from: "task_check", to: "spawn", label: "task" }, + { from: "task_check", to: "tool", label: "other" }, + { from: 
"task_check", to: "end", label: "done" }, + { from: "spawn", to: "subloop" }, + { from: "subloop", to: "summary" }, + { from: "summary", to: "append" }, + { from: "tool", to: "append" }, + { from: "append", to: "parent" }, + ], + }, + s07: { + nodes: [ + { id: "start", label: "Startup", type: "start", x: COL_CENTER, y: 30 }, + { id: "scan", label: "Scan skills/", type: "process", x: COL_CENTER, y: 120 }, + { id: "catalog", label: "Inject Catalog\nOnly", type: "process", x: COL_CENTER, y: 210 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 310 }, + { id: "need", label: "load_skill?", type: "decision", x: COL_CENTER, y: 400 }, + { id: "read", label: "Read SKILL.md", type: "subprocess", x: COL_LEFT, y: 500 }, + { id: "inject", label: "Tool Result\nFull Skill", type: "process", x: COL_LEFT, y: 590 }, + { id: "other", label: "Other Tool", type: "subprocess", x: COL_RIGHT, y: 500 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 690 }, + ], + edges: [ + { from: "start", to: "scan" }, + { from: "scan", to: "catalog" }, + { from: "catalog", to: "llm" }, + { from: "llm", to: "need" }, + { from: "need", to: "read", label: "yes" }, + { from: "need", to: "other", label: "no" }, + { from: "read", to: "inject" }, + { from: "inject", to: "append" }, + { from: "other", to: "append" }, + { from: "append", to: "llm" }, + ], + }, + s08: { + nodes: [ + { id: "start", label: "messages[]", type: "start", x: COL_CENTER, y: 30 }, + { id: "budget", label: "Tool Result\nBudget", type: "process", x: COL_CENTER, y: 120 }, + { id: "snip", label: "Snip Compact", type: "process", x: COL_CENTER, y: 210 }, + { id: "micro", label: "Micro Compact", type: "process", x: COL_CENTER, y: 300 }, + { id: "threshold", label: "over limit?", type: "decision", x: COL_CENTER, y: 390 }, + { id: "summary", label: "LLM Summary\nCompact", type: "subprocess", x: COL_LEFT, y: 500 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 610 }, + { 
id: "too_long", label: "prompt_too_long?", type: "decision", x: COL_RIGHT, y: 610 }, + { id: "reactive", label: "Reactive Compact", type: "subprocess", x: COL_LEFT, y: 720 }, + ], + edges: [ + { from: "start", to: "budget" }, + { from: "budget", to: "snip" }, + { from: "snip", to: "micro" }, + { from: "micro", to: "threshold" }, + { from: "threshold", to: "summary", label: "yes" }, + { from: "threshold", to: "llm", label: "no" }, + { from: "summary", to: "llm" }, + { from: "llm", to: "too_long" }, + { from: "too_long", to: "reactive", label: "yes" }, + { from: "reactive", to: "llm" }, + ], + }, + s09: { + nodes: [ + { id: "start", label: "Session Start", type: "start", x: COL_CENTER, y: 30 }, + { id: "index", label: "Load MEMORY.md\nIndex", type: "process", x: COL_CENTER, y: 120 }, + { id: "select", label: "Select Relevant\nMemory Files", type: "process", x: COL_CENTER, y: 220 }, + { id: "inject", label: "Inject Memory\nContent", type: "process", x: COL_CENTER, y: 320 }, + { id: "compact", label: "Compact Pipeline", type: "subprocess", x: COL_CENTER, y: 420 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 520 }, + { id: "extract", label: "Extract New\nMemories", type: "subprocess", x: COL_LEFT, y: 630 }, + { id: "write", label: "Write .memory\nFiles", type: "process", x: COL_LEFT, y: 720 }, + { id: "dream", label: "Periodic\nConsolidate", type: "process", x: COL_RIGHT, y: 720 }, + ], + edges: [ + { from: "start", to: "index" }, + { from: "index", to: "select" }, + { from: "select", to: "inject" }, + { from: "inject", to: "compact" }, + { from: "compact", to: "llm" }, + { from: "llm", to: "extract" }, + { from: "extract", to: "write" }, + { from: "write", to: "index" }, + { from: "write", to: "dream" }, + ], + }, + s10: { + nodes: [ + { id: "start", label: "Runtime State", type: "start", x: COL_CENTER, y: 30 }, + { id: "sections", label: "PROMPT_SECTIONS", type: "process", x: COL_CENTER, y: 120 }, + { id: "context", label: "Build 
Context\nmemory/tools/workspace", type: "process", x: COL_CENTER, y: 220 }, + { id: "cache", label: "Cache Hit?", type: "decision", x: COL_CENTER, y: 320 }, + { id: "reuse", label: "Reuse Prompt", type: "process", x: COL_RIGHT, y: 420 }, + { id: "assemble", label: "Assemble Prompt", type: "subprocess", x: COL_LEFT, y: 420 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 540 }, + { id: "loop", label: "Tool Loop", type: "subprocess", x: COL_CENTER, y: 640 }, + ], + edges: [ + { from: "start", to: "sections" }, + { from: "sections", to: "context" }, + { from: "context", to: "cache" }, + { from: "cache", to: "reuse", label: "yes" }, + { from: "cache", to: "assemble", label: "no" }, + { from: "reuse", to: "llm" }, + { from: "assemble", to: "llm" }, + { from: "llm", to: "loop" }, + { from: "loop", to: "context" }, + ], + }, + s11: { + nodes: [ + { id: "start", label: "LLM Request", type: "start", x: COL_CENTER, y: 30 }, + { id: "try", label: "try LLM Call", type: "process", x: COL_CENTER, y: 120 }, + { id: "ok", label: "success?", type: "decision", x: COL_CENTER, y: 220 }, + { id: "tools", label: "Execute Tools", type: "process", x: COL_RIGHT, y: 330 }, + { id: "classify", label: "Classify Error", type: "decision", x: COL_LEFT, y: 330 }, + { id: "tokens", label: "max_tokens\nEscalate", type: "subprocess", x: 40, y: 440 }, + { id: "prompt", label: "prompt_too_long\nCompact", type: "subprocess", x: COL_LEFT, y: 610 }, + { id: "backoff", label: "429 / 529\nBackoff", type: "subprocess", x: COL_LEFT + 140, y: 440 }, + { id: "fallback", label: "Fallback Model", type: "process", x: COL_RIGHT, y: 540 }, + { id: "retry", label: "Retry Request", type: "process", x: COL_CENTER, y: 740 }, + ], + edges: [ + { from: "start", to: "try" }, + { from: "try", to: "ok" }, + { from: "ok", to: "tools", label: "yes" }, + { from: "ok", to: "classify", label: "error" }, + { from: "classify", to: "tokens", label: "max_tokens" }, + { from: "classify", to: "prompt", label: 
"too long" }, + { from: "classify", to: "backoff", label: "429/529" }, + { from: "backoff", to: "fallback", label: "repeated 529" }, + { from: "tokens", to: "retry" }, + { from: "prompt", to: "retry" }, + { from: "backoff", to: "retry" }, + { from: "fallback", to: "retry" }, + { from: "retry", to: "try" }, + ], + }, + s12: { + nodes: [ + { id: "start", label: "User Goal", type: "start", x: COL_CENTER, y: 30 }, + { id: "create", label: "create_task", type: "subprocess", x: COL_CENTER, y: 120 }, + { id: "save", label: "Persist JSON\n.tasks/", type: "process", x: COL_CENTER, y: 210 }, + { id: "list", label: "list / get", type: "subprocess", x: COL_RIGHT, y: 300 }, + { id: "deps", label: "blockedBy\ncomplete?", type: "decision", x: COL_CENTER, y: 390 }, + { id: "blocked", label: "Remain Pending", type: "end", x: COL_RIGHT, y: 490 }, + { id: "claim", label: "claim_task\nowner + in_progress", type: "subprocess", x: COL_LEFT, y: 490 }, + { id: "complete", label: "complete_task", type: "subprocess", x: COL_LEFT, y: 590 }, + { id: "unblock", label: "Report\nUnblocked", type: "process", x: COL_CENTER, y: 690 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 780 }, + ], + edges: [ + { from: "start", to: "create" }, + { from: "create", to: "save" }, + { from: "save", to: "list" }, + { from: "save", to: "deps" }, + { from: "deps", to: "blocked", label: "no" }, + { from: "deps", to: "claim", label: "yes" }, + { from: "claim", to: "complete" }, + { from: "complete", to: "unblock" }, + { from: "unblock", to: "append" }, + { from: "append", to: "list" }, + ], + }, }; export function getFlowForVersion(version: string): FlowDefinition | null { - return EXECUTION_FLOWS[version] ?? null; + return CURRENT_FLOW_OVERRIDES[version] ?? EXECUTION_FLOWS[version] ?? 
null; } diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json index b0a3f89..7db1561 100644 --- a/web/src/data/generated/docs.json +++ b/web/src/data/generated/docs.json @@ -2,217 +2,361 @@ { "version": "s01", "locale": "en", - "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. 
Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" - }, - { - "version": "s02", - "locale": "en", - "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. 
The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" - }, - { - "version": "s03", - "locale": "en", - "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. 
A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. 
The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" - }, - { - "version": "s04", - "locale": "en", - "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. 
The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. 
`Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" - }, - { - "version": "s05", - "locale": "en", - "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" - }, - { - "version": "s06", - "locale": "en", - "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. 
`Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" - }, - { - "version": "s07", - "locale": "en", - "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). 
The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" - }, - { - "version": "s08", - "locale": "en", - "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. 
`Run pytest in the background and keep working on other things`\n" - }, - { - "version": "s09", - "locale": "en", - "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. 
`spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n" - }, - { - "version": "s10", - "locale": "en", - "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. 
You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. 
The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n" - }, - { - "version": "s11", - "locale": "en", - "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. 
Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. 
`Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" - }, - { - "version": "s12", - "locale": "en", - "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. 
**Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. 
Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "title": "s01: The Agent Loop — One Loop Is All You Need", + "content": "# s01: The Agent Loop — One Loop Is All You Need\n\n`s01` → [s02](/en/s02) → s03 → s04 → ... → s20\n> *\"One loop & Bash is all you need\"* — One tool + one loop = one Agent.\n>\n> **Harness Layer**: The Loop — the first bridge between the model and the real world.\n\n---\n\n## The Problem\n\nYou ask the model: \"List the files in my directory and run XXX.py.\"\n\nThe model can output a bash command, but once it's done outputting, it stops — it won't execute the command on its own, and it won't keep reasoning based on the result.\n\nYou could run it manually, paste the output back into the chat, and let it continue. Next command comes out, you run it again, paste it back.\n\nEvery round-trip, you're the middle layer. 
Automating that is what this chapter is about.\n\n---\n\n## The Solution\n\n![Agent Loop](/course-assets/s01_agent_loop/agent-loop.en.svg)\n\nA `while True` loop: keep going when the model calls a tool, stop when it doesn't. The entire process hinges on two signals:\n\n| Signal | Meaning | Loop Action |\n|--------|---------|-------------|\n| `stop_reason == \"tool_use\"` | Model raises hand: \"I need a tool\" | Execute → feed result back → continue |\n| `stop_reason != \"tool_use\"` | Model says: \"I'm done\" | Exit loop |\n\n---\n\n## How It Works\n\nLet's translate this process into code. Step by step:\n\n**Step 1**: Start with the user's question as the first message.\n\n```python\nmessages = [{\"role\": \"user\", \"content\": query}]\n```\n\n**Step 2**: Send the messages and tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n**Step 3**: Append the model's response and check whether it called a tool. 
No tool call → done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n**Step 4**: Execute the tool the model requested and collect the results.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n**Step 5**: Append the tool results as a new message and go back to Step 2.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into a complete function:\n\n```python\ndef agent_loop(messages):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nUnder 30 lines — that's the minimal runnable agent harness kernel. It's not intelligence itself, but the smallest runtime framework that lets the model keep acting. The model decides (whether to call a tool, which one), the harness executes (if called, run it, feed the result back). The next 18 chapters all add mechanisms on top of this loop. The loop itself never changes.\n\n---\n\n## Try It\n\n> **Teaching demo notice**: The code executes shell commands generated by the model. Run it in a temporary test directory to avoid affecting your project files. 
s03 covers the real permission system.\n\n**Setup** (first run):\n\n```sh\npip install -r requirements.txt\ncp .env.example .env\n# Edit .env, fill in ANTHROPIC_API_KEY and MODEL_ID\n```\n\n**Run**:\n\n```sh\npython s01_agent_loop/code.py\n```\n\nTry these prompts:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n\nWhat to watch for: When does the model call a tool (loop continues), and when does it not (loop ends)?\n\n---\n\n## What's Next\n\nRight now the model only has bash — reading files requires `cat`, writing files requires `echo ... >`, finding files requires `find`. Ugly and error-prone.\n\n→ s02 Tool Use: What happens when we give it 5 proper tools? Will the model call multiple tools at once? Will parallel tool executions step on each other?\n\n
\nDive into CC Source Code\n\n> The following is based on a review of the Claude Code (CC) source file `src/query.ts` (1729 lines). The core differences are twofold: CC doesn't rely on the `stop_reason` field to decide whether to continue the loop — instead it checks whether the content contains `tool_use` blocks (because `stop_reason` is unreliable in streaming responses); and CC has more exit paths and recovery strategies for production-grade protection.\n\n**The 30-line `while True` from the teaching version IS the core of CC's 1729 lines.** Everything below is a protection mechanism layered on top of that core.\n\n
\n1. Loop Structure Differences\n\nThe teaching version checks `response.stop_reason`. CC doesn't rely on it to decide whether the loop continues — in streaming responses, `stop_reason` may not have been updated yet even though `tool_use` blocks are already present. Instead, CC uses a `needsFollowUp` flag: while the streamed message is being received (`query.ts:830-834`), it's set to `true` whenever a `tool_use` block is detected. `QueryEngine.ts` captures the real `stop_reason` from `message_delta` for other logic, but the query loop itself relies on `needsFollowUp`.\n\n```typescript\n// query.ts:554-558\n// stop_reason === 'tool_use' is unreliable.\n// Set during streaming whenever a tool_use block arrives.\nlet needsFollowUp = false\n```\n\n
\n\n
\n2. State Object — 10 Fields (Teaching Version Only Uses messages)\n\n| # | Field | Purpose | Chapter |\n|---|-------|---------|---------|\n| 1 | `messages` | Message array for the current iteration | s01 |\n| 2 | `toolUseContext` | Tool, signal, and permission context | s02 |\n| 3 | `autoCompactTracking` | Compaction state tracking | s08 |\n| 4 | `maxOutputTokensRecoveryCount` | Token recovery attempt count (max 3) | s11 |\n| 5 | `hasAttemptedReactiveCompact` | Whether reactive compaction was attempted this round | s08 |\n| 6 | `maxOutputTokensOverride` | 8K→64K upgrade override | s11 |\n| 7 | `pendingToolUseSummary` | Background Haiku-generated tool use summary | s08 |\n| 8 | `stopHookActive` | Whether the stop hook produced a blocking error | s04 |\n| 9 | `turnCount` | Turn count (for maxTurns check) | s01 |\n| 10 | `transition` | Last continue reason | s11 |\n\n> Note: `taskBudgetRemaining` (`query.ts:291`) is a loop-local variable, not on State. The source comment explicitly says \"Loop-local (not on State)\".\n\n
\n\n
\n3. Multiple Exit and Continue Paths\n\nThe teaching version has only 1 exit path (model doesn't call a tool → done). The production version has multiple exit and continue paths, covering blocking limit, prompt too long, model error, abort, hook stop, max turns, token budget continuation, reactive compact retry, and more. Each scenario has a corresponding recovery or exit strategy.\n\n
\n\n
\n4. Streaming Tool Execution and QueryEngine\n\nCC's `StreamingToolExecutor` (`query.ts:561`) lets tools start executing while the model is still generating (concurrency-safe tools run in parallel; the others run exclusively, one at a time). `QueryEngine.ts` adds further protections against cost overruns, structured output validation failures, and more. The teaching version doesn't implement these — the goal is conceptual clarity, not peak performance.\n\n
\n\n**In one sentence**: The core of query.ts's 1729 lines is a 30-line `while True`. All the complex fields and exit paths are protection mechanisms. Understand the core loop first, and everything that follows unfolds naturally.\n\n
\n\n\n" }, { "version": "s01", "locale": "zh", - "title": "s01: The Agent Loop (Agent 循环)", - "content": "# s01: The Agent Loop (Agent 循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个 Agent。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 
执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个 Agent。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" - }, - { - "version": "s02", - "locale": "zh", - "title": "s02: Tool Use (工具使用)", - "content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" - }, - { - "version": "s03", - "locale": "zh", - "title": "s03: TodoWrite (待办写入)", - "content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. 
TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. 
`Review all Python files and fix any style issues`\n" - }, - { - "version": "s04", - "locale": "zh", - "title": "s04: Subagents (Subagent)", - "content": "# s04: Subagents (Subagent)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- Subagent 用独立 messages[], 不污染主对话。\n\n## 问题\n\nAgent 工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父 Agent 只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父 Agent 有一个 `task` 工具。Subagent 拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
Subagent 以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父 Agent。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nSubagent 可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父 Agent 收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. 
`Use a task to create a new module, then verify it from here`\n" - }, - { - "version": "s05", - "locale": "zh", - "title": "s05: Skills (Skill 加载)", - "content": "# s05: Skills (Skill 加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n\n## 问题\n\n你希望 Agent 遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个 Skill, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放 Skill 名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个 Skill 是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为 Skill 标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些 Skill (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + Skill 描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" - }, - { - "version": "s06", - "locale": "zh", - "title": "s06: Context Compact (上下文压缩)", - "content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, Agent 根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" - }, - { - "version": "s07", - "locale": "zh", - "title": "s07: Task System (任务系统)", - "content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, Agent 分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. 
**TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". 
Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" - }, - { - "version": "s08", - "locale": "zh", - "title": "s08: Background Tasks (后台任务)", - "content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", Agent 却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. 
`Run pytest in the background and keep working on other things`\n" - }, - { - "version": "s09", - "locale": "zh", - "title": "s09: Agent Teams (Agent 团队)", - "content": "# s09: Agent Teams (Agent 团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n\n## 问题\n\nSubagent (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。Background Tasks (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久 Agent, (2) 身份和生命周期管理, (3) Agent 之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agent 数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 
输入 `/inbox` 手动检查领导的收件箱\n" - }, - { - "version": "s10", - "locale": "zh", - "title": "s10: Team Protocols (团队协议)", - "content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 输入 `/team` 监控状态\n" - }, - { - "version": "s11", - "locale": "zh", - "title": "s11: Autonomous Agents (Autonomous Agent)", - "content": "# s11: Autonomous Agents (Autonomous Agent)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: Context Compact (s06) 后 Agent 可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? 
----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. 
Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 输入 `/team` 监控谁在工作、谁在空闲\n" - }, - { - "version": "s12", - "locale": "zh", - "title": "s12: Worktree + Task Isolation (Worktree 任务隔离)", - "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n\n## 问题\n\n到 s11, Agent 已经能自主认领和完成任务。但所有任务共享一个目录。两个 Agent 同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: 
absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. 
**事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "title": "s01: Agent Loop — 一个循环就够了", + "content": "# s01: Agent Loop — 一个循环就够了\n\n`s01` → [s02](/zh/s02) → s03 → s04 → ... 
→ s20\n> *\"One loop & Bash is all you need\"* — 一个工具 + 一个循环 = 一个 Agent。\n>\n> **Harness 层**: 循环 — 模型与真实世界的第一道连接。\n\n---\n\n## 问题\n\n你提出了一个问题给大模型:“帮我读取下我的目录下有哪些文件,并且执行XXX.py”。\n\n模型能输出一条 bash 命令,但输出完了就停了,它不会自己跑,也不会看到结果后继续推理。\n\n你可以手动跑一遍,把输出粘贴回对话框,让它接着干。下一个命令出来,你再跑一遍、再贴回去。\n\n每一个来回,你都在做中间层。而把它自动化,就是这一章要做的事。\n\n---\n\n## 解决方案\n\n![Agent Loop](/course-assets/s01_agent_loop/agent-loop.svg)\n\n一个 `while True` 循环,模型调用工具就继续,不调用就停。整个过程只有两个信号:\n\n| 信号 | 含义 | 循环动作 |\n|------|------|---------|\n| `stop_reason == \"tool_use\"` | 模型举手说\"我要用工具\" | 执行 → 结果喂回去 → 继续 |\n| `stop_reason != \"tool_use\"` | 模型说\"我做完了\" | 退出循环 |\n\n---\n\n## 工作原理\n\n将这个过程翻译成代码。分步来看:\n\n**第 1 步**:把用户的问题作为第一条消息。\n\n```python\nmessages = [{\"role\": \"user\", \"content\": query}]\n```\n\n**第 2 步**:将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n**第 3 步**:追加模型回答,检查它是否调了工具。没调 → 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n**第 4 步**:执行模型要求的工具,收集结果。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n**第 5 步**:把工具结果作为新消息追加,回到第 2 步。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(messages):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n 
\"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行,这就是最小可运行的 agent harness 内核。它不是智能本身,而是让模型能持续行动的最小运行框架,模型负责决策(要不要调工具、调哪个),harness 负责执行(调了就跑、结果喂回去)。后面 18 个章节都在这个循环上叠加机制,循环本身始终不变。\n\n---\n\n## 试一下\n\n> **教学 demo 提示**:代码会执行模型生成的 shell 命令。建议在一个临时测试目录中运行,避免影响你的项目文件。s03 会讲真正的权限系统。\n\n**准备**(首次运行):\n\n```sh\npip install -r requirements.txt\ncp .env.example .env\n# 编辑 .env,填入 ANTHROPIC_API_KEY 和 MODEL_ID\n```\n\n**运行**:\n\n```sh\npython s01_agent_loop/code.py\n```\n\n试试这些 prompt:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n\n观察重点:模型什么时候调用工具(循环继续),什么时候不调用(循环结束)?\n\n---\n\n## 接下来\n\n现在模型手里只有 bash 一个工具,读文件要 `cat`,写文件要 `echo ... >`,找个文件要 `find`,又丑又容易出错。\n\ns02 Tool Use → 给它 5 个真正的工具,会发生什么?模型会不会一次调用多个工具?几个工具同时跑会不会互相踩?\n\n
\n深入 CC 源码\n\n> 以下内容基于 CC 源码 `src/query.ts`(1729 行)的核查。核心差异就两个:CC 判断循环是否继续时不依赖 `stop_reason` 字段,而是检查内容里有没有 `tool_use` 块(因为流式响应中 `stop_reason` 不可靠);CC 有更多的退出路径和恢复策略做生产级保护。\n\n**教学版的 30 行 `while True` 就是 CC 1729 行的核心。** 下面每一项都是在这个核心上叠加的保护机制。\n\n
\n一、循环结构差异\n\n教学版检查 `response.stop_reason`。CC 不把它作为循环继续的唯一依据——流式响应中 `stop_reason` 可能还没更新但内容里已经有 `tool_use` 块了。CC 用 `needsFollowUp` 标志:接收到流式消息时(`query.ts:830-834`),只要检测到 `tool_use` 块就设为 `true`;`QueryEngine.ts` 会从 `message_delta` 捕获真实 `stop_reason` 用于其他逻辑,但 query loop 本身靠 `needsFollowUp` 决定是否继续。\n\n```typescript\n// query.ts:554-558\n// stop_reason === 'tool_use' is unreliable.\n// Set during streaming whenever a tool_use block arrives.\nlet needsFollowUp = false\n```\n\n
\n\n
\n二、State 对象 10 字段(教学版只用 messages)\n\n| # | 字段 | 用途 | 对应章节 |\n|---|------|------|---------|\n| 1 | `messages` | 当前迭代的消息数组 | s01 |\n| 2 | `toolUseContext` | 工具、信号、权限上下文 | s02 |\n| 3 | `autoCompactTracking` | 压缩状态追踪 | s08 |\n| 4 | `maxOutputTokensRecoveryCount` | token 恢复尝试次数(上限 3) | s11 |\n| 5 | `hasAttemptedReactiveCompact` | 本轮是否已尝试响应式压缩 | s08 |\n| 6 | `maxOutputTokensOverride` | 8K→64K 的升级覆盖 | s11 |\n| 7 | `pendingToolUseSummary` | 后台 Haiku 生成的 tool use 摘要 | s08 |\n| 8 | `stopHookActive` | 停止钩子是否产生阻塞错误 | s04 |\n| 9 | `turnCount` | 轮次计数(maxTurns 检查) | s01 |\n| 10 | `transition` | 上一次继续原因 | s11 |\n\n> 注:`taskBudgetRemaining`(`query.ts:291`)是 loop-local 局部变量,不在 State 上。源码注释明确写了 \"Loop-local (not on State)\"。\n\n
\n\n
\n三、多条退出和继续路径\n\n教学版只有 1 条退出路径(模型不调工具就结束)。生产版有多条退出和继续路径,覆盖 blocking limit、prompt too long、model error、abort、hook stop、max turns、token budget continuation、reactive compact retry 等场景。每种场景都有对应的恢复或退出策略。\n\n
\n\n
\n四、流式工具执行和 QueryEngine\n\nCC 的 `StreamingToolExecutor`(`query.ts:561`)让工具在模型还在生成时就开始并行执行(根据工具是否 concurrency-safe 决定并发或独占)。`QueryEngine.ts` 额外加了费用超限、结构化输出验证失败等保护。教学版不实现这些——目标是概念清晰,不是性能极致。\n\n
\n\n**一句话**:1729 行的 query.ts 核心就是 30 行 `while True`。所有复杂字段和退出路径都是保护机制。先理解核心循环,后面的一切自然展开。\n\n
\n\n\n" }, { "version": "s01", "locale": "ja", - "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 
各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" + "title": "s01: Agent Loop — ループ一つで十分", + "content": "# s01: Agent Loop — ループ一つで十分\n\n`s01` → [s02](/ja/s02) → s03 → s04 → ... 
→ s20\n> *\"One loop & Bash is all you need\"* — ツール一つ + ループ一つ = 一つの Agent。\n>\n> **Harness レイヤー**: ループ — モデルと現実世界をつなぐ最初の架け橋。\n\n---\n\n## 課題\n\nモデルにこう頼んだとする:「ディレクトリ内のファイル一覧を取得して、XXX.py を実行して」。\n\nモデルは bash コマンドを出力できるが、出力が終わると止まってしまう — 自分で実行することも、結果を見て推論を続けることもない。\n\n手動で実行し、出力をチャットに貼り付ければ、モデルは続きを生成できる。次のコマンドが出たら、また実行して貼り付ける。\n\n毎回の往復で、あなたが中間層になっている。これを自動化するのが、この章の目的だ。\n\n---\n\n## ソリューション\n\n![Agent Loop](/course-assets/s01_agent_loop/agent-loop.ja.svg)\n\n一つの `while True` ループ — モデルがツールを呼べば続き、呼ばなければ停止。全体でたった 2 つのシグナル:\n\n| シグナル | 意味 | ループの動作 |\n|----------|------|-------------|\n| `stop_reason == \"tool_use\"` | モデルが「ツールが必要」と挙手 | 実行 → 結果を戻す → 続行 |\n| `stop_reason != \"tool_use\"` | モデルが「完了」と宣言 | ループ終了 |\n\n---\n\n## 仕組み\n\nこのプロセスをコードに変換してみよう。ステップごとに:\n\n**ステップ 1**:ユーザーの質問を最初のメッセージとして設定する。\n\n```python\nmessages = [{\"role\": \"user\", \"content\": query}]\n```\n\n**ステップ 2**:メッセージとツール定義を一緒に LLM に送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n**ステップ 3**:モデルの応答を追加し、ツールを呼び出したか確認する。呼び出しなし → 終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n**ステップ 4**:モデルが要求したツールを実行し、結果を収集する。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n**ステップ 5**:ツールの結果を新しいメッセージとして追加し、ステップ 2 に戻る。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n完全な関数に組み立てる:\n\n```python\ndef agent_loop(messages):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = 
[]\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n30 行未満 — これが最小実行可能な agent harness のカーネルだ。これは知能そのものではなく、モデルが継続的に行動できるための最小ランタイムフレームワーク。モデルが決定し(ツールを呼ぶか、どれを呼ぶか)、harness が実行する(呼ばれたら実行し、結果を戻す)。次の 18 章はすべてこのループの上に仕組みを積み重ねていく。ループ自体は永遠に変わらない。\n\n---\n\n## 試してみよう\n\n> **教育デモの注意**: このコードはモデルが生成したシェルコマンドを実行します。プロジェクトファイルへの影響を避けるため、一時テストディレクトリで実行してください。s03 で本格的な権限システムを説明します。\n\n**準備**(初回のみ):\n\n```sh\npip install -r requirements.txt\ncp .env.example .env\n# .env を編集し、ANTHROPIC_API_KEY と MODEL_ID を入力\n```\n\n**実行**:\n\n```sh\npython s01_agent_loop/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n\n観察のポイント:モデルがツールを呼び出すとき(ループ継続)、呼び出さないとき(ループ終了)の違い。\n\n---\n\n## 次へ\n\n現在、モデルが持っているのは bash だけだ — ファイルを読むには `cat`、書くには `echo ... >`、探すには `find`。不便でエラーも起きやすい。\n\n→ s02 Tool Use:5 つの本格的なツールを与えたらどうなる? モデルは複数のツールを同時に呼び出すか? 並列実行で競合は起きないか?\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `src/query.ts`(1729 行)の検証に基づく。核心的な違いは二つ:CC はループ継続の判断に `stop_reason` フィールドを頼らず、コンテンツに `tool_use` ブロックが含まれるかをチェックする(ストリーミングレスポンスでは `stop_reason` が信頼できないため)。CC には本番環境向けのより多くの終了パスとリカバリ戦略がある。\n\n**教育版の 30 行 `while True` が CC の 1729 行の核心。** 以下の各項目は、すべてその核心の上に積み重ねられた保護機構である。\n\n
\n一、ループ構造の違い\n\n教育版は `response.stop_reason` をチェックする。CC はこれをループ継続の唯一の根拠として使わない — ストリーミングレスポンスでは、`stop_reason` がまだ更新されていなくても、コンテンツに既に `tool_use` ブロックが含まれている可能性がある。CC は `needsFollowUp` フラグを使用する:ストリーミングメッセージの受信時(`query.ts:830-834`)に、`tool_use` ブロックが検出されると `true` に設定される。`QueryEngine.ts` は `message_delta` から実際の `stop_reason` を取得して他の処理に利用するが、query loop 自体は `needsFollowUp` に依存する。\n\n```typescript\n// query.ts:554-558\n// stop_reason === 'tool_use' is unreliable.\n// Set during streaming whenever a tool_use block arrives.\nlet needsFollowUp = false\n```\n\n
\n\n
\n二、State オブジェクト 10 フィールド(教育版は messages のみ使用)\n\n| # | フィールド | 用途 | 対応章 |\n|---|-----------|------|--------|\n| 1 | `messages` | 現在のイテレーションのメッセージ配列 | s01 |\n| 2 | `toolUseContext` | ツール、シグナル、権限コンテキスト | s02 |\n| 3 | `autoCompactTracking` | 圧縮状態の追跡 | s08 |\n| 4 | `maxOutputTokensRecoveryCount` | トークンリカバリ試行回数(上限 3) | s11 |\n| 5 | `hasAttemptedReactiveCompact` | 今回のラウンドでリアクティブ圧縮を試みたか | s08 |\n| 6 | `maxOutputTokensOverride` | 8K→64K へのアップグレード上書き | s11 |\n| 7 | `pendingToolUseSummary` | バックグラウンド Haiku 生成のツール使用要約 | s08 |\n| 8 | `stopHookActive` | 停止フックがブロッキングエラーを発生させたか | s04 |\n| 9 | `turnCount` | ターン数(maxTurns チェック用) | s01 |\n| 10 | `transition` | 前回の継続理由 | s11 |\n\n> 注:`taskBudgetRemaining`(`query.ts:291`)は loop-local のローカル変数であり、State には含まれない。ソースコメントには明確に \"Loop-local (not on State)\" と書かれている。\n\n
\n\n
\n三、複数の終了パスと継続パス\n\n教育版には 1 つの終了パスしかない(モデルがツールを呼ばなければ終了)。本番版には複数の終了・継続パスがあり、blocking limit、prompt too long、model error、abort、hook stop、max turns、token budget continuation、reactive compact retry など多くのシナリオをカバーしている。各シナリオには対応するリカバリまたは終了戦略がある。\n\n
\n\n
\n四、ストリーミングツール実行と QueryEngine\n\nCC の `StreamingToolExecutor`(`query.ts:561`)は、モデルがまだ生成中にツールの実行を開始できる(concurrency-safe なツールは並列、それ以外は排他実行)。`QueryEngine.ts` はさらに、コスト超過や構造化出力の検証失敗などの保護を追加する。教育版はこれらを実装しない — 目標は概念の明確さであり、極限のパフォーマンスではない。\n\n
\n\n**一言で**: query.ts の 1729 行の核心は 30 行の `while True`。複雑なフィールドや終了パスはすべて保護機構だ。まず核心のループを理解すれば、その後のすべては自然に理解できる。\n\n
\n\n\n" + }, + { + "version": "s02", + "locale": "en", + "title": "s02: Tool Use — Add a Tool, Add Just One Line", + "content": "# s02: Tool Use — Add a Tool, Add Just One Line\n\ns01 → `s02` → [s03](/en/s03) → s04 → ... → s20\n> *\"Add a tool, add just one handler\"* — The loop stays the same. Register the new tool in the dispatch map and you're done.\n>\n> **Harness Layer**: Tool Dispatch — Expanding the model's reach.\n\n---\n\n## Only One Tool: Bash\n\nThe s01 Agent has only one tool: bash. To read a file, `cat`; to write, `echo \"...\" > file.py`; to edit, `sed`.\n\nThe model thinks \"read this file\" but has to spell out `cat path/to/file`. An extra layer of translation that wastes tokens and invites errors.\n\n---\n\n## Overview: Tool Dispatch\n\n![Tool Dispatch](/course-assets/s02_tool_use/tool-dispatch.en.svg)\n\nThe s01 loop is fully preserved (LLM call, stop_reason check, message append — not a single word changed). The only change is in that one line of tool execution: `run_bash()` is replaced with `TOOL_HANDLERS[block.name]()` dispatch lookup.\n\nAdding a tool to the Agent requires just two things:\n\n1. **Define the tool**: Add one entry to the `TOOLS` array\n2. 
**Register the handler**: Add one mapping in the `TOOL_HANDLERS` dict\n\n---\n\n## From 1 Tool to 5 Tools\n\ns01 had only bash:\n\n```python\nTOOLS = [{\"name\": \"bash\", ...}]\n\ndef run_bash(command): ...\n```\n\ns02 expands to 5 tools, each independently defined:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\", ...},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\", ...},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\", ...},\n {\"name\": \"edit_file\", \"description\": \"Replace text in file once.\", ...},\n {\"name\": \"glob\", \"description\": \"Find files by pattern.\", ...},\n]\n```\n\nEach tool has its own implementation function:\n\n```python\ndef run_read(path, limit=None):\n lines = safe_path(path).read_text().splitlines()\n if limit:\n lines = lines[:limit]\n return \"\\n\".join(lines)\n\ndef run_write(path, content):\n safe_path(path).write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n\ndef run_edit(path, old_text, new_text):\n text = safe_path(path).read_text()\n if old_text not in text:\n return \"Error: text not found\"\n safe_path(path).write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n\ndef run_glob(pattern):\n import glob as g\n return \"\\n\".join(g.glob(pattern, root_dir=WORKDIR))\n```\n\n---\n\n## Tool Dispatch\n\n```python\nTOOL_HANDLERS = {\n \"bash\": run_bash,\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n# Only one line changed in the loop — from hardcoded run_bash to dispatch lookup:\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS[block.name] # lookup\n output = handler(**block.input) # call\n results.append(...)\n```\n\nAdding a tool = one entry in `TOOLS` array + one line in `TOOL_HANDLERS` dict. 
The loop stays the same.\n\n---\n\n## Multiple Tool Calls\n\nThe model often returns multiple tool_use calls at once — \"read a.py and b.py, then list all .py files\".\n\nThe teaching version executes them one by one in the original `response.content` order. CC's approach is more complex: it slices the original order into consecutive batches, where concurrency-safe tools within a batch run in parallel, and batches are strictly sequential (see appendix).\n\n---\n\n## Quick Reference\n\n| Concept | One-Liner |\n|---------|-----------|\n| TOOL_HANDLERS | Tool name → handler function dict. Add a tool = add one mapping line |\n| Tool Definition | JSON schema telling the model \"what I can do\" |\n| Multiple tool calls | Model may return multiple tool_use at once; teaching version executes them in original order |\n| Loop Unchanged | s01's `while True` loop — not a single line changed |\n\n---\n\n## Changes from s01\n\n| Component | Before (s01) | After (s02) |\n|-----------|-------------|-------------|\n| Tool count | 1 (bash) | 5 (+read, write, edit, glob) |\n| Tool execution | Hardcoded `run_bash()` | TOOL_HANDLERS dispatch lookup |\n| Path safety | None | safe_path validation (file tools only) |\n| Loop | `while True` + `stop_reason` | Identical to s01 |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s02_tool_use/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md and tell me what this project is about`\n2. `Create a file called test.py that prints \"hello\", then read it back`\n3. `Find all Python files in this directory`\n4. `Read both README.md and requirements.txt, then create a summary file`\n\nWhat to watch for: When does the model call just one tool, and when does it call multiple at once? Are multiple tool calls executed in the correct order?\n\n---\n\n## What's Next\n\nThe Agent now has 5 specialized tools. 
File tools are protected by `safe_path`, but bash is unrestricted — `rm -rf /` still runs.\n\n→ s03 Permission: Add a gate before tool execution — is this operation safe? Does it need user approval?\n\n
\nDive into CC Source Code\n\n> The following is based on a review of CC source code `Tool.ts`, `tools.ts`, `toolOrchestration.ts`, `toolExecution.ts`, and `StreamingToolExecutor.ts`.\n\n### 1. Tool Definition Approach\n\n**Teaching version**: `TOOLS` array + `TOOL_HANDLERS` dict. Definition and implementation are separate.\n**CC**: Each tool is an independent object created by `buildTool()`, containing schema, validation, permissions, and execution. `getAllBaseTools()` aggregates all tools.\n\nThe teaching version's separation is clearer for teaching — readers immediately see \"add a tool = two definitions\".\n\n### 2. Concurrency Safety: isConcurrencySafe()\n\n![Tool Concurrency](/course-assets/s02_tool_use/concurrency-comparison.en.svg)\n\nThe teaching version executes tools one by one in original order, without concurrency. CC uses `isConcurrencySafe(input)` to determine concurrency — note this isn't simply \"read-only vs write\", but judges by specific input:\n\n| | isReadOnly | isConcurrencySafe |\n|---|---|---|\n| FileRead | true | true |\n| Glob | true | true |\n| Bash `ls` | true | **true** ← key difference |\n| Bash `rm` | false | false |\n| TaskCreate | false | **true** ← modifies state but can be concurrent (introduced in s12) |\n\nCC's Bash tool's `isConcurrencySafe` equals `isReadOnly` — read-only commands can be concurrent, write commands cannot. TaskCreate modifies task files, but each writes a different file, so it can be concurrent.\n\n### 3. Partition Algorithm\n\nCC's `partitionToolCalls()` (`toolOrchestration.ts:91-115`) doesn't split into two groups — it batches tool calls **by consecutive blocks**:\n\n```\n[read A, read B, glob *.py, bash \"rm x\", read C]\n → batch1(concurrent): [read A, read B, glob *.py]\n → batch2(serial): [bash \"rm x\"]\n → batch3(concurrent): [read C]\n```\n\nConsecutive concurrency-safe calls are grouped into the same batch for truly concurrent execution (`toolOrchestration.ts:152-176`, with a concurrency limit). 
When a non-concurrency-safe call is encountered, a new batch starts for serial execution. Batches are strictly sequential.\n\n### 4. Validation Pipeline\n\nEach tool call in CC goes through a strict 5-step validation (`toolExecution.ts`):\n\n1. **Zod schema validation** (`614-680`, teaching version uses JSON Schema): parameter type/structure check\n2. **Tool-level validateInput()** (`682-733`): parameter value validation (e.g., is the path within the working directory)\n3. **PreToolUse hooks** (`800-862`, covered in s04): hooks can return messages, modify input, or block execution\n4. **Permission check** (`921-931`, core topic of s03): canUseTool + checkPermissions → allow/deny/ask\n5. **Execute tool.call()** (`1207-1222`)\n\nThe teaching version omits Zod (uses JSON Schema), omits validateInput (uses safety functions), but preserves the permission check and hook concepts.\n\n### 5. Streaming Tool Execution\n\nCC's `StreamingToolExecutor` (`StreamingToolExecutor.ts`) starts tools while the model is still generating — no waiting for the model to finish. `read_file` might complete while the model is still outputting \"Let me analyze\". The teaching version doesn't implement this, consistent with s01's goal — conceptual clarity, not peak performance.\n\n### 6. Tool Result Persistence\n\nEach tool has a `maxResultSizeChars` field. Results exceeding this threshold are persisted to disk, and the model sees a preview + file path. FileRead is special — set to `Infinity`, preventing file read output from being persisted again. Specifically, if FileRead's result exceeds the threshold and gets persisted, the model's next read of that persisted file would trigger another persistence → infinite loop (read file → persist → re-read → re-persist → ...).\n\n
\n\n\n" + }, + { + "version": "s02", + "locale": "zh", + "title": "s02: Tool Use — 多加一个工具,只加一行", + "content": "# s02: Tool Use — 多加一个工具,只加一行\n\ns01 → `s02` → [s03](/zh/s03) → s04 → ... → s20\n> *\"加一个工具, 只加一个 handler\"* — 循环不用动, 新工具注册进 dispatch map 就行。\n>\n> **Harness 层**: 工具分发 — 扩展模型能触达的边界。\n\n---\n\n## 只有 bash 一个工具\n\ns01 的 Agent 只有一个 bash 工具。读文件要 `cat`,写文件要 `echo \"...\" > file.py`,改文件要 `sed`。\n\n模型想的是\"读这个文件\",却要拼出 `cat path/to/file`。多了一层翻译,浪费 token,还容易拼错。\n\n---\n\n## 全局视角:工具分发\n\n![Tool Dispatch](/course-assets/s02_tool_use/tool-dispatch.svg)\n\ns01 的循环完全保留(LLM 调用、stop_reason 判断、消息追加)。唯一的变动在工具执行那 1 行:`run_bash()` 替换为 `TOOL_HANDLERS[block.name]()` 查表分发。\n\n给 Agent 加一个工具只需要做两件事:\n\n1. **定义工具**:在 `TOOLS` 数组里加一条描述\n2. **注册处理函数**:在 `TOOL_HANDLERS` 字典里加一个映射\n\n---\n\n## 从 1 个工具到 5 个工具\n\ns01 只有一个 bash:\n\n```python\nTOOLS = [{\"name\": \"bash\", ...}]\n\ndef run_bash(command): ...\n```\n\ns02 加到 5 个,每个工具都是独立定义:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\", ...},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\", ...},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\", ...},\n {\"name\": \"edit_file\", \"description\": \"Replace text in file once.\", ...},\n {\"name\": \"glob\", \"description\": \"Find files by pattern.\", ...},\n]\n```\n\n每个工具有自己的实现函数:\n\n```python\ndef run_read(path, limit=None):\n lines = safe_path(path).read_text().splitlines()\n if limit:\n lines = lines[:limit]\n return \"\\n\".join(lines)\n\ndef run_write(path, content):\n safe_path(path).write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n\ndef run_edit(path, old_text, new_text):\n text = safe_path(path).read_text()\n if old_text not in text:\n return \"Error: text not found\"\n safe_path(path).write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n\ndef run_glob(pattern):\n import glob as g\n return \"\\n\".join(g.glob(pattern, root_dir=WORKDIR))\n```\n\n---\n\n## 
工具分发\n\n```python\nTOOL_HANDLERS = {\n \"bash\": run_bash,\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n# 循环里只改了一行——从硬编码 run_bash 变成查表:\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS[block.name] # 查表\n output = handler(**block.input) # 调用\n results.append(...)\n```\n\n加一个工具 = 在 `TOOLS` 数组加一条 + 在 `TOOL_HANDLERS` 字典加一行。循环不变。\n\n---\n\n## 多个工具调用\n\n模型经常一次返回多个 tool_use:\"读一下 a.py 和 b.py,然后列出所有 .py 文件\"。\n\n教学版按 `response.content` 原始顺序逐个执行。CC 的做法更复杂:按原始顺序切成连续 batch,batch 内并发安全的工具并行执行,batch 间严格顺序(见附录)。\n\n---\n\n## 速查\n\n| 概念 | 一句话 |\n|------|--------|\n| TOOL_HANDLERS | 工具名 → 处理函数的字典。加工具 = 加一行映射 |\n| 工具定义 | 告诉模型\"我能做什么\"的 JSON schema |\n| 多工具调用 | 模型可一次返回多个 tool_use,教学版按原始顺序逐个执行 |\n| 循环不变 | s01 的 `while True` 循环一行都没改 |\n\n---\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|------|-----------|-----------|\n| 工具数量 | 1 (bash) | 5 (+read, write, edit, glob) |\n| 工具执行 | 硬编码 `run_bash()` | TOOL_HANDLERS 查表分发 |\n| 路径安全 | 无 | safe_path 校验(仅 file tools) |\n| 循环 | `while True` + `stop_reason` | 与 s01 完全一致 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s02_tool_use/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md and tell me what this project is about`\n2. `Create a file called test.py that prints \"hello\", then read it back`\n3. `Find all Python files in this directory`\n4. `Read both README.md and requirements.txt, then create a summary file`\n\n观察重点:模型什么时候只调一个工具,什么时候一次调多个?多个工具调用的顺序和结果是否正确?\n\n---\n\n## 接下来\n\n现在 Agent 有 5 个专用工具。file tools 受 `safe_path` 保护,但 bash 不受限制,`rm -rf /` 还是能跑。\n\ns03 Permission → 在工具执行之前加一道门:这个操作安全吗?需要用户批准吗?\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `Tool.ts`、`tools.ts`、`toolOrchestration.ts`、`toolExecution.ts`、`StreamingToolExecutor.ts` 的核查。\n\n### 一、工具定义方式\n\n**教学版**:`TOOLS` 数组 + `TOOL_HANDLERS` 字典。定义和实现分开。\n**CC**:每个工具是 `buildTool()` 创建的独立对象,包含 schema、验证、权限、执行。`getAllBaseTools()` 汇总所有工具。\n\n教学版的分离方式对教学更清晰——读者一眼看到\"加一个工具 = 两条定义\"。\n\n### 二、并发安全判断:isConcurrencySafe()\n\n![Tool Concurrency](/course-assets/s02_tool_use/concurrency-comparison.svg)\n\n教学版按原始顺序逐个执行,不做并发。CC 用 `isConcurrencySafe(input)` 判断能否并发——注意这不是简单的\"只读 vs 写\",而是按具体输入判断:\n\n| | isReadOnly | isConcurrencySafe |\n|---|---|---|\n| FileRead | true | true |\n| Glob | true | true |\n| Bash `ls` | true | **true** ← 关键差异 |\n| Bash `rm` | false | false |\n| TaskCreate | false | **true** ← 改状态但可并发(TaskCreate 在 s12 介绍) |\n\nCC 的 Bash tool 的 `isConcurrencySafe` 等于 `isReadOnly`——只读命令可并发,写命令不可。TaskCreate 虽然改了任务文件,但每次都写不同的文件,所以可以并发。\n\n### 三、分区算法\n\nCC 的 `partitionToolCalls()`(`toolOrchestration.ts:91-115`)不是分两组,而是把工具调用**按连续块分批**:\n\n```\n[read A, read B, glob *.py, bash \"rm x\", read C]\n → batch1(并发): [read A, read B, glob *.py]\n → batch2(串行): [bash \"rm x\"]\n → batch3(并发): [read C]\n```\n\n并发安全的连续块编入同一个 batch,batch 内真正并发执行(`toolOrchestration.ts:152-176`,有并发上限)。遇到非并发安全的就开新 batch 串行执行。batch 之间严格顺序。\n\n### 四、验证管线\n\nCC 的每个工具调用经过严格的 5 步验证(`toolExecution.ts`):\n\n1. **Zod schema 验证**(`614-680`,教学版用 JSON Schema 替代):参数类型/结构检查\n2. **工具级 validateInput()**(`682-733`):参数值验证(如路径是否在工作区内)\n3. **PreToolUse hooks**(`800-862`,s04 详细介绍):钩子可以返回消息、修改输入、阻止执行\n4. **权限检查**(`921-931`,s03 的核心内容):canUseTool + checkPermissions → allow/deny/ask\n5. 
**执行 tool.call()**(`1207-1222`)\n\n教学版省略了 Zod(用 JSON Schema)、省略了 validateInput(用安全函数)、保留了权限检查和钩子概念。\n\n### 五、流式工具执行\n\nCC 的 `StreamingToolExecutor`(`StreamingToolExecutor.ts`)让工具在模型还在生成时就启动——不等模型说完。`read_file` 可能在模型还在输出\"我来分析\"的时候就跑完了。教学版不实现这个,目标和 s01 一致——概念清晰,不追求性能极致。\n\n### 六、工具结果持久化\n\n每个工具有一个 `maxResultSizeChars` 字段。结果超过这个值就落盘,模型看到的是预览 + 文件路径。FileRead 特殊——设为 `Infinity`,防止读文件的输出又被当成文件落盘。具体来说,如果 FileRead 的结果超过阈值被落盘,模型下次读那个落盘文件时又会触发落盘 → 无限循环(读文件 → 落盘 → 再读 → 再落盘 → ...)。\n\n
\n\n\n" }, { "version": "s02", "locale": "ja", - "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "title": "s02: Tool Use — ツール一つ追加、一行追加だけ", + "content": "# s02: Tool Use — ツール一つ追加、一行追加だけ\n\ns01 → `s02` → [s03](/ja/s03) → s04 → ... → s20\n> *\"ツールを一つ追加、ハンドラを一つ追加\"* — ループはそのまま。新しいツールをディスパッチマップに登録するだけ。\n>\n> **Harness レイヤー**: ツールディスパッチ — モデルが触れる範囲を拡張。\n\n---\n\n## ツールは bash 一つだけ\n\ns01 の Agent には bash 一つのツールしかない。ファイルを読むには `cat`、書くには `echo \"...\" > file.py`、編集するには `sed`。\n\nモデルは「このファイルを読みたい」と考えながら、`cat path/to/file` と組み立てなければならない。翻訳の層が一つ増え、トークンを無駄にし、エラーも起きやすい。\n\n---\n\n## 概要:ツールディスパッチ\n\n![Tool Dispatch](/course-assets/s02_tool_use/tool-dispatch.ja.svg)\n\ns01 のループは完全に保持される(LLM 呼び出し、stop_reason 判定、メッセージ追加 — 一文字も変更なし)。唯一の変更点はツール実行の 1 行:`run_bash()` が `TOOL_HANDLERS[block.name]()` の検索ディスパッチに置き換わる。\n\nAgent にツールを追加するには、たった二つ:\n\n1. **ツールを定義**:`TOOLS` 配列に一条を追加\n2. 
**ハンドラを登録**:`TOOL_HANDLERS` 辞書に一つのマッピングを追加\n\n---\n\n## 1 つのツールから 5 つのツールへ\n\ns01 には bash だけだった:\n\n```python\nTOOLS = [{\"name\": \"bash\", ...}]\n\ndef run_bash(command): ...\n```\n\ns02 では 5 つに増え、各ツールは独立して定義される:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\", ...},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\", ...},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\", ...},\n {\"name\": \"edit_file\", \"description\": \"Replace text in file once.\", ...},\n {\"name\": \"glob\", \"description\": \"Find files by pattern.\", ...},\n]\n```\n\n各ツールには専用の実装関数がある:\n\n```python\ndef run_read(path, limit=None):\n lines = safe_path(path).read_text().splitlines()\n if limit:\n lines = lines[:limit]\n return \"\\n\".join(lines)\n\ndef run_write(path, content):\n safe_path(path).write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n\ndef run_edit(path, old_text, new_text):\n text = safe_path(path).read_text()\n if old_text not in text:\n return \"Error: text not found\"\n safe_path(path).write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n\ndef run_glob(pattern):\n import glob as g\n return \"\\n\".join(g.glob(pattern, root_dir=WORKDIR))\n```\n\n---\n\n## ツールディスパッチ\n\n```python\nTOOL_HANDLERS = {\n \"bash\": run_bash,\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n# ループ内で変更されたのは一行だけ — ハードコードの run_bash から検索ディスパッチへ:\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS[block.name] # 検索\n output = handler(**block.input) # 呼び出し\n results.append(...)\n```\n\nツールの追加 = `TOOLS` 配列に一条 + `TOOL_HANDLERS` 辞書に一行。ループは変わらない。\n\n---\n\n## 複数のツール呼び出し\n\nモデルはよく一度に複数の tool_use を返す — 「a.py と b.py を読んで、全 .py ファイルを列挙して」。\n\n教育版は `response.content` の元の順序で一つずつ実行する。CC のやり方はより複雑:元の順序を保ったまま連続バッチに分割し、バッチ内の並列安全なツールを並行実行し、バッチ間は厳密に順次(付録を参照)。\n\n---\n\n## クイックリファレンス\n\n| 概念 | 一言で 
|\n|------|--------|\n| TOOL_HANDLERS | ツール名 → ハンドラ関数の辞書。ツール追加 = マッピング一行追加 |\n| ツール定義 | モデルに「何ができるか」を伝える JSON schema |\n| 複数ツール呼び出し | モデルは一度に複数の tool_use を返す可能性がある。教育版は元の順序で一つずつ実行 |\n| ループ不変 | s01 の `while True` ループ — 一行も変更なし |\n\n---\n\n## s01 からの変更\n\n| コンポーネント | 変更前 (s01) | 変更後 (s02) |\n|--------------|-------------|-------------|\n| ツール数 | 1 (bash) | 5 (+read, write, edit, glob) |\n| ツール実行 | ハードコード `run_bash()` | TOOL_HANDLERS 検索ディスパッチ |\n| パス安全性 | なし | safe_path 検証(file tools のみ) |\n| ループ | `while True` + `stop_reason` | s01 と完全に同一 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s02_tool_use/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Read the file README.md and tell me what this project is about`\n2. `Create a file called test.py that prints \"hello\", then read it back`\n3. `Find all Python files in this directory`\n4. `Read both README.md and requirements.txt, then create a summary file`\n\n観察のポイント:モデルがツールを一つだけ呼び出すときと、複数同時に呼び出すときの違い。複数のツール呼び出しは正しい順序で実行されているか?\n\n---\n\n## 次へ\n\nAgent は 5 つの専用ツールを持つようになった。file tools は `safe_path` で保護されるが、bash は制限なし — `rm -rf /` はまだ実行できる。\n\n→ s03 Permission:ツール実行前にゲートを追加 — この操作は安全か? ユーザーの承認が必要か?\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `Tool.ts`、`tools.ts`、`toolOrchestration.ts`、`toolExecution.ts`、`StreamingToolExecutor.ts` の検証に基づく。\n\n### 一、ツール定義方式\n\n**教育版**:`TOOLS` 配列 + `TOOL_HANDLERS` 辞書。定義と実装が分離。\n**CC**:各ツールは `buildTool()` で作成された独立オブジェクトで、schema、バリデーション、権限、実行を含む。`getAllBaseTools()` が全ツールを集約。\n\n教育版の分離方式は学習に適している — 読者は「ツール追加 = 二つの定義」と一目で分かる。\n\n### 二、並列安全性:isConcurrencySafe()\n\n![Tool Concurrency](/course-assets/s02_tool_use/concurrency-comparison.ja.svg)\n\n教育版は元の順序で一つずつ実行し、並列処理は行わない。CC は `isConcurrencySafe(input)` で並列可否を判断する — これは単なる「読み取り専用 vs 書き込み」ではなく、具体的な入力で判断する:\n\n| | isReadOnly | isConcurrencySafe |\n|---|---|---|\n| FileRead | true | true |\n| Glob | true | true |\n| Bash `ls` | true | **true** ← 重要な違い |\n| Bash `rm` | false | false |\n| TaskCreate | false | **true** ← 状態変更するが並列可能(s12 で紹介) |\n\nCC の Bash ツールの `isConcurrencySafe` は `isReadOnly` と同じ — 読み取り専用コマンドは並列可能、書き込みコマンドは不可。TaskCreate はタスクファイルを変更するが、毎回異なるファイルに書き込むため並列可能。\n\n### 三、パーティションアルゴリズム\n\nCC の `partitionToolCalls()`(`toolOrchestration.ts:91-115`)は二つのグループに分けるのではなく、ツール呼び出しを**連続ブロックごとにバッチ化**する:\n\n```\n[read A, read B, glob *.py, bash \"rm x\", read C]\n → batch1(並列): [read A, read B, glob *.py]\n → batch2(直列): [bash \"rm x\"]\n → batch3(並列): [read C]\n```\n\n連続する並列安全な呼び出しを同じバッチにまとめ、真の並列実行を行う(`toolOrchestration.ts:152-176`、並列数上限あり)。非並列安全な呼び出しに遭遇すると新しいバッチを開始して直列実行。バッチ間は厳密に順次。\n\n### 四、バリデーションパイプライン\n\nCC の各ツール呼び出しは厳格な 5 段階のバリデーションを経る(`toolExecution.ts`):\n\n1. **Zod schema バリデーション**(`614-680`、教育版は JSON Schema で代替):パラメータの型/構造チェック\n2. **ツールレベル validateInput()**(`682-733`):パラメータ値の検証(例:パスが作業ディレクトリ内か)\n3. **PreToolUse フック**(`800-862`、s04 で詳解):フックはメッセージの返却、入力の変更、実行のブロックが可能\n4. **権限チェック**(`921-931`、s03 の核心):canUseTool + checkPermissions → allow/deny/ask\n5. 
**tool.call() の実行**(`1207-1222`)\n\n教育版は Zod を省略(JSON Schema を使用)、validateInput を省略(安全関数を使用)、権限チェックとフック概念は保持。\n\n### 五、ストリーミングツール実行\n\nCC の `StreamingToolExecutor`(`StreamingToolExecutor.ts`)はモデルがまだ生成中にツールを起動する — モデルの完了を待たない。`read_file` はモデルが「分析します」と出力中に完了するかもしれない。教育版はこれを実装しない。s01 と同じ目標 — 概念の明確さ、極限のパフォーマンスではない。\n\n### 六、ツール結果の永続化\n\n各ツールには `maxResultSizeChars` フィールドがある。この閾値を超える結果はディスクに保存され、モデルにはプレビュー + ファイルパスが表示される。FileRead は特殊 — `Infinity` に設定され、ファイル読み出し結果の再永続化を防ぐ。具体的には、FileRead の結果が閾値を超えて永続化されると、モデルがその永続化ファイルを次に読むときにまた永続化がトリガーされ → 無限ループ(ファイル読む → 永続化 → 再読み → 再永続化 → ...)になる。\n\n
\n\n\n" + }, + { + "version": "s03", + "locale": "en", + "title": "s03: Permission — Check Permissions Before Execution", + "content": "# s03: Permission — Check Permissions Before Execution\n\ns01 → s02 → `s03` → [s04](/en/s04) → s05 → ... → s20\n> *\"Check permissions before executing\"* — The permission pipeline decides which operations need approval.\n>\n> **Harness Layer**: Permission — a gate before tool execution.\n\n---\n\n## The Problem\n\ns02's Agent has 5 tools. File tools are protected by `safe_path`, but bash is unrestricted. Ask it to \"clean up the project,\" and it might run `rm -rf /`.\n\nSafety can't rely on trusting the model — it needs code: a check before every tool execution.\n\n---\n\n## The Solution\n\n![Permission Overview](/course-assets/s03_permission/permission-overview.en.svg)\n\ns02's loop is fully preserved. The only change is inserting `check_permission()` before tool execution — each tool call passes through three gates in a fixed order: hard deny first, then soft ask, and if neither matches, allow.\n\nThe three gates correspond to three decisions:\n\n| Gate | Purpose | On Match |\n|------|---------|----------|\n| 1. Deny List | Permanently forbidden operations (`rm -rf /`, `sudo`) | Denied immediately, not executed |\n| 2. Rule Matching | Context-dependent operations (writing outside workspace, `rm` files) | Passed to Gate 3 |\n| 3. User Approval | After Gate 2 matches, pauses for user confirmation | User decides allow or deny |\n\nNone of the three gates match → execute directly. Most routine operations take this path.\n\n---\n\n## How It Works\n\n![Permission Pipeline](/course-assets/s03_permission/permission-pipeline.en.svg)\n\n**Gate 1**: A hard deny list. Check first; if matched, return a block message. (Teaching demo: simple string matching is not a reliable security mechanism — command variants and shell expansion can bypass it. 
CC's approach is in the appendix.)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**Gate 2**: Rule matching — describes \"when to ask the user.\" Each rule specifies a tool and a check condition.\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**Gate 3**: After a rule matches, pause for user input.\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**All three gates chained together**, inserted before tool execution:\n\n```python\ndef check_permission(block) -> bool:\n # Gate 1: Hard deny\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # Gate 2 + 3: Rule matching → User approval\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# In agent_loop — s02's loop with just one line added:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← NEW\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 original\n results.append(...)\n```\n\n---\n\n## Changes from s02\n\n| Component | Before (s02) | After (s03) |\n|-----------|-------------|-------------|\n| Security model | None (trust the model) | Three-gate permission pipeline |\n| New functions | — | check_deny_list, check_rules, ask_user, check_permission |\n| Loop | Executes all tools directly | Inserts check_permission() before execution |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\nTry these prompts:\n\n1. `Create a file called test.txt in the current directory` (should pass through)\n2. `Delete all temporary files in /tmp` (bash + rm triggers Gate 2)\n3. `What files are in the current directory?` (read-only, all pass)\n4. `Try to write a file to /etc/something` (writing outside workspace triggers Gate 2)\n\nWhat to watch for: Which operations pass through? Which need your confirmation? Which are denied outright?\n\n---\n\n## What's Next\n\nPermission checks are in place — but every check is hardcoded as `check_permission()` inside the loop. 
What if you want to add logging before and after each tool execution? What if you want to auto-trigger a git commit after certain operations? Scattering this extension logic throughout the loop quickly bloats it.\n\n→ s04 Hooks: Add hooks to the loop. Extension logic hangs on hooks; the loop stays clean.\n\n
\nDive into CC Source Code\n\n> The following is based on a review of CC source code `types/permissions.ts`, `utils/permissions/permissions.ts`, `toolExecution.ts`, `utils/permissions/yoloClassifier.ts`, `tools/AgentTool/forkSubagent.ts`.\n\n### 1. PermissionResult: Not 3, but 4\n\nThe teaching version's three gates (deny → ask → allow) don't fully correspond to CC. CC's `PermissionResult` has 4 behaviors (`types/permissions.ts:241-266`):\n\n| behavior | Meaning | Teaching Version Equivalent |\n|----------|---------|---------------------------|\n| `allow` | Allow directly | Gate 3 passes |\n| `deny` | Deny directly | Gate 1 matches |\n| `ask` | Show dialog to user | Gate 2 matches |\n| `passthrough` | Tool doesn't express opinion, passes to generic pipeline | Not in teaching version |\n\n### 2. Production Verification Stages\n\nCC's tool calls don't go through three gates — they go through multiple stages distributed across `checkPermissionsAndCallTool()` (`toolExecution.ts:599-1745`), hooks, `hasPermissionsToUseToolInner()` (`utils/permissions/permissions.ts:1158-1310`), and classifier logic:\n\n1. **Zod schema validation** (`toolExecution.ts:614-680`) — parameter type checking\n2. **validateInput()** (`toolExecution.ts:682-733`) — tool-level semantic validation\n3. **backfillObservableInput()** (`toolExecution.ts:784`) — backfill legacy fields\n4. **PreToolUse hooks** (`toolExecution.ts:800-862`) — hooks can return allow/deny/ask\n5. **resolveHookPermissionDecision()** (`toolExecution.ts:921-931`) — coordinate hook + pipeline decisions\n6. 
**hasPermissionsToUseToolInner()** (`permissions.ts:1158-1310`) — multi-layer rule check:\n - Entire tool disabled by deny rule → `deny`\n - Entire tool flagged by ask rule → `ask`\n - `tool.checkPermissions()` tool's own judgment\n - Tool itself returns deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - Content-related ask rules → `ask` (not bypassable)\n - Security check violation → `ask` (not bypassable)\n - bypassPermissions mode → `allow`\n - Entire tool allowed by allow rule → `allow`\n - passthrough → converted to `ask`\n\n### 3. Deny List: Not One File, but 8 Sources\n\nCC doesn't have a single deny list. Permission rules come from 8 sources (`types/permissions.ts:54-62`):\n\n| Source | Configuration Location |\n|--------|----------------------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | Enterprise management policy |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | Inline command |\n| `session` | In-session temporary authorization |\n\nEach rule format: `{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`. Rules from multiple sources are merged, with higher-priority sources overriding lower ones (low to high: user < project < local < flag < policy, plus cliArg, command, session).\n\n### 4. What is isDestructive()\n\nIn CC, `isDestructive` (`Tool.ts:405-406`) is **purely for UI display** — showing a `[destructive]` label in the tool list. It doesn't participate in permission decisions. All tools return `false` by default. Only ExitWorktree (on remove) and MCP tools (depending on `annotations.destructiveHint`) override it.\n\n### 5. YoloClassifier (Auto-Approval)\n\nIn CC's auto mode, it doesn't pop a dialog every time. 
`classifyYoloAction` (`utils/permissions/yoloClassifier.ts:1012`) sends the tool call + conversation context to a classifier LLM to judge safety. It first tries acceptEdits mode simulation (`permissions.ts:620-656`, if acceptEdits allows → auto-approve), then checks the safe tool whitelist (`permissions.ts:658-686`), and finally calls the classifier. If the classifier rejects too many times in a row → falls back to manual approval.\n\n### 6. Permission Bubbling\n\nA sub-Agent's (forked via AgentTool) `permissionMode` is set to `'bubble'` (`forkSubagent.ts:50`). This means permission dialogs **bubble up to the parent Agent's terminal**, rather than being silently denied in the sub-Agent. The Bash classifier continues running during this process — displaying the permission dialog while judging in the background whether auto-approval is possible.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multi-stage pipeline → 3 gates: dramatically lower barrier to understanding\n- 8 rule sources → 1 local DENY_LIST: manageable concept count\n- isDestructive → omitted (teaching version has no UI layer, and it doesn't participate in permission decisions in CC either)\n- YoloClassifier → omitted (depends on additional LLM calls and telemetry)\n- Permission bubbling → omitted (s15 covers multi-Agent)\n\n
\n\n\n" + }, + { + "version": "s03", + "locale": "zh", + "title": "s03: Permission — 执行前做权限判断", + "content": "# s03: Permission — 执行前做权限判断\n\ns01 → s02 → `s03` → [s04](/zh/s04) → s05 → ... → s20\n> *\"工具执行前先做权限判断\"* — 权限管线决定哪些操作需要审批。\n>\n> **Harness 层**: 权限 — 在工具执行前加一道门。\n\n---\n\n## 问题\n\ns02 的 Agent 有 5 个工具。file tools 受 `safe_path` 保护,但 bash 不受限制。让它\"清理一下项目\",可能执行 `rm -rf /`。\n\n安全不能靠信任模型,要靠代码——在工具执行之前做判断。\n\n---\n\n## 解决方案\n\n![Permission Overview](/course-assets/s03_permission/permission-overview.svg)\n\ns02 的循环完全保留。唯一的变动在工具执行前插入 `check_permission()`——每个工具调用经过三道闸门,顺序固定:硬拒绝优先,软询问次之,都没命中就放行。\n\n三道闸门对应三种决策:\n\n| 闸门 | 作用 | 命中后 |\n|------|------|--------|\n| 1. 拒绝列表 | 永远禁止的操作(`rm -rf /`、`sudo`) | 直接拒绝,不执行 |\n| 2. 规则匹配 | 取决于上下文的操作(写工作区外、`rm` 文件) | 交给闸门 3 |\n| 3. 用户审批 | 闸门 2 命中后,暂停等用户确认 | 用户决定允许或拒绝 |\n\n三道都没命中 → 直接执行。大部分日常操作走这条路。\n\n---\n\n## 工作原理\n\n![Permission Pipeline](/course-assets/s03_permission/permission-pipeline.svg)\n\n**闸门 1**:一张硬拒绝表,先查,命中就返回阻止信息。(教学示意:简单字符串匹配不是可靠安全机制,命令变体和 shell 展开可能绕过。CC 的做法见附录。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**闸门 2**:规则匹配——描述\"什么时候需要问用户\"。每条规则指定工具和检查条件。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n 
return None\n```\n\n**闸门 3**:规则命中后,暂停等用户输入。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**三道闸门串在一起**,插在工具执行之前:\n\n```python\ndef check_permission(block) -> bool:\n # 闸门 1: 硬拒绝\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # 闸门 2 + 3: 规则匹配 → 用户审批\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# 在 agent_loop 中——s02 的循环只加了一行:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新增\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 原有\n results.append(...)\n```\n\n---\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|------|-----------|-----------|\n| 安全模型 | 无(信任模型) | 三道闸门权限管线 |\n| 新函数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| 循环 | 直接执行所有工具 | 执行前插入 check_permission() |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n试试这些 prompt:\n\n1. `Create a file called test.txt in the current directory`(应该直接通过)\n2. `Delete all temporary files in /tmp`(bash + rm 会触发闸门 2)\n3. `What files are in the current directory?`(只读,全部通过)\n4. `Try to write a file to /etc/something`(写工作区外,触发闸门 2)\n\n观察重点:哪些操作直接通过?哪些需要你确认?哪些被直接拒绝?\n\n---\n\n## 接下来\n\n权限检查做了——但每次都在循环里硬编码 `check_permission()`。如果我想在每次工具执行前后加日志?如果想在某些操作后自动触发 git commit?这些扩展逻辑散落在 loop 里,循环很快就会膨胀。\n\ns04 Hooks → 给循环加钩子,扩展逻辑挂在钩子上,循环保持干净。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` 的核查。\n\n### 一、PermissionResult:不是 3 种,是 4 种\n\n教学版的三道闸门(deny → ask → allow)和 CC 不完全对应。CC 的 `PermissionResult` 有 4 个 behavior(`types/permissions.ts:241-266`):\n\n| behavior | 含义 | 教学版对应 |\n|----------|------|-----------|\n| `allow` | 直接允许 | 闸门 3 通过 |\n| `deny` | 直接拒绝 | 闸门 1 命中 |\n| `ask` | 弹出对话框问用户 | 闸门 2 命中 |\n| `passthrough` | 工具不表态,交给通用管线决定 | 教学版无 |\n\n### 二、生产版的验证阶段\n\nCC 的工具调用不是经过三道闸门,而是经过多个阶段,分布在 `checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)和 classifier 逻辑里:\n\n1. **Zod schema 验证**(`toolExecution.ts:614-680`)— 参数类型检查\n2. **validateInput()**(`toolExecution.ts:682-733`)— 工具级语义验证\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— 补全遗留字段\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— 钩子可以返回 allow/deny/ask\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— 协调钩子+管线决策\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多层规则检查:\n - 整个工具被 deny rule 禁用 → `deny`\n - 整个工具被 ask rule 标记 → `ask`\n - `tool.checkPermissions()` 工具自己的判断\n - 工具自己返回 deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - 内容相关的 ask 规则 → `ask`(不可绕过)\n - 安全检查违规 → `ask`(不可绕过)\n - bypassPermissions 模式 → `allow`\n - 整个工具被 allow rule 放行 → `allow`\n - passthrough → 转为 `ask`\n\n### 三、拒绝列表:不是一个文件,是 8 个来源\n\nCC 没有单一的 deny list。权限规则来自 8 个来源(`types/permissions.ts:54-62`):\n\n| 来源 | 配置位置 |\n|------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | 企业管理策略 |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | 内联命令 |\n| `session` | 会话内临时授权 |\n\n每条规则格式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。多个来源的规则合并,高优先级来源覆盖低优先级(从低到高:user < project < local < flag < policy,加上 cliArg、command、session)。\n\n### 四、isDestructive() 是什么\n\nCC 中 `isDestructive`(`Tool.ts:405-406`)**纯粹是 UI 展示用的**——在工具列表里显示 `[destructive]` 标签。它不参与权限决策。默认所有工具都返回 `false`。只有 ExitWorktree(remove 时)和 MCP 工具(依赖 `annotations.destructiveHint`)覆写了它。\n\n### 五、YoloClassifier(自动审批)\n\nCC 的 auto 模式下,不会每次都弹对话框。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)把工具调用 + 对话上下文发给一个分类器 LLM 判断是否安全。先尝试 acceptEdits 模式模拟(`permissions.ts:620-656`,如果 acceptEdits 允许 → 直接批准),再查安全工具白名单(`permissions.ts:658-686`),最后才调分类器。分类器连续拒绝太多次 → 回退到人工审批。\n\n### 六、权限冒泡\n\n子 Agent(通过 AgentTool fork 出来的)的 `permissionMode` 设为 `'bubble'`(`forkSubagent.ts:50`)。意思是权限弹窗**冒泡到父 Agent 的终端**,而不是在子 Agent 里静默拒绝。Bash 分类器在这个过程中继续跑——给权限对话框显示的同时在后台判断是否可以自动批准。\n\n### 教学版的简化是刻意的\n\n- 多阶段管线 → 3 道闸门:理解门槛大幅降低\n- 8 个规则来源 → 1 个本地 DENY_LIST:概念量可控\n- isDestructive → 忽略(教学版没有 UI 层,CC 里它也不参与权限决策)\n- YoloClassifier → 省略(依赖于额外的 LLM 调用和遥测系统)\n- 权限冒泡 → 省略(s15 才涉及多 Agent)\n\n
\n\n\n" }, { "version": "s03", "locale": "ja", - "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. 
nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "title": "s03: Permission — 実行前に権限を判断する", + "content": "# s03: Permission — 実行前に権限を判断する\n\ns01 → s02 → `s03` → [s04](/ja/s04) → s05 → ... → s20\n> *\"ツール実行前に権限を判断\"* — 権限パイプラインは、どの操作に承認が必要かを決める。\n>\n> **Harness レイヤー**: 権限 — ツール実行前に一つのゲートを追加。\n\n---\n\n## 課題\n\ns02 の Agent は 5 つのツールを持つ。file tools は `safe_path` で保護されるが、bash は制限なし。「プロジェクトを掃除して」と頼むと、`rm -rf /` を実行しかねない。\n\n安全性はモデルを信頼することではなく、コードに頼る — ツール実行前に判断を挟む。\n\n---\n\n## ソリューション\n\n![Permission Overview](/course-assets/s03_permission/permission-overview.ja.svg)\n\ns02 のループは完全に維持される。唯一の変更は、ツール実行前に `check_permission()` を挿入すること — 各ツール呼び出しは 3 つのゲートを固定順序で通過する:ハード拒否が最優先、次にソフト確認、どちらも一致しなければ許可。\n\n3 つのゲートは 3 つの決定に対応する:\n\n| ゲート | 役割 | 一致時 |\n|--------|------|--------|\n| 1. 拒否リスト | 常に禁止される操作(`rm -rf /`、`sudo`) | 即座に拒否、実行しない |\n| 2. ルールマッチング | コンテキスト依存の操作(作業ディレクトリ外への書き込み、`rm` ファイル) | ゲート 3 へ |\n| 3. 
ユーザー承認 | ゲート 2 が一致した場合、ユーザー確認を待機 | ユーザーが許可または拒否を決定 |\n\n3 つのゲートのどれにも一致しない → 直接実行。日常の操作の大部分はこの経路を通る。\n\n---\n\n## 仕組み\n\n![Permission Pipeline](/course-assets/s03_permission/permission-pipeline.ja.svg)\n\n**ゲート 1**:ハード拒否リスト。最初に確認し、一致すればブロックメッセージを返す。(教育デモ:単純な文字列マッチングは信頼できるセキュリティ機構ではない — コマンドの変種やシェル展開で回避される可能性がある。CC のアプローチは付録を参照。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**ゲート 2**:ルールマッチング — 「いつユーザーに聞くべきか」を記述する。各ルールはツールとチェック条件を指定する。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**ゲート 3**:ルールが一致した後、ユーザー入力を待機。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**3 つのゲートを直列に接続**、ツール実行前に挿入する:\n\n```python\ndef check_permission(block) -> bool:\n # ゲート 1: ハード拒否\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # ゲート 2 + 3: ルールマッチング → ユーザー承認\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# agent_loop で — s02 のループに 1 行追加するだけ:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新規\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 既存\n results.append(...)\n```\n\n---\n\n## s02 からの変更点\n\n| コンポーネント | 変更前 (s02) | 変更後 (s03) |\n|---------------|-------------|-------------|\n| セキュリティモデル | なし(モデルを信頼) | 3 ゲート権限パイプライン |\n| 新規関数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| ループ | すべてのツールを直接実行 | 実行前に check_permission() を挿入 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Create a file called test.txt in the current directory`(そのまま通過するはず)\n2. `Delete all temporary files in /tmp`(bash + rm でゲート 2 が発動)\n3. `What files are in the current directory?`(読み取り専用、すべて通過)\n4. `Try to write a file to /etc/something`(作業ディレクトリ外への書き込みでゲート 2 が発動)\n\n観察のポイント:どの操作がそのまま通過するか? どれに確認が必要か? どれが即座に拒否されるか?\n\n---\n\n## 次へ\n\n権限チェックは実装された — しかし、毎回ループ内に `check_permission()` をハードコードしている。ツール実行の前後にログを追加したい場合は? 特定の操作後に自動的に git commit をトリガーしたい場合は? このような拡張ロジックがループ内に散らばると、ループはすぐに膨張する。\n\n→ s04 Hooks:ループにフックを追加する。拡張ロジックはフックにぶら下げ、ループはクリーンに保つ。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` の検証に基づく。\n\n### 一、PermissionResult:3 種ではなく、4 種\n\n教育版の 3 つのゲート(deny → ask → allow)は CC と完全には対応しない。CC の `PermissionResult` には 4 つの behavior がある(`types/permissions.ts:241-266`):\n\n| behavior | 意味 | 教育版の対応 |\n|----------|------|-------------|\n| `allow` | 直接許可 | ゲート 3 通過 |\n| `deny` | 直接拒否 | ゲート 1 一致 |\n| `ask` | ユーザーにダイアログを表示 | ゲート 2 一致 |\n| `passthrough` | ツールが意見を表明せず、汎用パイプラインに委ねる | 教育版にはなし |\n\n### 二、本番環境の検証段階\n\nCC のツール呼び出しは 3 つのゲートを通るのではなく、`checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)、classifier ロジックに分散する複数の段階を経る:\n\n1. **Zod schema 検証**(`toolExecution.ts:614-680`)— パラメータの型チェック\n2. **validateInput()**(`toolExecution.ts:682-733`)— ツールレベルの意味的検証\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— レガシーフィールドの補完\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— フックが allow/deny/ask を返す\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— フック + パイプラインの決定を調整\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多層ルールチェック:\n - ツール全体が deny rule で無効 → `deny`\n - ツール全体が ask rule でマーク → `ask`\n - `tool.checkPermissions()` ツール自身の判断\n - ツール自身が deny を返す → `deny`\n - `requiresUserInteraction()` → `ask`\n - コンテンツ関連の ask ルール → `ask`(バイパス不可)\n - セキュリティチェック違反 → `ask`(バイパス不可)\n - bypassPermissions モード → `allow`\n - ツール全体が allow rule で許可 → `allow`\n - passthrough → `ask` に変換\n\n### 三、拒否リスト:1 つのファイルではなく、8 つのソース\n\nCC には単一の deny list はない。権限ルールは 8 つのソースから来る(`types/permissions.ts:54-62`):\n\n| ソース | 設定場所 |\n|--------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | フィーチャーフラグ |\n| `policySettings` | 企業管理ポリシー |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | インラインコマンド |\n| `session` | セッション内一時承認 |\n\n各ルールの形式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。複数ソースのルールは統合され、高優先度ソースが低優先度を上書きする(低→高:user < project < local < flag < policy、さらに cliArg、command、session)。\n\n### 四、isDestructive() とは\n\nCC では `isDestructive`(`Tool.ts:405-406`)は**純粋に UI 表示用** — ツール一覧に `[destructive]` ラベルを表示するだけ。権限決定には参加しない。デフォルトではすべてのツールが `false` を返す。ExitWorktree(remove 時)と MCP ツール(`annotations.destructiveHint` に依存)のみがオーバーライドする。\n\n### 五、YoloClassifier(自動承認)\n\nCC の auto モードでは、毎回ダイアログを表示するわけではない。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)はツール呼び出し + 会話コンテキストを分類器 LLM に送って安全性を判断する。まず acceptEdits モードのシミュレーションを試み(`permissions.ts:620-656`、acceptEdits が許可すれば → 自動承認)、次にセーフツールホワイトリストを確認し(`permissions.ts:658-686`)、最後に分類器を呼び出す。分類器が連続して拒否しすぎた場合 → 手動承認にフォールバック。\n\n### 六、権限バブリング\n\nサブ Agent(AgentTool 経由でフォークされたもの)の `permissionMode` は `'bubble'` に設定される(`forkSubagent.ts:50`)。これは権限ダイアログが**親 Agent のターミナルにバブルアップ**することを意味する。サブ Agent で黙って拒否されるのではない。Bash 分類器はこの過程で引き続き実行され — 権限ダイアログを表示しつつ、バックグラウンドで自動承認可能か判断する。\n\n### 教育版の単純化は意図的\n\n- 多段階パイプライン → 3 ゲート:理解のハードルが大幅に下がる\n- 8 ルールソース → 1 つのローカル 
DENY_LIST:概念量を制御可能\n- isDestructive → 省略(教育版には UI レイヤーがなく、CC でも権限決定には参加しない)\n- YoloClassifier → 省略(追加の LLM 呼び出しとテレメトリに依存)\n- 権限バブリング → 省略(s15 でマルチ Agent を扱う)\n\n
\n\n\n" + }, + { + "version": "s04", + "locale": "en", + "title": "s04: Hooks — Hang on the Loop, Don't Write into It", + "content": "# s04: Hooks — Hang on the Loop, Don't Write into It\n\ns01 → s02 → s03 → `s04` → [s05](/en/s05) → s06 → ... → s20\n\n> *\"Hang on the loop, don't write into it\"* — Hooks inject extension logic before and after tool execution.\n>\n> **Harness Layer**: Hooks — Extension points that don't invade the loop.\n\n---\n\n## The Problem\n\nThe s03 Agent has permission checks. But every new check, \"log every bash call\", \"auto git add after writes\", requires modifying the `agent_loop` function.\n\nThe loop quickly becomes this:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # added a line\n check_permission(block) # added a line\n notify_slack(block) # added another line\n output = execute(block)\n auto_git_add(block) # yet another line\n # ... the loop is unrecognizable\n```\n\nWhat you want to extend is the Agent's behavior, but what you're modifying is the loop itself. The loop should be a stable core; extensions should hang on the outside.\n\n---\n\n## The Solution\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.en.svg)\n\nThe s03 loop and permission logic are fully preserved. The only change is moving `check_permission()` from inside the loop body onto a hook. The loop no longer directly calls any check function. 
Instead it calls `trigger_hooks(\"PreToolUse\", block)`, and the registry decides what to run.\n\nFour events, covering a complete agent cycle:\n\n| Event | Trigger Timing | Typical Use |\n|-------|---------------|-------------|\n| UserPromptSubmit | After user input, before entering LLM | Input validation, context injection |\n| PreToolUse | Before tool execution | Permission checks, logging |\n| PostToolUse | After tool execution | Side effects (auto git add etc.), output checking |\n| Stop | When the loop is about to exit | Cleanup (CC also supports force continuation) |\n\nExtensions are added via `register_hook()`. The loop only calls `trigger_hooks()`.\n\n---\n\n## How It Works\n\n**Hook registry**: a dict mapping event names to callback lists.\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # return value ≠ None → hook says \"stop\"\n return result\n return None\n```\n\nIn the teaching version, PreToolUse returning non-None means block execution; Stop returning non-None means force continuation. UserPromptSubmit and PostToolUse return values are unused.\n\n**UserPromptSubmit**, triggers after user input, before entering the LLM. 
CC can intercept or modify input; the teaching version only logs:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nIn the main loop, triggered right after user input:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← before entering LLM\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**, hooks before and after tool execution. s03's permission check logic is now wrapped as a PreToolUse hook, plus a logging hook and a large-output reminder:\n\n```python\n# PreToolUse: permission check (s03 logic, moved from loop to hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: logging\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: large output reminder\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**, triggers when the loop is about to exit (`stop_reason != \"tool_use\"`). 
The teaching version prints a cleanup summary:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\nIn agent_loop, triggered before exit:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← before exiting\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**Only one change in the loop**: s03 directly called `check_permission(block)`, s04 replaces it with `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hooks replace hardcoding\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\nFour hooks cover the critical nodes of the agent cycle: input → before execution → after execution → exit. 
The loop only calls trigger_hooks(); all logic lives in hook callbacks.\n\n---\n\n## Changes from s03\n\n| Component | Before (s03) | After (s04) |\n|-----------|-------------|-------------|\n| Extension method | check_permission() hardcoded in the loop | HOOKS registry + trigger_hooks() |\n| New functions | — | register_hook, trigger_hooks |\n| Hook callbacks | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| Loop | Directly calls check_permission() | Calls trigger_hooks(\"PreToolUse\", ...) |\n| Exit control | None | trigger_hooks(\"Stop\", ...) can prevent exit |\n| Input interception | None | trigger_hooks(\"UserPromptSubmit\", ...) can inject context |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md` (should pass directly, observe hook logs)\n2. `Create a file called test.txt` (after creation, observe if PostToolUse fires)\n3. `Delete all temporary files in /tmp` (bash + rm triggers permission hook)\n\nWhat to watch for: Before each tool execution, does the `[HOOK]` log appear? When permission is denied, was it intercepted by a hook or hardcoded in the loop?\n\n---\n\n## What's Next\n\nThe Agent can now safely execute operations. But does it ever stop to think \"what should I do first, and what next?\" Given a complex task, does it jump straight in, or plan first?\n\n→ s05 TodoWrite: Give the Agent a planning tool. Make a list first, then execute.\n\n
\nDive into CC Source Code\n\n> The following is based on a complete analysis of CC source code `toolHooks.ts` (650 lines), `hooks.ts`, `stopHooks.ts`, and `coreTypes.ts`.\n\n### 1. Hook Events: Not Just 4, but 27\n\nThe teaching version covers only 4 events. CC actually has 27 hook events (`coreTypes.ts:25-53`):\n\n| Category | Events |\n|----------|--------|\n| Tool-related | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| Session-related | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| User interaction | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| Sub-agents | `SubagentStart`, `SubagentStop` |\n| Compaction-related | `PreCompact`, `PostCompact` |\n| Team-related | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| Other | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\nThe teaching version covers only 4 core events (UserPromptSubmit, PreToolUse, PostToolUse, Stop) because they cover every critical node of a complete agent cycle. The other 23 follow the same pattern.\n\n### 2. HookResult Common Fields\n\nCC's `HookResult` (`types/hooks.ts:260-275`) has 14 fields. Common ones:\n\n| Field | Type | Purpose |\n|-------|------|---------|\n| `message` | Message | Optional UI message |\n| `blockingError` | HookBlockingError | Blocking error → injected into conversation for model self-correction |\n| `outcome` | success/blocking/non_blocking_error/cancelled | Execution result |\n| `preventContinuation` | boolean | Prevent subsequent execution |\n| `stopReason` | string | Stop reason description |\n| `permissionBehavior` | allow/deny/ask/passthrough | Hook returns permission decision |\n| `updatedInput` | Record | Modify tool input |\n| `additionalContext` | string | Additional context |\n| `updatedMCPToolOutput` | unknown | MCP tool output modification |\n\n### 3. 
Key Invariant: Hook 'allow' Cannot Bypass deny/ask Rules\n\nThis is the most important security design in CC's permission system (`toolHooks.ts:325-331`): **when a hook returns allow, it still checks settings.json deny/ask rules.** Even if the user's hook script says \"allow\", if the tool is disabled in settings.json, the operation is still blocked.\n\nThe teaching version doesn't have this layer; hooks returning non-None directly interrupt. This is sufficient for teaching, but would create a security vulnerability in production.\n\n### 4. stopHookActive Mechanism\n\nCC's Stop hooks have an infinite-loop prevention mechanism (`query.ts:212,1300`): the `stopHookActive` state field. When stop hooks produce a blockingError, the loop re-enters with `stopHookActive: true`. Subsequent iterations see this flag and don't trigger stop hooks again. This prevents a never-stopping bug: model self-corrects → stop hook errors again → model self-corrects again → stop hook errors again...\n\n### 5. hook_stopped_continuation\n\nWhen PostToolUse hooks return `preventContinuation: true`, a `hook_stopped_continuation` attachment is produced (`toolHooks.ts:117-130`). query.ts (L1388-1393) detects it and sets `shouldPreventContinuation = true`, causing the loop to exit. This is the mechanism for \"hooks gracefully shut down the Agent\" — not a crash, but a completion.\n\n### Teaching Version Simplifications Are Intentional\n\n- 27 events → 4 (UserPromptSubmit/PreToolUse/PostToolUse/Stop): covers agent cycle critical nodes\n- 14 fields → simple return values (None = continue, non-None = interrupt/continue): minimal cognitive load\n- Hook allow vs deny/ask invariant → omitted: teaching version has no settings.json layer\n- stopHookActive → omitted: teaching version Stop hook only does simple continuation, no infinite-loop prevention needed\n\n
\n\n\n" + }, + { + "version": "s04", + "locale": "zh", + "title": "s04: Hooks — 挂在循环上,不写进循环里", + "content": "# s04: Hooks — 挂在循环上,不写进循环里\n\ns01 → s02 → s03 → `s04` → [s05](/zh/s05) → s06 → ... → s20\n\n> *\"挂在循环上, 不写进循环里\"* — hook 在工具执行前后注入扩展逻辑。\n>\n> **Harness 层**: hook — 扩展点不侵入循环。\n\n---\n\n## 问题\n\ns03 的 Agent 有权限检查了。但每次加一个新检查,比如\"记录每次 bash 调用\"、\"操作后自动 git add\",都要修改 `agent_loop` 函数。\n\n循环很快就变成了这样:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # 加一行\n check_permission(block) # 加一行\n notify_slack(block) # 又加一行\n output = execute(block)\n auto_git_add(block) # 再加一行\n # ... 很快循环就认不出来了\n```\n\n你想扩展的是 Agent 的行为,但你改的却是循环本身。循环应该是一个稳定的核心,扩展应该挂在外面。\n\n---\n\n## 解决方案\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.svg)\n\ns03 的循环和权限逻辑完全保留。唯一的变动是把 `check_permission()` 从循环体内移到了 hook 上,循环不再直接调用任何检查函数,改为 `trigger_hooks(\"PreToolUse\", block)`,由注册表决定跑什么。\n\n四个事件,覆盖一个完整的 agent cycle:\n\n| 事件 | 触发时机 | 典型用途 |\n|------|---------|---------|\n| UserPromptSubmit | 用户输入提交后、进入 LLM 前 | 输入验证、注入上下文 |\n| PreToolUse | 工具执行前 | 权限检查、日志记录 |\n| PostToolUse | 工具执行后 | 副作用(自动 git add 等)、输出检查 |\n| Stop | 循环即将退出时 | 收尾清理(CC 还支持强制续跑) |\n\n扩展通过 `register_hook()` 添加,循环只调用 `trigger_hooks()`。\n\n---\n\n## 工作原理\n\n**hook 注册表**:一个字典,事件名映射到回调列表。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 返回值 ≠ None → hook 说\"停\"\n return result\n return None\n```\n\n教学版中,PreToolUse 的非 None 返回值会阻止本次工具执行,Stop 的非 None 返回值会强制续跑。UserPromptSubmit 和 PostToolUse 的返回值未被使用。\n\n**UserPromptSubmit**,用户输入提交后、进入 LLM 前触发。CC 中可以拦截或修改输入,教学版只做日志演示:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working 
directory info into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = no modification, let prompt through\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\n在主循环中,用户输入后立即触发:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← 进入 LLM 之前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**,工具执行前后的 hook。s03 的权限检查逻辑现在包装成 PreToolUse hook,再加一个日志 hook 和一个大输出提醒:\n\n```python\n# PreToolUse: 权限检查(s03 的逻辑,从循环移到 hook)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: 日志\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大文件提醒\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**,循环即将退出时触发(`stop_reason != \"tool_use\"`)。教学版用于打印收尾统计:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = allow stop, return string = force 
continuation\n\nregister_hook(\"Stop\", summary_hook)\n```\n\n在 agent_loop 中,退出前触发:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 退出之前\n if force:\n # hook returned a message → inject it and continue\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**循环里只改了一处**:s03 直接调用 `check_permission(block)`,s04 改为 `trigger_hooks(\"PreToolUse\", block)`:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: hook 替代硬编码\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\n四个 hook 覆盖了 agent cycle 的关键节点:输入→执行前→执行后→退出。循环只负责调用 trigger_hooks(),具体逻辑全在 hook 回调里。\n\n---\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|------|-----------|-----------|\n| 扩展方式 | check_permission() 硬编码在循环里 | HOOKS 注册表 + trigger_hooks() |\n| 新函数 | — | register_hook, trigger_hooks |\n| hook 回调 | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| 循环 | 直接调用 check_permission() | 调用 trigger_hooks(\"PreToolUse\", ...) |\n| 退出控制 | 无 | trigger_hooks(\"Stop\", ...) 可阻止退出 |\n| 输入拦截 | 无 | trigger_hooks(\"UserPromptSubmit\", ...) 可注入上下文 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md`(应该直接通过,观察 hook 日志)\n2. `Create a file called test.txt`(通过后观察 PostToolUse 是否触发)\n3. 
`Delete all temporary files in /tmp`(bash + rm 触发权限 hook)\n\n观察重点:每次工具执行前,是否出现了 `[HOOK]` 日志?权限被拒时,是 hook 拦截的还是循环里硬编码的?\n\n---\n\n## 接下来\n\nAgent 现在能安全执行操作了。但它有没有停下来想过\"我应该先做什么,再做什么\"?给它一个复杂任务,它是一上来就动手,还是先列个计划?\n\ns05 TodoWrite → 给 Agent 一个计划工具。先列清单,再做。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` 的完整分析。\n\n### 一、Hook 事件:不止这 4 个,而是 27 个\n\n教学版只讲了 4 个事件。CC 实际有 27 个 hook 事件(`coreTypes.ts:25-53`):\n\n| 类别 | 事件 |\n|------|------|\n| 工具相关 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| 会话相关 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| 用户交互 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| 子 Agent | `SubagentStart`, `SubagentStop` |\n| 压缩相关 | `PreCompact`, `PostCompact` |\n| 团队相关 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| 其他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教学版只讲 4 个核心事件(UserPromptSubmit、PreToolUse、PostToolUse、Stop),因为它们覆盖了一个完整 agent cycle 的关键节点。其他 23 个都是同样的模式。\n\n### 二、HookResult 常用字段摘录\n\nCC 的 `HookResult`(`types/hooks.ts:260-275`)有 14 个字段,以下是常用字段:\n\n| 字段 | 类型 | 用途 |\n|------|------|------|\n| `message` | Message | 可选 UI 消息 |\n| `blockingError` | HookBlockingError | 阻塞错误 → 注入对话让模型自纠 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 执行结果 |\n| `preventContinuation` | boolean | 阻止后续执行 |\n| `stopReason` | string | 停止原因描述 |\n| `permissionBehavior` | allow/deny/ask/passthrough | hook 返回权限决策 |\n| `updatedInput` | Record | 修改工具输入 |\n| `additionalContext` | string | 附加上下文 |\n| `updatedMCPToolOutput` | unknown | MCP 工具输出修改 |\n\n### 三、关键不变式:Hook 'allow' 不能绕过 deny/ask 规则\n\n这是 CC 权限系统最重要的安全设计(`toolHooks.ts:325-331`):**hook 返回 allow 时,仍然要检查 settings.json 的 deny/ask 规则**。即使用户的 hook 脚本说\"允许\",如果在 settings.json 中禁用了这个工具,操作仍然会被阻止。\n\n教学版没有这个层次,只把 PreToolUse 的非 None 返回值解释为阻止本次工具执行。这在教学场景中够了,但在生产环境中会形成安全漏洞。\n\n### 四、stopHookActive 机制\n\nCC 的 Stop hooks 有一个防无限循环机制(`query.ts:212,1300`):`stopHookActive` 状态字段。当 stop hooks 产生 blockingError 时,循环带 `stopHookActive: true` 重入下一轮。后续迭代中 stop hooks 看到这个标志就不会再次触发。这防止了一个永不停机的 bug:模型自纠后 stop hook 再次报错 → 模型再自纠 → stop hook 再报错...\n\n### 
五、hook_stopped_continuation\n\nPostToolUse hooks 返回 `preventContinuation: true` 时,会产生一个 `hook_stopped_continuation` 附件(`toolHooks.ts:117-130`)。query.ts(L1388-1393)检测到后设置 `shouldPreventContinuation = true`,循环退出。这是 \"hook 优雅地让 Agent 停机\" 的机制,不是崩溃,是完成。\n\n### 教学版的简化是刻意的\n\n- 27 个事件 → 4 个(UserPromptSubmit/PreToolUse/PostToolUse/Stop):覆盖 agent cycle 关键节点\n- 14 个字段 → 简单的返回值(None = 继续,非 None = 阻止/续跑):心智负担降到最低\n- Hook allow vs deny/ask 不变式 → 省略:教学版没有 settings.json 层\n- stopHookActive → 省略:教学版 Stop hook 只做简单续跑,不涉及防无限循环机制\n\n
\n\n\n" }, { "version": "s04", "locale": "ja", - "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "title": "s04: Hooks — ループに掛ける、ループには書き込まない", + "content": "# s04: Hooks — ループに掛ける、ループには書き込まない\n\ns01 → s02 → s03 → `s04` → [s05](/ja/s05) → s06 → ... 
→ s20\n\n> *\"ループに掛ける、ループには書き込まない\"* — フックがツール実行の前後に拡張ロジックを注入する。\n>\n> **Harness レイヤー**: フック — ループを侵襲しない拡張ポイント。\n\n---\n\n## 課題\n\ns03 の Agent には権限チェックがある。しかし「bash 呼び出しを毎回ログに記録」「操作後に自動 git add」のような新しいチェックを追加するたびに、`agent_loop` 関数を修正する必要がある。\n\nループはすぐにこうなる:\n\n```python\ndef agent_loop(messages):\n while True:\n # ... LLM call ...\n for block in response.content:\n if block.type == \"tool_use\":\n log_to_file(block) # 一行追加\n check_permission(block) # 一行追加\n notify_slack(block) # さらに一行追加\n output = execute(block)\n auto_git_add(block) # さらに一行追加\n # ... もうループが見えない\n```\n\n拡張したいのは Agent の振る舞いなのに、変更しているのはループそのもの。ループは安定した核心であるべき。拡張は外側に掛ける。\n\n---\n\n## ソリューション\n\n![Hooks Overview](/course-assets/s04_hooks/hooks-overview.ja.svg)\n\ns03 のループと権限ロジックは完全に保持される。唯一の変更点は `check_permission()` をループ本体内からフックに移動したこと。ループはもうチェック関数を直接呼び出さず、代わりに `trigger_hooks(\"PreToolUse\", block)` を呼び、登録済みのフックが何を実行するかを決める。\n\n4 つのイベントで、完全な agent cycle をカバー:\n\n| イベント | 発火タイミング | 典型的な用途 |\n|----------|--------------|-------------|\n| UserPromptSubmit | ユーザー入力後、LLM に入る前 | 入力バリデーション、コンテキスト注入 |\n| PreToolUse | ツール実行前 | 権限チェック、ログ記録 |\n| PostToolUse | ツール実行後 | 副作用(自動 git add など)、出力チェック |\n| Stop | ループが終了する直前 | クリーンアップ(CC は強制続行もサポート) |\n\n拡張は `register_hook()` で追加する。ループは `trigger_hooks()` を呼ぶだけ。\n\n---\n\n## 仕組み\n\n**フック登録簿**:イベント名をコールバックリストにマッピングする辞書。\n\n```python\nHOOKS = {\n \"UserPromptSubmit\": [],\n \"PreToolUse\": [],\n \"PostToolUse\": [],\n \"Stop\": [],\n}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # 戻り値 ≠ None → フックが「止め」と指示\n return result\n return None\n```\n\n教育版では、PreToolUse の非 None 戻り値は実行阻止を意味し、Stop の非 None 戻り値は強制続行を意味する。UserPromptSubmit と PostToolUse の戻り値は未使用。\n\n**UserPromptSubmit**、ユーザー入力後、LLM に入る前に発火。CC では入力の横取りや変更が可能、教育版はログ出力のみ:\n\n```python\ndef context_inject_hook(query: str) -> str | None:\n \"\"\"Inject current working directory info 
into every prompt.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None # return None = 変更なし、プロンプトを通す\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\n```\n\nメインループでは、ユーザー入力直後に発火:\n\n```python\nquery = input(\"s04 >> \")\ntrigger_hooks(\"UserPromptSubmit\", query) # ← LLM に入る前\nhistory.append({\"role\": \"user\", \"content\": query})\nagent_loop(history)\n```\n\n**PreToolUse / PostToolUse**、ツール実行の前後のフック。s03 の権限チェックロジックは PreToolUse フックに包まれ、さらにログフックと大出力リマインダーが追加される:\n\n```python\n# PreToolUse: 権限チェック(s03 のロジック、ループからフックに移動)\ndef permission_hook(block):\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n return \"Permission denied by deny list\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n# PreToolUse: ログ\ndef log_hook(block):\n print(f\"[HOOK] {block.name}(...)\")\n\n# PostToolUse: 大出力リマインダー\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"[HOOK] ⚠ Large output from {block.name}\")\n\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\n```\n\n**Stop**、ループが終了する直前に発火(`stop_reason != \"tool_use\"`)。教育版ではクリーンアップ統計を出力:\n\n```python\ndef summary_hook(messages: list) -> str | None:\n \"\"\"Print a summary when the loop is about to stop.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None # return None = 終了を許可、return 文字列 = 強制続行\n\nregister_hook(\"Stop\", 
summary_hook)\n```\n\nagent_loop 内では、終了前に発火:\n\n```python\nif response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages) # ← 終了する前に\n if force:\n # フックがメッセージを返した → 注入して続行\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n```\n\n**ループ内で変更されたのは一箇所だけ**:s03 は直接 `check_permission(block)` を呼び出していたが、s04 は `trigger_hooks(\"PreToolUse\", block)` に置き換えた:\n\n```python\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s03: if not check_permission(block): ...\n # s04: フックがハードコードを代替\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n```\n\n4 つのフックが agent cycle の重要ノードをカバー:入力→実行前→実行後→終了。ループは trigger_hooks() を呼ぶだけで、具体的なロジックは全てフックコールバックにある。\n\n---\n\n## s03 からの変更\n\n| コンポーネント | 変更前 (s03) | 変更後 (s04) |\n|--------------|-------------|-------------|\n| 拡張方式 | check_permission() をループ内にハードコード | HOOKS 登録簿 + trigger_hooks() |\n| 新規関数 | — | register_hook, trigger_hooks |\n| フックコールバック | — | context_inject_hook, permission_hook, log_hook, large_output_hook, summary_hook |\n| ループ | check_permission() を直接呼び出し | trigger_hooks(\"PreToolUse\", ...) を呼び出し |\n| 終了制御 | なし | trigger_hooks(\"Stop\", ...) が終了を阻止可能 |\n| 入力横取り | なし | trigger_hooks(\"UserPromptSubmit\", ...) がコンテキスト注入可能 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s04_hooks/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Read the file README.md`(そのまま通過するはず、フックログを観察)\n2. `Create a file called test.txt`(作成後、PostToolUse が発火するか観察)\n3. `Delete all temporary files in /tmp`(bash + rm で権限フックが発動)\n\n観察のポイント:各ツール実行前に `[HOOK]` ログが表示されるか? 
権限が拒否されたとき、フックがブロックしたのか、ループ内のハードコードがブロックしたのか?\n\n---\n\n## 次へ\n\nAgent は安全に操作を実行できるようになった。しかし「まず何をして、次に何をすべきか」を立ち止まって考えたことはあるか? 複雑なタスクを与えたとき、すぐに取り掛かるのか、まず計画を立てるのか?\n\n→ s05 TodoWrite:Agent に計画ツールを与える。まずリストを作り、それから実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `toolHooks.ts`(650 行)、`hooks.ts`、`stopHooks.ts`、`coreTypes.ts` の完全分析に基づく。\n\n### 一、Hook イベント:4 つではなく 27 個\n\n教育版が扱うフックイベントは 4 つだけだが、CC には実際には 27 のフックイベントがある(`coreTypes.ts:25-53`):\n\n| カテゴリ | イベント |\n|----------|---------|\n| ツール関連 | `PreToolUse`, `PostToolUse`, `PostToolUseFailure` |\n| セッション関連 | `SessionStart`, `SessionEnd`, `Stop`, `StopFailure`, `Setup` |\n| ユーザー対話 | `UserPromptSubmit`, `Notification`, `PermissionRequest`, `PermissionDenied` |\n| サブエージェント | `SubagentStart`, `SubagentStop` |\n| 圧縮関連 | `PreCompact`, `PostCompact` |\n| チーム関連 | `TeammateIdle`, `TaskCreated`, `TaskCompleted` |\n| その他 | `Elicitation`, `ElicitationResult`, `ConfigChange`, `WorktreeCreate`, `WorktreeRemove`, `InstructionsLoaded`, `CwdChanged`, `FileChanged` |\n\n教育版は 4 つのコアイベント(UserPromptSubmit、PreToolUse、PostToolUse、Stop)のみを取り上げる。これらで agent cycle の重要ノードを全てカバーできる。残り 23 個は同じパターン。\n\n### 二、HookResult よく使うフィールド抜粋\n\nCC の `HookResult`(`types/hooks.ts:260-275`)には 14 のフィールドがある。よく使うもの:\n\n| フィールド | 型 | 用途 |\n|-----------|-----|------|\n| `message` | Message | オプションの UI メッセージ |\n| `blockingError` | HookBlockingError | ブロッキングエラー → 会話に注入してモデルが自己修正 |\n| `outcome` | success/blocking/non_blocking_error/cancelled | 実行結果 |\n| `preventContinuation` | boolean | 後続実行を阻止 |\n| `stopReason` | string | 停止理由の説明 |\n| `permissionBehavior` | allow/deny/ask/passthrough | フックが権限決定を返す |\n| `updatedInput` | Record | ツール入力の変更 |\n| `additionalContext` | string | 追加コンテキスト |\n| `updatedMCPToolOutput` | unknown | MCP ツール出力の変更 |\n\n### 三、重要な不変条件:Hook 'allow' は deny/ask ルールをバイパスできない\n\nこれは CC 権限システムで最も重要なセキュリティ設計(`toolHooks.ts:325-331`):**フックが allow を返しても、settings.json の deny/ask ルールをチェックする。** ユーザーのフックスクリプトが「許可」と言っても、settings.json でそのツールが無効になっていれば、操作は阻止される。\n\n教育版にはこの階層がない。フックが非 None を返せば直接中断。教育目的では十分だが、本番環境ではセキュリティホールになる。\n\n### 四、stopHookActive 機構\n\nCC の Stop フックには無限ループ防止機構がある(`query.ts:212,1300`):`stopHookActive` 状態フィールド。Stop フックが blockingError を発生させると、ループは 
`stopHookActive: true` で次のラウンドに再入する。後続のイテレーションではこのフラグを見て Stop フックを再トリガーしない。これで「永久に止まらない」バグ(モデルが自己修正 → Stop フックが再度エラー → モデルが再修正 → Stop フックが再度エラー → …)を防ぐ。\n\n### 五、hook_stopped_continuation\n\nPostToolUse フックが `preventContinuation: true` を返すと、`hook_stopped_continuation` アタッチメントが生成される(`toolHooks.ts:117-130`)。`query.ts:1388-1393` はそれを検出して `shouldPreventContinuation = true` を設定し、ループが終了する。これは「フックが Agent を優雅に停止させる」機構 — クラッシュではなく、完了。\n\n### 教育版の簡略化は意図的\n\n- 27 イベント → 4(UserPromptSubmit/PreToolUse/PostToolUse/Stop):agent cycle の重要ノードをカバー\n- 14 フィールド → 単純な戻り値(None = 続行、非 None = 中断/続行):認知負荷を最小限に\n- Hook allow vs deny/ask の不変条件 → 省略:教育版に settings.json 層はない\n- stopHookActive → 省略:教育版の Stop フックは単純な続行のみ、無限ループ防止は不要\n\n
\n\n\n" + }, + { + "version": "s05", + "locale": "en", + "title": "s05: TodoWrite — An Agent Without a Plan Drifts Off Course", + "content": "# s05: TodoWrite — An Agent Without a Plan Drifts Off Course\n\ns01 → s02 → s03 → s04 → `s05` → [s06](/en/s06) → s07 → ... → s20\n\n> *\"An agent without a plan goes wherever the wind blows\"* — List the steps first, then execute. Complex tasks are less likely to miss steps.\n>\n> **Harness Layer**: Planning — Let the Agent think before it acts.\n\n---\n\n## The Problem\n\nGive the Agent a complex task: \"Rename all Python files to snake_case, run tests, and fix failures.\"\n\nThe Agent starts working, renames 3 files, runs a test, finds 2 failures, starts fixing. While fixing, it forgets the original goal was \"rename to snake_case\", the test failures have consumed all its attention.\n\nThe longer the conversation, the worse it gets: tool results keep filling the context, diluting the system prompt's influence. A 10-step refactoring: after steps 1-3, the Agent starts improvising because steps 4-10 have been pushed out of its attention.\n\n---\n\n## The Solution\n\n![Todo Overview](/course-assets/s05_todo_write/todo-overview.en.svg)\n\nThe minimal hook structure from the previous chapter is preserved, focusing on the new `todo_write` tool and reminder mechanism. `todo_write` does no actual work, can't read files or run commands, it simply lets the Agent organize its thoughts before diving in.\n\nThe dispatch mechanism is unchanged; the new tool is still routed through `TOOL_HANDLERS[block.name]`. 
However, to demonstrate the todo reminder, a counter was added to the loop: after 3 consecutive rounds without calling `todo_write`, a reminder is injected.\n\n---\n\n## How It Works\n\n**The todo_write tool** accepts a list with statuses, keeps it in the current process memory, and displays progress in the terminal:\n\n```python\nCURRENT_TODOS: list[dict] = []\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n CURRENT_TODOS = todos\n\n lines = [\"\\n## Current Tasks\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"▸\", \"completed\": \"✓\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n```\n\nThe tool definition joins the other 5 in the dispatch map:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n # s05: new entry\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list ...\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"todos\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]},\n },\n },\n },\n },\n },\n },\n]\n\nTOOL_HANDLERS[\"todo_write\"] = run_todo_write\n```\n\n**Nag reminder**, when the model hasn't called `todo_write` for 3 consecutive rounds, a reminder is automatically injected (teaching mechanism; CC source has no fixed round-count logic):\n\n```python\nif rounds_since_todo >= 3 and messages:\n messages.append({\n \"role\": \"user\",\n \"content\": \"Update your todos.\",\n })\n rounds_since_todo = 0\n```\n\nTypical flow when the Agent receives a task: first call `todo_write` to list all steps (all `pending`) → pick one step, set it to `in_progress` → complete it, set 
to `completed` → look at the next `pending` → continue. After 3 rounds without `todo_write`, the loop appends a reminder before the next LLM call.\n\n**Key insight**: todo_write doesn't give the Agent any additional **execution capability**. What it adds is **planning capability**.\n\n---\n\n## Changes from s04\n\n| Component | Before (s04) | After (s05) |\n|-----------|-------------|-------------|\n| Tool count | 5 (bash, read, write, edit, glob) | 6 (+todo_write) |\n| Planning | None | Stateful TODO list + nag reminder |\n| SYSTEM prompt | Generic prompt | Added \"plan before executing\" guidance |\n| Loop | Unchanged | Dispatch unchanged, added rounds_since_todo counter and reminder injection |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s05_todo_write/code.py\n```\n\nTry these prompts:\n\n1. `Refactor s05_todo_write/example/hello.py: add type hints, docstrings, and a main guard` (should list 3 steps first, then execute)\n2. `Create a Python package under s05_todo_write/example/demo_pkg with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review Python files under s05_todo_write/example and fix any style issues`\n\nWhat to watch for: Was the first tool call `todo_write`? How many TODO steps were listed? Did statuses move from `pending` to `in_progress` / `completed` during execution?\n\n---\n\n## What's Next\n\nThe Agent can plan now. But if a task is too large, say \"refactor the entire auth module\", a TODO list alone isn't enough. That task is itself a collection of dozens of subtasks that would drown in a single conversation's context.\n\n→ s06 Subagent: Break large tasks into subtasks, each handled by an independent Agent with its own clean context, no cross-contamination.\n\n
\nDive into CC Source Code\n\nCC has two task systems coexisting (`tasks.ts:133-139`):\n\n- **TodoWrite (V1)**: A simple list tool, data maintained in memory AppState (`TodoWriteTool.ts:65-103`). The teaching version also keeps it in process memory and clears it on exit.\n- **Task System (V2 = s12)**: File-persisted, dependency graph, concurrency locks, ownership.\n\nThe switch is controlled by `isTodoV2Enabled()`. In the current source: V2 is enabled by default in interactive sessions, V1 in non-interactive (SDK) sessions; setting `CLAUDE_CODE_ENABLE_TASKS` forces V2 regardless. Note the source comment \"Force-enable tasks in non-interactive mode\" describes the env var path's purpose, not the default branch's return semantics.\n\nThe teaching version omits the `activeForm` field from the real source (`utils/todo/types.ts:8-15`). CC uses it for the UI spinner to show \"what's being done\"; the teaching version only has terminal output and doesn't need this field.\n\nThe teaching version's nag reminder (3 rounds without update triggers injection) is an educational mechanism. The CC source has no fixed \"3 rounds\" logic; the closest is `TodoWriteTool.ts:72-107` which appends a verification nudge when 3+ todos are all completed without a verification item.\n\nCore increments of the Task System over TodoWrite:\n- File persistence (Claude config directory `tasks/{taskListId}/{taskId}.json`) instead of in-memory list\n- `blockedBy` dependency graph instead of flat list\n- `proper-lockfile` concurrency safety instead of no locking\n- Four separate tools (Create/Get/Update/List) instead of one\n- TaskCreated / TaskCompleted hooks (`TaskCreateTool.ts:80-129`, `TaskUpdateTool.ts:231-260`) for external system integration\n\n
\n\n\n" + }, + { + "version": "s05", + "locale": "zh", + "title": "s05: TodoWrite — 没有计划的 Agent,做着做着就偏了", + "content": "# s05: TodoWrite — 没有计划的 Agent,做着做着就偏了\n\ns01 → s02 → s03 → s04 → `s05` → [s06](/zh/s06) → s07 → ... → s20\n\n> *\"没有计划的 agent 走哪算哪\"* — 先列步骤再动手,长任务更不容易漏项。\n>\n> **Harness 层**: 规划 — 让 Agent 在动手之前先想清楚。\n\n---\n\n## 问题\n\n给 Agent 一个复杂任务:\"把所有 Python 文件改成 snake_case 命名,然后跑测试,修好失败。\"\n\nAgent 开始干活,改了 3 个文件,跑了个测试,发现 2 个失败,开始修。修着修着,它忘了最初是\"改成 snake_case\",测试失败把注意力全吸走了。\n\n对话越长越严重:工具结果不断填满上下文,系统提示的影响力被稀释。一个 10 步重构,做完 1-3 步就开始即兴发挥,因为 4-10 步已经被挤出注意力了。\n\n---\n\n## 解决方案\n\n![Todo Overview](/course-assets/s05_todo_write/todo-overview.svg)\n\n保留上一章的最小 hook 结构,重点看新增的 `todo_write` 工具和 reminder 机制。`todo_write` 本身不做任何实际工作,不能读文件、不能跑命令,只是让 Agent 在动手之前先理清思路。\n\ndispatch 机制不变,新工具仍然走 `TOOL_HANDLERS[block.name]` 分发。但为了演示 todo reminder,循环里加了一个计数器:连续 3 轮没调 `todo_write` 就注入一条提醒。\n\n---\n\n## 工作原理\n\n**todo_write 工具**,接收一个带状态的列表,保存在当前进程内存中,同时在终端显示进度:\n\n```python\nCURRENT_TODOS: list[dict] = []\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n CURRENT_TODOS = todos\n\n lines = [\"\\n## Current Tasks\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"▸\", \"completed\": \"✓\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n```\n\n工具定义和其他 5 个工具一起加入 dispatch map:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n # s05: 新增一条\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list ...\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"todos\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]},\n },\n 
},\n },\n },\n },\n },\n]\n\nTOOL_HANDLERS[\"todo_write\"] = run_todo_write\n```\n\n**Nag reminder**,模型连续 3 轮没调 `todo_write` 时,自动注入一条提醒(教学版机制,CC 源码中没有这个固定轮数逻辑):\n\n```python\nif rounds_since_todo >= 3 and messages:\n messages.append({\n \"role\": \"user\",\n \"content\": \"Update your todos.\",\n })\n rounds_since_todo = 0\n```\n\nAgent 收到任务后的典型流程:先调 `todo_write` 列出所有步骤(全 `pending`)→ 做一个步骤,改成 `in_progress` → 做完改成 `completed` → 看下一个 `pending` → 继续。连续 3 轮没有调用 `todo_write` 时,循环会在下一次 LLM 调用前追加一条 reminder。\n\n**关键洞察**:todo_write 不给 Agent 增加任何**执行能力**。它增加的是**规划能力**。\n\n---\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|------|-----------|-----------|\n| 工具数量 | 5 (bash, read, write, edit, glob) | 6 (+todo_write) |\n| 规划能力 | 无 | 带状态的 TODO 列表 + nag reminder |\n| SYSTEM 提示 | 通用提示 | 加入 \"先计划再执行\" 引导 |\n| 循环 | 不变 | dispatch 不变,新增 rounds_since_todo 计数器和 reminder 注入 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s05_todo_write/code.py\n```\n\n试试这些 prompt:\n\n1. `Refactor s05_todo_write/example/hello.py: add type hints, docstrings, and a main guard`(先列 3 步再执行)\n2. `Create a Python package under s05_todo_write/example/demo_pkg with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review Python files under s05_todo_write/example and fix any style issues`\n\n观察重点:第一次工具调用是不是 `todo_write`?TODO 列了几步?执行过程中状态有没有从 `pending` 变成 `in_progress` / `completed`?\n\n---\n\n## 接下来\n\nAgent 能计划了。但如果一个任务太大,比如\"重构整个认证模块\",光靠 TODO 列表不够。这个任务本身就是几十个小任务的集合,放在同一个对话里会被上下文淹没。\n\ns06 Subagent → 把大任务拆成子任务,每个子任务派一个独立的 Agent。它们有自己的干净上下文,不会互相污染。\n\n
\n深入 CC 源码\n\nCC 中有两套任务系统并存(`tasks.ts:133-139`):\n\n- **TodoWrite(V1)**:一个简单的列表工具,数据在内存 AppState 中维护(`TodoWriteTool.ts:65-103`)。教学版也保存在进程内存里,退出后清空\n- **Task System(V2 = s12)**:文件持久化、依赖图、并发锁、ownership\n\n切换由 `isTodoV2Enabled()` 控制。当前源码的实现逻辑:交互式会话中 V2 默认启用,非交互式会话(SDK)中 V1 默认启用;设置 `CLAUDE_CODE_ENABLE_TASKS` 环境变量可强制启用 V2。注意源码注释 \"Force-enable tasks in non-interactive mode\" 描述的是 env var 路径的用途,和默认分支的返回值语义不同,阅读时需区分。\n\n教学版省略了真实源码中的 `activeForm` 字段(`utils/todo/types.ts:8-15`)。CC 用它给 UI spinner 展示\"正在做什么\",教学版只有终端输出,不需要这个字段。\n\n教学版的 nag reminder(3 轮未更新就注入提醒)是教学机制。CC 源码中没有固定的\"3 轮\"逻辑,更接近的是 `TodoWriteTool.ts:72-107` 中当 3 个以上 todo 全部完成但没有 verification 项时,追加 verification nudge。\n\nTask System 相比 TodoWrite 的核心增量:\n- 文件持久化(Claude 配置目录下 `tasks/{taskListId}/{taskId}.json`)而非内存列表\n- `blockedBy` 依赖图而非平铺列表\n- `proper-lockfile` 并发安全而非无锁\n- 四个独立工具(Create/Get/Update/List)而非一个\n- TaskCreated / TaskCompleted hooks(`TaskCreateTool.ts:80-129`、`TaskUpdateTool.ts:231-260`)供外部系统集成\n\n
\n\n\n" }, { "version": "s05", "locale": "ja", - "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "title": "s05: TodoWrite — 計画なき Agent は途中で道を外れる", + "content": "# s05: TodoWrite — 計画なき Agent は途中で道を外れる\n\ns01 → s02 → s03 → s04 → `s05` → [s06](/ja/s06) → s07 → ... 
→ s20\n\n> *\"計画なき agent は風の向くままに\"* — まず手順を列挙してから実行。長いタスクで見落としが減る。\n>\n> **Harness レイヤー**: 計画 — Agent が行動する前に考えさせる。\n\n---\n\n## 課題\n\nAgent に複雑なタスクを与える:「全 Python ファイルを snake_case にリネームし、テストを実行し、失敗を修正して。」\n\nAgent は作業を開始する。3 つのファイルをリネーム、テストを実行、2 つの失敗を発見、修正を開始。修正しているうちに、本来の目的が「snake_case にリネーム」だったことを忘れる。テストの失敗に注意を全て持っていかれる。\n\n会話が長くなるほど悪化する:ツールの結果がコンテキストを埋め続け、システムプロンプトの影響力が希釈される。10 ステップのリファクタリング:ステップ 1-3 を終えた時点で Agent は即興で動き始める。ステップ 4-10 は既に注意の外に追い出されているから。\n\n---\n\n## ソリューション\n\n![Todo Overview](/course-assets/s05_todo_write/todo-overview.ja.svg)\n\n前章の最小フック構造を保持し、本章では新規の `todo_write` ツールとリマインダー機構に注目する。`todo_write` は実際の作業を何もしない。ファイルを読めない、コマンドを実行できない。Agent が手を動かす前に思考を整理できるようにするだけ。\n\nディスパッチ機構は変わらず、新ツールも `TOOL_HANDLERS[block.name]` を経由する。ただし、todo リマインダーのデモのため、ループにカウンターを追加した:連続 3 ラウンド `todo_write` を呼び出さないとリマインダーが注入される。\n\n---\n\n## 仕組み\n\n**todo_write ツール**は、ステータス付きのリストを受け取り、現在のプロセスメモリに保持し、端末に進捗を表示する:\n\n```python\nCURRENT_TODOS: list[dict] = []\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n CURRENT_TODOS = todos\n\n lines = [\"\\n## Current Tasks\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"▸\", \"completed\": \"✓\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n```\n\nツール定義は他の 5 つと一緒にディスパッチマップに追加される:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n # s05: 新規追加\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list ...\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"todos\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]},\n },\n },\n },\n },\n },\n 
},\n]\n\nTOOL_HANDLERS[\"todo_write\"] = run_todo_write\n```\n\n**Nag リマインダー**、モデルが連続 3 ラウンド `todo_write` を呼び出さないとき、リマインダーが自動的に注入される(教育用機構、CC ソースコードに固定ラウンド数のロジックはない):\n\n```python\nif rounds_since_todo >= 3 and messages:\n messages.append({\n \"role\": \"user\",\n \"content\": \"Update your todos.\",\n })\n rounds_since_todo = 0\n```\n\nAgent がタスクを受け取った後の典型的な流れ:まず `todo_write` を呼び出して全手順を列挙(全て `pending`)→ 一つの手順に取り掛かり、`in_progress` に変更 → 完了したら `completed` に変更 → 次の `pending` を見る → 続行。3 ラウンド `todo_write` がない場合、次の LLM 呼び出し前にリマインダーが追加される。\n\n**重要な洞察**:todo_write は Agent に**実行能力**を何も追加しない。追加するのは**計画能力**だ。\n\n---\n\n## s04 からの変更\n\n| コンポーネント | 変更前 (s04) | 変更後 (s05) |\n|--------------|-------------|-------------|\n| ツール数 | 5 (bash, read, write, edit, glob) | 6 (+todo_write) |\n| 計画能力 | なし | ステータス付き TODO リスト + Nag リマインダー |\n| SYSTEM プロンプト | 汎用プロンプト | 「先に計画してから実行」のガイダンスを追加 |\n| ループ | 不変 | ディスパッチは不変、rounds_since_todo カウンターとリマインダー注入を追加 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s05_todo_write/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Refactor s05_todo_write/example/hello.py: add type hints, docstrings, and a main guard`(まず 3 手順を列挙してから実行するはず)\n2. `Create a Python package under s05_todo_write/example/demo_pkg with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review Python files under s05_todo_write/example and fix any style issues`\n\n観察のポイント:最初のツール呼び出しは `todo_write` か? TODO は何手順列挙されたか? 実行中にステータスが `pending` から `in_progress` / `completed` に変わったか?\n\n---\n\n## 次へ\n\nAgent は計画できるようになった。しかしタスクが大きすぎる場合、例えば「認証モジュール全体をリファクタリング」、TODO リストだけでは不十分。そのタスク自体が数十のサブタスクの集合体で、同じ会話のコンテキストに押し込めると溢れてしまう。\n\n→ s06 Subagent:大きなタスクをサブタスクに分割し、それぞれを独立した Agent に任せる。それぞれが独自のクリーンなコンテキストを持ち、相互汚染がない。\n\n
\nCC ソースコードを深掘り\n\nCC には二つのタスクシステムが共存している(`tasks.ts:133-139`):\n\n- **TodoWrite(V1)**:シンプルなリストツール、データはメモリ AppState で管理(`TodoWriteTool.ts:65-103`)。教育版もプロセスメモリに保持し、終了時に消える\n- **Task System(V2 = s12)**:ファイル永続化、依存グラフ、並行ロック、ownership\n\n切り替えは `isTodoV2Enabled()` で制御される。現在のソースコードの実装:対話型セッションでは V2 がデフォルトで有効、非対話型セッション(SDK)では V1 がデフォルトで有効。`CLAUDE_CODE_ENABLE_TASKS` 環境変数を設定するとセッション種別に関わらず V2 が強制有効になる。ソースコメント「Force-enable tasks in non-interactive mode」は環境変数パスの用途を説明しており、デフォルト分岐の戻り値のセマンティクスとは異なるため注意。\n\n教育版は実際のソースコードにある `activeForm` フィールドを省略している(`utils/todo/types.ts:8-15`)。CC は UI スピナーに「何をしているか」を表示するために使用するが、教育版は端末出力のみでこのフィールドは不要。\n\n教育版の Nag リマインダー(3 ラウンド未更新で注入)は教育用機構。CC ソースコードに固定「3 ラウンド」のロジックはなく、最も近いのは `TodoWriteTool.ts:72-107` で 3 つ以上の todo が全て完了しているのに verification 項目がない場合に verification nudge を追加する処理。\n\nTask System の TodoWrite に対する核心的な増分:\n- メモリリストではなくファイル永続化(Claude 設定ディレクトリ下 `tasks/{taskListId}/{taskId}.json`)\n- 平坦なリストではなく `blockedBy` 依存グラフ\n- ロックなしではなく `proper-lockfile` による並行安全性\n- 一つのツールではなく四つの独立ツール(Create/Get/Update/List)\n- TaskCreated / TaskCompleted フック(`TaskCreateTool.ts:80-129`、`TaskUpdateTool.ts:231-260`)による外部システム統合\n\n
\n\n\n" + }, + { + "version": "s06", + "locale": "en", + "title": "s06: Subagent — Break Large Tasks into Small Ones with Clean Context", + "content": "# s06: Subagent — Break Large Tasks into Small Ones with Clean Context\n\ns01 → s02 → s03 → s04 → s05 → `s06` → [s07](/en/s07) → s08 → ... → s20\n\n> *\"Break large tasks small, each with clean context\"* — Subagent uses an independent messages[], no pollution in the main conversation.\n>\n> **Harness Layer**: Sub-Agent — Context isolation, attention doesn't drift.\n\n---\n\n## The Problem\n\nThe Agent is fixing a bug. It reads 30 files to trace the call chain, chatting for 60 rounds along the way. The messages list grows to 120 entries, most of which are intermediate steps from \"tracing the call chain\" — unrelated to the final goal of \"fixing the bug.\"\n\nThese intermediate steps occupy context space, making the Agent increasingly \"forgetful\" — it can no longer remember what the original problem was.\n\nThink of it differently: when you fix a bug, you'd \"open a new terminal\" to trace the call chain. When done, close the terminal, write the result into your notes, and return to the original terminal to keep fixing. The Agent needs this ability too — **open an independent sub-process, give it an independent message list, let it focus on one thing.**\n\n---\n\n## The Solution\n\n![Subagent Overview](/course-assets/s06_subagent/subagent-overview.en.svg)\n\nThe minimal hook structure and `todo_write` tool from the previous chapter are preserved; this chapter focuses on the new `task` tool. When called, it spawns a sub-Agent with a fresh `messages[]`, running its own loop, and returning only a summary text to the main Agent. Conversation context is discarded, but file system side effects (writes, edits, commands) remain in the working directory.\n\nThe sub-Agent's tools are restricted: it has bash/read/write/edit/glob, but no task, preventing recursive spawning. 
The sub-Agent's tool calls still go through permission hooks; context isolation does not bypass security.\n\n---\n\n## How It Works\n\n**spawn_subagent**, gives the sub-Agent a fresh messages list, runs its own loop, returns only the conclusion:\n\n```python\ndef spawn_subagent(description: str) -> str:\n # Sub-Agent tools: base tools, but no task (no recursion)\n sub_tools = [...]\n messages = [{\"role\": \"user\", \"content\": description}] # fresh messages[]\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=sub_tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({... \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown\"\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({... \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Return only the final text conclusion, all intermediate steps discarded\n return extract_text(messages[-1][\"content\"])\n```\n\nThe main Agent calls it just like any other tool:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n {\"name\": \"todo_write\", ...},\n # s06: new task tool\n {\"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. 
Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n```\n\nFour key design decisions:\n\n| Decision | Choice | Reason |\n|----------|--------|--------|\n| Context isolation | Fresh `messages[]` | Sub-Agent's intermediate steps don't pollute main Agent's context |\n| Return only conclusion | `extract_text(last_message)` | Not returning the entire messages list |\n| No recursion | Sub-Agent has no task tool | Prevents sub-Agent from spawning further sub-Agents |\n| Security not bypassed | Sub-Agent tool calls go through PreToolUse hook | Context isolation does not mean permission isolation |\n\nThe dispatch mechanism is unchanged; the task tool is routed through `TOOL_HANDLERS[block.name]`. The sub-Agent has its own `SUB_SYSTEM` prompt, explicitly instructing \"complete the task, do not delegate further.\"\n\n---\n\n## Changes from s05\n\n| Component | Before (s05) | After (s06) |\n|-----------|-------------|-------------|\n| Tool count | 6 (bash, read, write, edit, glob, todo_write) | 7 (+task) |\n| New function | — | spawn_subagent (independent messages[] + 30-round safety limit) |\n| Context isolation | Everything in the main conversation | Sub-Agent uses fresh messages[] |\n| Loop | Unchanged | Dispatch unchanged, sub-Agent has independent SUB_SYSTEM and hook-protected loop |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s06_subagent/code.py\n```\n\nTry these prompts:\n\n1. `Use a subtask to find what testing framework this project uses` (sub-Agent reads files, main Agent receives only the conclusion)\n2. `Delegate: read all .py files in agents/ and summarize what each one does`\n3. 
`Use a task to create s06_subagent/example/string_tools.py with a slugify(text: str) function, then verify it from the parent agent`\n\nWhat to watch for: Do `[Subagent spawned]` / `[Subagent done]` appear? Do sub-Agent tool calls print as `[sub] ...`? Does the parent Agent continue with only the summary returned by the sub-Agent?\n\n---\n\n## What's Next\n\nThe Agent can now break tasks apart. But different tasks require different knowledge: editing frontend components needs React conventions, writing SQL needs table schemas. Stuffing all this knowledge into the system prompt would blow up the context.\n\n→ s07 Skill Loading: Inject skills on demand instead of piling documents into the system prompt. Load only when needed, as natural as reading a file.\n\n
\nDive into CC Source Code\n\n> The following is based on a complete analysis of CC source code `AgentTool.tsx`, `runAgent.ts`, `forkSubagent.ts`, and `forkedAgent.ts`.\n\n### 1. Not One Pattern, but Three\n\nThe teaching version covers only \"fresh messages[]\". CC actually has three execution modes:\n\n| Mode | Trigger | Context |\n|------|---------|---------|\n| **Normal Subagent** | `subagent_type` specified (normal path) | Truly fresh messages[], only the prompt |\n| **Fork Subagent** | No `subagent_type`, fork gate enabled | Constructs cache-friendly prefix via `buildForkedMessages()`, shares prompt cache |\n| **General-Purpose** | No `subagent_type`, fork gate disabled | Same as Normal |\n\n### 2. Fork Mode: Sharing Prompt Cache\n\nThis is a core concept the teaching version omits. Fork mode (`forkSubagent.ts:60-71`) doesn't create a fresh context. Instead, it constructs a cache-friendly message prefix via `buildForkedMessages()` (`forkSubagent.ts:107-168`), preserving the parent assistant message and generating placeholder tool results. The goal isn't isolation, but making the Anthropic API's prompt cache hit: parent and child Agent's system prompt, tools, and message prefix are byte-identical, so the API doesn't need to recompute.\n\nFive key components for cache hit (`forkedAgent.ts:57-68`): system prompt, tools, model, message prefix, thinking config, must be byte-identical.\n\n### 3. Context Isolation's Precise Granularity\n\n`createSubagentContext()` (`forkedAgent.ts:345-462`) creates the sub-Agent's `ToolUseContext`:\n\n| Field | Behavior |\n|-------|----------|\n| `abortController` | New child controller; parent abort propagates down |\n| `setAppState` | Default no-op; but sync agents share via `shareSetAppState` (`runAgent.ts:697-714`) |\n| `readFileState` | **Cloned from parent** (avoids re-reading same files) |\n| `queryTracking` | New chainId, `depth = parentDepth + 1` |\n\nThe sub-Agent isn't fully isolated: file read state is shared. 
The degree of UI and notification isolation varies by execution path (sync, async, fork, and teammate paths each differ).\n\n### 4. Recursive Fork Protection\n\nThe teaching version uses \"sub-Agent has no task tool\" for recursion protection. The real implementation is more nuanced: `isInForkChild()` (`forkSubagent.ts:78-89`) checks the conversation history for `FORK_BOILERPLATE_TAG` and rejects the fork if it is present. In addition, `constants/tools.ts:36-46` puts the `Agent` tool in every agent's disabled set by default (except when `USER_TYPE === 'ant'`); `forkSubagent.ts:73-89` adds recursion protection specific to fork children; and `agentToolUtils.ts:100-110` grants special allowances in teammate scenarios. So the rule is not simply \"no further sub-Agents.\"\n\n### 5. Permission Bubbling\n\nFork Agent's `permissionMode: 'bubble'` (`forkSubagent.ts:67`) means the sub-Agent's permission prompts bubble up to the parent terminal: the user approves sub-Agent operations in the main terminal.\n\n### 6. Async vs Sync\n\nThe teaching version only shows synchronous sub-Agents (parent waits for child to finish). CC also supports async paths (`AgentTool.tsx:686-764`): when `run_in_background: true`, the sub-Agent launches asynchronously, `{ status: 'async_launched' }` is returned to the parent immediately, and the parent is notified when the sub-Agent completes. Actual triggers go beyond `run_in_background`, including auto-background, assistant force async, and coordinator/proactive paths.\n\n### Teaching Version Simplifications Are Intentional\n\n- Three modes → one (fresh messages): conceptually clear\n- Prompt cache sharing → omitted: teaching version doesn't involve API-layer optimization\n- Recursive fork protection → simplified to \"sub-Agent has no task tool\"\n- Async → omitted (left for s13): s06 focuses on the synchronous model first\n\n
\n\n\n" + }, + { + "version": "s06", + "locale": "zh", + "title": "s06: Subagent — 大任务拆小,每个拿到的都是干净上下文", + "content": "# s06: Subagent — 大任务拆小,每个拿到的都是干净上下文\n\ns01 → s02 → s03 → s04 → s05 → `s06` → [s07](/zh/s07) → s08 → ... → s20\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* — Subagent 用独立 messages[], 不污染主对话。\n>\n> **Harness 层**: 子 Agent — 上下文隔离, 注意力不漂移。\n\n---\n\n## 问题\n\nAgent 在修一个 bug。它读了 30 个文件来追踪调用链,中间聊了 60 轮。messages 列表涨到 120 条,其中大部分是\"追踪调用链\"的中间过程,和\"修 bug\"这个最终目标无关。\n\n这些中间过程占着上下文位置,让 Agent 越来越\"健忘\",它记不住最初的问题是什么了。\n\n换个角度:你修 bug 的时候,会\"开一个新终端\"来追踪调用链。追踪完了,终端关掉,结果写进笔记,回到原来的终端继续修 bug。Agent 也需要这个能力:开一个独立的子进程,给它一个独立的消息列表,让它专心做一件事。\n\n---\n\n## 解决方案\n\n![Subagent Overview](/course-assets/s06_subagent/subagent-overview.svg)\n\n保留上一章的最小 hook 结构和 `todo_write` 工具,本章重点转向新增的 `task` 工具。调用它时,spawn 一个子 Agent,拥有全新的 `messages[]`,跑自己的循环,结束后只把摘要文本回传给主 Agent。对话上下文被丢弃,但文件系统的副作用(写文件、改文件、跑命令)保留在工作目录中。\n\n子 Agent 的工具受限:有 bash/read/write/edit/glob,但没有 task,不能递归 spawn 新的子 Agent。子 Agent 的工具调用仍经过权限 hook,安全策略不因上下文隔离而跳过。\n\n---\n\n## 工作原理\n\n**spawn_subagent**,给子 Agent 一个全新的 messages 列表,跑自己的循环,只回传结论:\n\n```python\ndef spawn_subagent(description: str) -> str:\n # 子 Agent 的工具:基础工具,但没有 task(禁止递归)\n sub_tools = [\n {\"name\": \"bash\", ...}, {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...}, {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n ]\n messages = [{\"role\": \"user\", \"content\": description}] # 全新 messages[]\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=sub_tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({... 
\"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown\"\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({... \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # 只返回最后的文本结论,中间过程全部丢弃\n return extract_text(messages[-1][\"content\"])\n```\n\n主 Agent 调用时,跟调其他工具一样:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n {\"name\": \"todo_write\", ...},\n # s06: 新增 task 工具\n {\"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n```\n\n三个关键设计决策:\n\n| 决策 | 选择 | 原因 |\n|------|------|------|\n| 上下文隔离 | 全新 `messages[]` | 子 Agent 的中间过程不污染主 Agent 的上下文 |\n| 只回传结论 | `extract_text(last_message)` | 不是回传整个 messages 列表 |\n| 禁止递归 | 子 Agent 无 task 工具 | 防止子 Agent 再 spawn 新的子 Agent |\n| 安全策略不跳过 | 子 Agent 工具调用也走 PreToolUse hook | 上下文隔离不代表权限隔离 |\n\ndispatch 机制不变,task 工具通过 `TOOL_HANDLERS[block.name]` 分发。子 Agent 有独立的 `SUB_SYSTEM` 提示,明确要求\"直接完成任务,不要再委派\"。\n\n---\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|------|-----------|-----------|\n| 工具数量 | 6 (bash, read, write, edit, glob, todo_write) | 7 (+task) |\n| 新函数 | — | spawn_subagent(独立 messages[] + 30 轮安全限制) |\n| 上下文隔离 | 全部在主对话中 | 子 Agent 用全新的 messages[] |\n| 循环 | 不变 | dispatch 不变,子 Agent 有独立 SUB_SYSTEM 和 hook 保护的循环 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s06_subagent/code.py\n```\n\n试试这些 prompt:\n\n1. `Use a subtask to find what testing framework this project uses`(子 Agent 去读文件,主 Agent 只收结论)\n2. `Delegate: read all .py files in agents/ and summarize what each one does`\n3. 
`Use a task to create s06_subagent/example/string_tools.py with a slugify(text: str) function, then verify it from the parent agent`\n\n观察重点:是否出现 `[Subagent spawned]` / `[Subagent done]`?子 Agent 的工具调用是否以 `[sub] ...` 输出?主 Agent 最后是否只继续处理子 Agent 返回的摘要?\n\n---\n\n## 接下来\n\nAgent 现在能拆任务了。但每个任务需要的知识不一样:改前端组件需要知道 React 规范,写 SQL 需要知道表结构。这些知识全塞进 system prompt,上下文直接爆了。\n\ns07 Skill Loading → 技能按需注入,不在 system prompt 里堆文档。用到的时候才加载,和读文件一样自然。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `AgentTool.tsx`、`runAgent.ts`、`forkSubagent.ts`、`forkedAgent.ts` 的完整分析。\n\n### 一、不是一种模式,是三种\n\n教学版只讲了\"全新的 messages[]\"。CC 实际有三种执行模式:\n\n| 模式 | 触发条件 | 上下文 |\n|------|---------|--------|\n| **Normal Subagent** | 指定了 `subagent_type`(normal path) | 全新 messages[],只有 prompt |\n| **Fork Subagent** | 没指定 `subagent_type`,fork gate 开启 | 通过 `buildForkedMessages()` 构造 cache-friendly 前缀,共享 prompt cache |\n| **General-Purpose** | 没指定 `subagent_type`,fork gate 关闭 | 同 Normal |\n\n### 二、Fork 模式:为了共享 Prompt Cache\n\n这是教学版没有的核心概念。Fork 模式(`forkSubagent.ts:60-71`)不创建全新上下文,而是通过 `buildForkedMessages()`(`forkSubagent.ts:107-168`)构造 cache-friendly 消息前缀,保留父 assistant message 并生成 placeholder tool results。目的不是隔离,而是让 Anthropic API 的 prompt cache 命中:父子 Agent 的 system prompt、tools、messages 前缀完全一致,API 端不需要重算。\n\n缓存命中的五个关键组件(`forkedAgent.ts:57-68`):system prompt、tools、model、messages 前缀、thinking config,必须字节级一致。\n\n### 三、Context Isolation 的精确粒度\n\n`createSubagentContext()`(`forkedAgent.ts:345-462`)创建子 Agent 的 `ToolUseContext`:\n\n| 字段 | 行为 |\n|------|------|\n| `abortController` | 新的 child controller,父 abort 向下传播 |\n| `setAppState` | 默认 no-op;但 sync agent 通过 `shareSetAppState` 共享(`runAgent.ts:697-714`) |\n| `readFileState` | **从父克隆**(避免重复读相同文件) |\n| `queryTracking` | 新 chainId,`depth = parentDepth + 1` |\n\n子 Agent 不是完全隔离的:文件读取状态是共享的。UI 和通知的隔离程度取决于执行路径(sync/async/fork/teammate 各不同)。\n\n### 四、递归 Fork 防护\n\n教学版用\"子 Agent 不给 task 工具\"表达递归保护。真实实现更精细:`isInForkChild()`(`forkSubagent.ts:78-89`)检查对话历史中是否有 `FORK_BOILERPLATE_TAG`,有就拒绝。但 `constants/tools.ts:36-46` 中 `Agent` 工具默认在所有 agent 的禁用集合里,`USER_TYPE === 'ant'` 时例外;`forkSubagent.ts:73-89` 针对 fork child 有专门的递归保护;`agentToolUtils.ts:100-110` 在 teammate 场景下有特殊放行。不是简单的\"禁止新的子 Agent\"。\n\n### 五、Permission Bubbling\n\nFork Agent 的 `permissionMode: 'bubble'`(`forkSubagent.ts:67`)意味着子 Agent 的权限弹窗冒泡到父终端,用户在主终端里审批子 Agent 的操作。\n\n### 六、Async vs Sync\n\n教学版只展示了同步子 Agent(父等着子跑完)。CC 还支持异步路径(`AgentTool.tsx:686-764`):`run_in_background: true` 
时异步启动,立即向父 Agent 返回 `{ status: 'async_launched' }`,子 Agent 完成后通过通知机制告知父 Agent。实际触发条件不止 `run_in_background`,还有 auto-background、assistant force async、coordinator/proactive 等路径。\n\n### 教学版的简化是刻意的\n\n- 三种模式 → 一种(fresh messages):概念清晰\n- Prompt cache 共享 → 省略:教学版不涉及 API 层优化\n- 递归 fork 防护 → 简化为\"子 Agent 无 task 工具\"\n- Async → 省略(留给 s13):s06 先理解同步模型\n\n
\n\n\n" }, { "version": "s06", "locale": "ja", - "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" + "title": "s06: Subagent — 大きなタスクを分割、それぞれがクリーンなコンテキストを取得", + "content": "# s06: Subagent — 大きなタスクを分割、それぞれがクリーンなコンテキストを取得\n\ns01 → s02 → s03 → s04 → s05 → `s06` → [s07](/ja/s07) → s08 → ... → s20\n\n> *\"大きなタスクは小さく、小さなタスクごとにクリーンなコンテキスト\"* — Subagent は独立した messages[] を使い、メイン会話を汚染しない。\n>\n> **Harness レイヤー**: サブエージェント — コンテキストの隔離、注意の散漫を防ぐ。\n\n---\n\n## 課題\n\nAgent がバグを修正している。呼び出しチェーンを追跡するために 30 のファイルを読み、途中で 60 ラウンドやり取りした。messages リストは 120 件に膨らみ、その大部分は「呼び出しチェーンの追跡」という中間過程 — 「バグ修正」という最終目標とは無関係。\n\nこの中間過程がコンテキストの席を占め、Agent はますます「健忘」になる — 最初の問題が何だったか覚えていられない。\n\n別の見方をすると:バグを修正するとき、あなたは「新しいターミナルを開いて」呼び出しチェーンを追跡するだろう。追跡が終わったらターミナルを閉じ、結果をメモに書き、元のターミナルに戻ってバグ修正を続ける。Agent にもこの能力が必要 — **独立したサブプロセスを開き、独立したメッセージリストを与え、一つのことに集中させる。**\n\n---\n\n## ソリューション\n\n![Subagent Overview](/course-assets/s06_subagent/subagent-overview.ja.svg)\n\n前章の最小フック構造と `todo_write` ツールを保持し、本章は新規の `task` ツールに注目する。呼び出されると、サブエージェントを spawn する。新しい `messages[]` を持ち、自分自身のループを実行し、終了後に要約テキストのみをメイン Agent に返す。会話コンテキストは破棄されるが、ファイルシステムの副作用(書き込み、編集、コマンド実行)は作業ディレクトリに残る。\n\nサブエージェントのツールは制限される:bash/read/write/edit/glob を持つが、task はない。再帰 spawn を防止する。サブエージェントのツール呼び出しも権限フックを経由する。コンテキスト分離は権限のバイパスではない。\n\n---\n\n## 仕組み\n\n**spawn_subagent**、サブエージェントに新しいメッセージリストを与え、自分自身のループを実行し、結論のみを返す:\n\n```python\ndef spawn_subagent(description: str) -> str:\n # サブエージェントのツール:基本ツールのみ、task なし(再帰禁止)\n sub_tools = [...]\n messages = [{\"role\": \"user\", \"content\": description}] # 新規 messages[]\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=sub_tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({... 
\"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown\"\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({... \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # 最後のテキスト結論のみを返す、中間過程はすべて破棄\n return extract_text(messages[-1][\"content\"])\n```\n\nメイン Agent の呼び出しは、他のツールと同じ:\n\n```python\nTOOLS = [\n {\"name\": \"bash\", ...},\n {\"name\": \"read_file\", ...},\n {\"name\": \"write_file\", ...},\n {\"name\": \"edit_file\", ...},\n {\"name\": \"glob\", ...},\n {\"name\": \"todo_write\", ...},\n # s06: 新規 task ツール\n {\"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n```\n\n三つの重要な設計決定:\n\n| 決定 | 選択 | 理由 |\n|------|------|------|\n| コンテキスト隔離 | 新規 `messages[]` | サブエージェントの中間過程がメイン Agent のコンテキストを汚染しない |\n| 結論のみ返却 | `extract_text(last_message)` | messages リスト全体を返すのではない |\n| 再帰禁止 | サブエージェントに task ツールなし | サブエージェントがさらにサブエージェントを spawn するのを防止 |\n| セキュリティのバイパスなし | サブエージェントのツール呼び出しも PreToolUse フックを経由 | コンテキスト分離は権限分離ではない |\n\nディスパッチ機構は変わらず、task ツールは `TOOL_HANDLERS[block.name]` を経由する。サブエージェントは独立した `SUB_SYSTEM` プロンプトを持ち、「タスクを完了し、さらに委託しない」と明示される。\n\n---\n\n## s05 からの変更\n\n| コンポーネント | 変更前 (s05) | 変更後 (s06) |\n|--------------|-------------|-------------|\n| ツール数 | 6 (bash, read, write, edit, glob, todo_write) | 7 (+task) |\n| 新規関数 | — | spawn_subagent(独立 messages[] + 30 ラウンド安全制限) |\n| コンテキスト隔離 | すべてメイン会話内 | サブエージェントが新規 messages[] を使用 |\n| ループ | 不変 | ディスパッチは不変、サブエージェントに独立した SUB_SYSTEM とフック保護されたループ |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s06_subagent/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. 
`Use a subtask to find what testing framework this project uses`(サブエージェントがファイルを読み、メイン Agent は結論のみ受け取る)\n2. `Delegate: read all .py files in agents/ and summarize what each one does`\n3. `Use a task to create s06_subagent/example/string_tools.py with a slugify(text: str) function, then verify it from the parent agent`\n\n観察のポイント:`[Subagent spawned]` / `[Subagent done]` が表示されるか? サブエージェントのツール呼び出しが `[sub] ...` として出力されるか? 親 Agent はサブエージェントが返した要約だけを受け取って続行するか?\n\n---\n\n## 次へ\n\nAgent はタスクを分割できるようになった。しかし各タスクに必要な知識は異なる。フロントエンドコンポーネントの変更には React 規約が必要で、SQL を書くにはテーブル構造を知る必要がある。これらの知識をすべて system prompt に詰め込むと、コンテキストが溢れてしまう。\n\n→ s07 Skill Loading:スキルをオンデマンドで注入する。system prompt にドキュメントを積み上げるのではなく、必要なときだけ読み込む。ファイルを読むのと同じくらい自然に。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `AgentTool.tsx`、`runAgent.ts`、`forkSubagent.ts`、`forkedAgent.ts` の完全分析に基づく。\n\n### 一、一つのパターンではなく三つ\n\n教育版は「新規 messages[]」のみを取り上げる。CC には実際に三つの実行モードがある:\n\n| モード | トリガー | コンテキスト |\n|--------|---------|-------------|\n| **Normal Subagent** | `subagent_type` 指定時(normal path) | 新規 messages[]、プロンプトのみ |\n| **Fork Subagent** | `subagent_type` 未指定、fork gate 有効時 | `buildForkedMessages()` でキャッシュフレンドリーなプレフィックスを構築、プロンプトキャッシュを共有 |\n| **General-Purpose** | `subagent_type` 未指定、fork gate 無効時 | Normal と同じ |\n\n### 二、Fork モード:プロンプトキャッシュの共有のため\n\nこれは教育版にはない核心概念。Fork モード(`forkSubagent.ts:60-71`)は新規コンテキストを作成せず、`buildForkedMessages()`(`forkSubagent.ts:107-168`)でキャッシュフレンドリーなメッセージプレフィックスを構築する。親の assistant message を保持し、placeholder tool results を生成する。目的は隔離ではなく、Anthropic API のプロンプトキャッシュをヒットさせること:親子 Agent の system prompt、tools、messages プレフィックスがバイトレベルで一致するため、API 側で再計算が不要になる。\n\nキャッシュヒットの五つの重要コンポーネント(`forkedAgent.ts:57-68`):system prompt、tools、model、messages プレフィックス、thinking config、バイトレベルで一致する必要がある。\n\n### 三、コンテキスト隔離の精密な粒度\n\n`createSubagentContext()`(`forkedAgent.ts:345-462`)はサブエージェントの `ToolUseContext` を作成:\n\n| フィールド | 挙動 |\n|-----------|------|\n| `abortController` | 新しい子コントローラ、親の abort は下に伝播 |\n| `setAppState` | デフォルトは no-op、ただし sync agent は `shareSetAppState` で共有(`runAgent.ts:697-714`) |\n| `readFileState` | **親からクローン**(同じファイルの再読み込みを回避) |\n| `queryTracking` | 新しい chainId、`depth = parentDepth + 1` |\n\nサブエージェントは完全に隔離されているわけではない。ファイル読み取り状態は共有される。UI と通知の隔離度は実行パスにより異なる(sync/async/fork/teammate でそれぞれ異なる)。\n\n### 四、再帰 Fork 防護\n\n教育版は「サブエージェントに task ツールなし」で再帰防止を表現する。実際の実装はより精密:`isInForkChild()`(`forkSubagent.ts:78-89`)が会話履歴内の `FORK_BOILERPLATE_TAG` をチェックする。しかし `constants/tools.ts:36-46` では `Agent` ツールが全エージェントの無効セットにデフォルト設定(`USER_TYPE === 'ant'` 時は例外)、`forkSubagent.ts:73-89` は fork child 向けの専用再帰保護があり、`agentToolUtils.ts:100-110` は teammate シナリオで特別な許可がある。単純な「サブエージェントの再 spawn 禁止」ではない。\n\n### 五、Permission Bubbling\n\nFork Agent の `permissionMode: 
'bubble'`(`forkSubagent.ts:67`)は、サブエージェントの権限プロンプトが親ターミナルにバブルアップすることを意味する。ユーザーはメインターミナルでサブエージェントの操作を承認する。\n\n### 六、Async vs Sync\n\n教育版は同期サブエージェントのみ(親が子の完了を待つ)を示す。CC は非同期パスもサポート(`AgentTool.tsx:686-764`):`run_in_background: true` の場合、サブエージェントは非同期で起動し、`{ status: 'async_launched' }` を直ちに親に返し、完了時に通知機構で親に知らせる。実際のトリガーは `run_in_background` だけでなく、auto-background、assistant force async、coordinator/proactive パスもある。\n\n### 教育版の簡略化は意図的\n\n- 三つのモード → 一つ(新規 messages):概念的に明確\n- プロンプトキャッシュ共有 → 省略:教育版は API 層の最適化を扱わない\n- 再帰 fork 防護 → 「サブエージェントに task ツールなし」に簡略化\n- Async → 省略(s13 に委ねる):s06 はまず同期モデルを理解する\n\n
\n\n\n" + }, + { + "version": "s07", + "locale": "en", + "title": "s07: Skill Loading — Load Only When Needed", + "content": "# s07: Skill Loading — Load Only When Needed\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/en/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — Inject via tool_result, not system prompt.\n>\n> **Harness Layer**: Knowledge — load on demand, don't fill the context.\n\n---\n\n## The Problem\n\nYour project has a React component spec, a SQL style guide, and an API design doc. You want the Agent to follow these specs automatically. The most straightforward idea — stuff them all into the system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 lines\n + open(\"docs/sql-style.md\").read() # 1500 lines\n + open(\"docs/api-design.md\").read() # 3000 lines\n)\n```\n\n6500 lines of system prompt. The Agent carries these docs on every LLM call — whether it's changing a CSS color or fixing a SQL query. 99% of the content is irrelevant to the current task, burning tokens for nothing.\n\n---\n\n## The Solution\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.en.svg)\n\nThe minimal hook structure, `todo_write`, and sub-Agent from the previous chapter are preserved. This chapter focuses on the new `load_skill` tool. At startup, inject the skill catalog into the SYSTEM prompt; at runtime, register one more tool to load full content, spending tokens only when used.\n\nTwo-level design:\n\n| Level | Location | Timing | Cost |\n|-------|----------|--------|------|\n| 1. Catalog | system prompt | Injected at startup (harness scans skills/) | ~100 tokens/skill, carried every turn |\n| 2. 
Content | tool_result | When Agent calls load_skill | ~2000 tokens/skill, on demand |\n\nThe dispatch mechanism is unchanged, `load_skill` auto-dispatches via `TOOL_HANDLERS[block.name]`.\n\n---\n\n## How It Works\n\n**skills/ directory**, one subdirectory per skill, each containing a `SKILL.md` file:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**Level 1: Inject catalog at startup**: the harness calls `_scan_skills()` at startup to scan the skills/ directory, parsing each SKILL.md's YAML frontmatter (`name`, `description`) into a `SKILL_REGISTRY` dictionary. `list_skills()` generates the catalog from the registry, injected into the SYSTEM prompt. The Agent sees \"which skills I have available\" every turn, with no extra API calls:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**Level 2: load_skill**: the Agent decides \"I need the SQL style guide\" and calls `load_skill(\"sql-style\")`. Lookup goes through the registry, not file paths, eliminating path traversal risk. 
The content is injected via `tool_result`:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\nThe key distinction: skill content is not part of the system prompt. It enters the current messages as a tool result. Subsequent calls carry it along with the history until context compaction, truncation, or session end. This naturally connects to s08's compact: on-demand loading solves \"don't carry what you shouldn't\", compact solves \"how to drop what you should.\"\n\n---\n\n## Changes from s06\n\n| Component | Before (s06) | After (s07) |\n|-----------|-------------|-------------|\n| Tool count | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| Knowledge loading | None | Two-level: startup catalog in SYSTEM + runtime load_skill |\n| SYSTEM prompt | Static string | Startup scan of skills/ injects catalog |\n| Skill registry | None | SKILL_REGISTRY (populated at startup, prevents path traversal) |\n| Loop | Unchanged | Unchanged (skill tool auto-dispatches) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\nTry these prompts:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\nWhat to watch for: Does the Agent know available skills from the SYSTEM catalog? Does `[HOOK] load_skill` appear when full instructions are needed? Does the answer use the loaded skill's instructions?\n\n---\n\n## What's Next\n\nOn-demand loading solved \"don't carry what you shouldn't.\" But another problem looms: after the Agent works for 30 minutes, the messages list fills up with intermediate process. Old tool_results, stale file contents, occupying context but adding no value.\n\n→ s08 Context Compact: A four-layer compaction strategy. Cheap layers run first, expensive layers run last.\n\n
\nDive into CC Source Code\n\n> The following is based on analysis of CC source code `loadSkillsDir.ts`, `SkillTool.ts`, `bundledSkills.ts`, `commands.ts`.\n\n### 1. Skill Sources: Not Just One skills/ Directory\n\nThe teaching version assumes all skills live in a `skills/` directory. CC loads from multiple sources spread across multiple files: `loadSkillsDir.ts` handles user/project/`--add-dir` directories and legacy commands (`.claude/commands/`); `bundledSkills.ts` handles built-in skills; `SkillTool.ts` handles MCP remote skills; `commands.ts` handles command aggregation. Types include managed/policy skills, user skills (`~/.claude/skills/`), project skills (`.claude/skills/`), `--add-dir` skills, legacy commands, dynamic skills, conditional skills (with `paths` frontmatter, activated by file path), bundled skills, plugin skills, MCP skills.\n\n### 2. SKILL.md Frontmatter — Common Fields\n\nCC's SKILL.md YAML frontmatter is parsed by `parseSkillFrontmatterFields()` in `loadSkillsDir.ts`. Common fields include:\n\n| Field | Purpose |\n|-------|---------|\n| `name` / `description` | Display name and description |\n| `when_to_use` | Guides the model on when to invoke |\n| `allowed-tools` | Auto-allow list of tools available to the skill |\n| `context` | `inline` (default) or `fork` (run as sub-Agent) |\n| `model` | Model override (haiku/sonnet/opus/inherit) |\n| `hooks` | Skill-level hook configuration |\n| `paths` | Glob patterns for conditional activation |\n| `user-invocable` | Users can invoke via `/name` |\n\nThe complete field list changes across versions; above are the core fields relevant to the teaching version.\n\n### 3. Precise Implementation of Two-Level Loading\n\n1. **Catalog (at startup)**: `getSkillDirCommands()` scans directory → registers as `Command` objects containing only metadata. `getSkillListingAttachments()` formats the skill list as attachments, budgeted at ~1% of the context window (cap 8000 characters).\n2. 
**Load (on invocation)**: Model calls `Skill` tool (input fields are `skill` + optional `args`; teaching version uses `name`) → `getPromptForCommand()` expands full SKILL.md content → `SkillTool` returns a tool_result with display text `\"Launching skill: {name}\"`, while the actual skill content is injected via `newMessages`. The teaching version merges both into \"injected via tool_result\" as a simplification.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multiple files and sources → 1 `skills/` directory: sufficient to demonstrate the core concept of two-level loading\n- Multiple frontmatter fields → only parse name/description: reduces parsing complexity\n- Forked skills (`context: 'fork'`) → omitted: the teaching version only expands inline skill loading\n- `Skill` tool input `skill`+`args` → teaching version uses `name`: avoids extra argument parsing complexity\n\n
\n\n\n" + }, + { + "version": "s07", + "locale": "zh", + "title": "s07: Skill Loading — 用到的时候才加载", + "content": "# s07: Skill Loading — 用到的时候才加载\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/zh/s08) → s09 → ... → s20\n> *\"用到时再加载, 别全塞 prompt 里\"* — 通过 tool_result 注入, 不塞 system prompt。\n>\n> **Harness 层**: 知识 — 按需加载, 不堆满上下文。\n\n---\n\n## 问题\n\n你的项目有一套 React 组件规范、一份 SQL 风格指南、一份 API 设计文档。你希望 Agent 自动遵守这些规范。最直接的想法,全塞进 system prompt:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行 system prompt。Agent 每次调用 LLM 都带着这些文档——不管是在改 CSS 颜色还是修 SQL 查询。99% 的内容和当前任务无关,白白消耗 token。\n\n---\n\n## 解决方案\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.svg)\n\n保留上一章的最小 hook 结构、`todo_write` 和子 Agent,本章重点转向新增的 `load_skill` 工具。启动时把技能目录注入 SYSTEM prompt,运行时多注册一个工具加载完整内容,用到才花 token。\n\n两层设计:\n\n| 层 | 位置 | 时机 | 代价 |\n|---|------|------|------|\n| 1. 目录 | system prompt | 启动时注入(harness 扫描 skills/) | ~100 tokens/skill,每轮都带 |\n| 2. 
内容 | tool_result | Agent 调用 load_skill 时 | ~2000 tokens/skill,按需 |\n\ndispatch 机制不变,load_skill 通过 `TOOL_HANDLERS[block.name]` 分发。\n\n---\n\n## 工作原理\n\n**skills/ 目录**,每个技能一个子目录,包含 `SKILL.md` 文件:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第一级:启动时注入目录**:harness 启动时调用 `_scan_skills()` 扫描 skills/ 目录,解析每个 SKILL.md 的 YAML frontmatter(`name`、`description`),存入 `SKILL_REGISTRY` 字典。`list_skills()` 从注册表生成目录,注入 SYSTEM prompt。Agent 每轮都能看到\"我有哪些技能可用\",不花额外 API 调用:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第二级:load_skill**:Agent 决定\"我需要 SQL 风格指南\",调用 `load_skill(\"sql-style\")`。通过注册表查找,不走文件路径,没有路径遍历风险。内容通过 `tool_result` 注入:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n关键区别:技能内容不是 system prompt 的一部分,它作为一次工具结果进入当前 messages。后续调用会随历史一起携带,直到上下文压缩、截断或会话结束。这和 s08 的 compact 自然衔接:按需加载解决了\"不该提前带的不要带\",compact 解决\"该丢的怎么丢\"。\n\n---\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|------|-----------|-----------|\n| 工具数量 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知识加载 | 无 | 两级:启动时目录注入 SYSTEM + 运行时 load_skill |\n| SYSTEM 提示 | 静态字符串 | 启动时扫描 skills/ 注入目录 |\n| 技能注册表 | 无 | SKILL_REGISTRY(启动时填充,防路径遍历) |\n| 循环 | 不变 | 不变(skill 工具自动分发) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n试试这些 prompt:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n观察重点:Agent 是否直接从 SYSTEM 里的目录知道有哪些技能?需要完整规范时是否出现 `[HOOK] load_skill`?加载后回答是否使用了对应 skill 的说明?\n\n---\n\n## 接下来\n\n按需加载解决了\"不该带的不要带\"。但另一个问题来了:Agent 连续工作 30 分钟后,messages 列表塞满了中间过程。旧的 tool_result、过时的文件内容,占着上下文但不产生价值。\n\ns08 Context Compact → 四层压缩策略。便宜的先跑,贵的后跑。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` 的分析。\n\n### 一、技能来源:不是只有一个 skills/ 目录\n\n教学版假设所有技能在 `skills/` 目录下。CC 实际从多个来源加载,分布在多个文件中:`loadSkillsDir.ts` 负责从 user/project/`--add-dir` 目录和 legacy commands(`.claude/commands/`)加载;`bundledSkills.ts` 负责内置技能;`SkillTool.ts` 处理 MCP 远程技能;`commands.ts` 负责命令聚合。类型包括 managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(带 `paths` frontmatter,按文件路径激活)、bundled skills、plugin skills、MCP skills。\n\n### 二、SKILL.md Frontmatter 常见字段\n\nCC 的 SKILL.md YAML frontmatter 由 `parseSkillFrontmatterFields()` 解析(`loadSkillsDir.ts`),常见字段包括:\n\n| 字段 | 用途 |\n|------|------|\n| `name` / `description` | 显示名称和描述 |\n| `when_to_use` | 指导模型何时调用 |\n| `allowed-tools` | 技能可用工具的自动允许列表 |\n| `context` | `inline`(默认)或 `fork`(作为子 Agent 运行) |\n| `model` | 模型覆盖(haiku/sonnet/opus/inherit) |\n| `hooks` | 技能级别的 hook 配置 |\n| `paths` | 条件激活的 glob 模式 |\n| `user-invocable` | 用户可以通过 `/name` 调用 |\n\n完整字段列表随版本迭代会变化,以上仅列出教学版涉及的核心字段。\n\n### 三、两级加载的精确实现\n\n1. **Catalog(启动时)**:`getSkillDirCommands()` 扫描目录 → 注册为 `Command` 对象,只包含元数据。`getSkillListingAttachments()` 把技能列表格式化为附件,预算为上下文窗口的 ~1%(上限 8000 字符)。\n2. **Load(调用时)**:模型调 `Skill` 工具(输入字段是 `skill` + 可选 `args`,教学版用 `name`)→ `getPromptForCommand()` 展开完整 SKILL.md 内容 → `SkillTool` 返回的 tool_result 展示文本只是 `\"Launching skill: {name}\"`,真正的技能内容通过 `newMessages` 注入对话。教学版把两者合并为\"通过 tool_result 注入\"是一种简化。\n\n### 教学版的简化是刻意的\n\n- 多文件多来源 → 1 个 `skills/` 目录:足以展示两级加载的核心概念\n- 多个 frontmatter 字段 → 只解析 name/description:减少解析复杂度\n- forked skills(`context: 'fork'`)→ 省略:教学版只展开 inline 技能加载\n- `Skill` 工具输入 `skill`+`args` → 教学版用 `name`:避免参数解析的额外复杂度\n\n
\n\n\n" }, { "version": "s07", "locale": "ja", - "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. 
**TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". 
Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "title": "s07: Skill Loading — 必要なときにだけ読み込む", + "content": "# s07: Skill Loading — 必要なときにだけ読み込む\n\ns01 → s02 → s03 → s04 → s05 → s06 → `s07` → [s08](/ja/s08) → s09 → ... → s20\n> *\"Load when needed, don't stuff the prompt\"* — tool_result で注入、system prompt には詰め込まない。\n>\n> **Harness レイヤー**: 知識 — 必要に応じて読み込み、コンテキストに詰め込まない。\n\n---\n\n## 課題\n\nプロジェクトには React コンポーネント仕様、SQL スタイルガイド、API 設計ドキュメントがある。Agent にこれらの仕様を自動的に守らせたい。最も直接的な方法 — すべて system prompt に詰め込む:\n\n```python\nSYSTEM = (\n f\"You are a coding agent. \"\n + open(\"docs/react-style.md\").read() # 2000 行\n + open(\"docs/sql-style.md\").read() # 1500 行\n + open(\"docs/api-design.md\").read() # 3000 行\n)\n```\n\n6500 行の system prompt。Agent は LLM を呼び出すたびにこれらのドキュメントを運ぶ — CSS の色を変えるときも SQL クエリを修正するときも。99% の内容が現在のタスクと無関係で、トークンを無駄に消費する。\n\n---\n\n## ソリューション\n\n![Skill Overview](/course-assets/s07_skill_loading/skill-overview.ja.svg)\n\n前章の最小フック構造、`todo_write`、サブ Agent を維持し、本章は新規の `load_skill` ツールに注目する。起動時にスキルカタログを SYSTEM prompt に注入し、実行時に完全な内容を読み込むツールを登録する。使ったときだけトークンを消費。\n\n2 層設計:\n\n| 層 | 場所 | タイミング | コスト |\n|---|------|-----------|--------|\n| 1. カタログ | system prompt | 起動時に注入(harness が skills/ をスキャン) | ~100 トークン/スキル、毎ターン携帯 |\n| 2. 
内容 | tool_result | Agent が load_skill を呼び出したとき | ~2000 トークン/スキル、オンデマンド |\n\nディスパッチ機構は変わらず、`load_skill` は `TOOL_HANDLERS[block.name]` を通じて自動的にディスパッチされる。\n\n---\n\n## 仕組み\n\n**skills/ ディレクトリ**、スキルごとに 1 つのサブディレクトリ、それぞれに `SKILL.md` ファイルを含む:\n\n```\nskills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n```\n\n**第 1 層:起動時にカタログを注入**:harness は起動時に `_scan_skills()` を呼び出して skills/ ディレクトリをスキャンし、各 SKILL.md の YAML frontmatter(`name`、`description`)を解析して `SKILL_REGISTRY` 辞書に格納する。`list_skills()` はレジストリからカタログを生成し、SYSTEM prompt に注入する。Agent は毎ターン「どのスキルが利用可能か」を確認できる。追加の API 呼び出しは不要:\n\n```python\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills() # runs once at startup\n\ndef list_skills() -> str:\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. 
\"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n```\n\n**第 2 層:load_skill**:Agent が「SQL スタイルガイドが必要」と判断し、`load_skill(\"sql-style\")` を呼び出す。レジストリを通じて検索し、ファイルパスを経由しないため、パストラバーサルのリスクがない。内容は `tool_result` を通じて注入される:\n\n```python\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n```\n\n重要な違い:スキル内容は system prompt の一部ではなく、ツール結果として現在の messages に入る。後続の呼び出しでは履歴とともに携帯され、コンテキスト圧縮、切り捨て、またはセッション終了まで保持される。これは s08 の compact と自然に接続する:オンデマンド読み込みで「運ぶべきでないものは運ばない」を解決し、compact が「捨てるべきものをどう捨てるか」を解決する。\n\n---\n\n## s06 からの変更点\n\n| コンポーネント | 変更前 (s06) | 変更後 (s07) |\n|---------------|-------------|-------------|\n| ツール数 | 7 (bash, read, write, edit, glob, todo_write, task) | 8 (+load_skill) |\n| 知識読み込み | なし | 2 層:起動時カタログ注入 SYSTEM + 実行時 load_skill |\n| SYSTEM プロンプト | 静的文字列 | 起動時に skills/ をスキャンしてカタログ注入 |\n| スキルレジストリ | なし | SKILL_REGISTRY(起動時に充填、パストラバーサル防止) |\n| ループ | 変更なし | 変更なし(スキルツールは自動ディスパッチ) |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s07_skill_loading/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `What skills are available?`\n2. `Load the code-review skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n\n観察のポイント:Agent は SYSTEM 内のカタログから利用可能なスキルを知っているか? 完全な手順が必要なときに `[HOOK] load_skill` が表示されるか? 読み込んだスキルの説明を使って回答しているか?\n\n---\n\n## 次へ\n\nオンデマンド読み込みで「運ぶべきでないものは運ばない」問題は解決した。しかし別の問題が待っている:Agent が 30 分連続で作業すると、messages リストが中間プロセスで埋め尽くされる。古い tool_result、期限切れのファイル内容、コンテキストを占領しているが価値を生まない。\n\n→ s08 Context Compact:4 層圧縮戦略。安価な層を先に実行、高価な層を後に実行。\n\n
\nCC ソースコードを深掘り\n\n> 以下は CC ソースコード `loadSkillsDir.ts`、`SkillTool.ts`、`bundledSkills.ts`、`commands.ts` の分析に基づく。\n\n### 一、スキルソース:skills/ ディレクトリだけではない\n\n教育版はすべてのスキルが `skills/` ディレクトリにあると想定している。CC は実際に複数のファイルに分散したソースから読み込む:`loadSkillsDir.ts` は user/project/`--add-dir` ディレクトリと legacy commands(`.claude/commands/`)を担当、`bundledSkills.ts` は組み込みスキル、`SkillTool.ts` は MCP リモートスキル、`commands.ts` はコマンド集約を担当。タイプには managed/policy skills、user skills(`~/.claude/skills/`)、project skills(`.claude/skills/`)、`--add-dir` skills、legacy commands、dynamic skills、conditional skills(`paths` frontmatter を持ち、ファイルパスでアクティベート)、bundled skills、plugin skills、MCP skills が含まれる。\n\n### 二、SKILL.md Frontmatter の一般的なフィールド\n\nCC の SKILL.md YAML frontmatter は `parseSkillFrontmatterFields()`(`loadSkillsDir.ts`)で解析される。一般的なフィールド:\n\n| フィールド | 用途 |\n|-----------|------|\n| `name` / `description` | 表示名と説明 |\n| `when_to_use` | モデルにいつ呼び出すかを指導 |\n| `allowed-tools` | スキルが使用可能なツールの自動許可リスト |\n| `context` | `inline`(デフォルト)または `fork`(サブ Agent として実行) |\n| `model` | モデルオーバーライド(haiku/sonnet/opus/inherit) |\n| `hooks` | スキルレベルのフック設定 |\n| `paths` | 条件付きアクティベーションの glob パターン |\n| `user-invocable` | ユーザーが `/name` で呼び出し可能 |\n\n完全なフィールドリストはバージョンによって変動する。上記は教育版に関連するコアフィールドのみ。\n\n### 三、2 層読み込みの正確な実装\n\n1. **カタログ(起動時)**:`getSkillDirCommands()` がディレクトリをスキャン → メタデータのみを含む `Command` オブジェクトとして登録。`getSkillListingAttachments()` がスキルリストを添付ファイルとしてフォーマット、コンテキストウィンドウの ~1% を予算とする(上限 8000 文字)。\n2. **読み込み(呼び出し時)**:モデルが `Skill` ツールを呼び出す(入力フィールドは `skill` + オプションの `args`、教育版は `name` を使用)→ `getPromptForCommand()` が完全な SKILL.md 内容を展開 → `SkillTool` が返す tool_result の表示テキストは `\"Launching skill: {name}\"` のみ、実際のスキル内容は `newMessages` を通じて注入される。教育版では両者を「tool_result を通じて注入」として簡略化している。\n\n### 教育版の単純化は意図的\n\n- 複数ファイル・複数ソース → 1 つの `skills/` ディレクトリ:2 層読み込みの核心概念を示すのに十分\n- 複数の frontmatter フィールド → name/description のみ解析:解析の複雑さを削減\n- forked skills(`context: 'fork'`)→ 省略:教育版では inline スキルの読み込みのみを扱う\n- `Skill` ツールの入力 `skill`+`args` → 教育版は `name` を使用:追加の引数解析の複雑さを回避\n\n
\n\n\n" + }, + { + "version": "s08", + "locale": "en", + "title": "s08: Context Compact — Context Will Fill Up, Have a Way to Make Room", + "content": "# s08: Context Compact — Context Will Fill Up, Have a Way to Make Room\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/en/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — Four-layer compression pipeline: cheap first, expensive last.\n>\n> **Harness Layer**: Compression — clean memory, unlimited sessions.\n\n---\n\n## The Problem\n\nThe agent is running along, then freezes.\n\nIt has bash, read, write — all the capabilities it needs. But it read a 1000-line file (~4000 tokens), then read 30 more files, ran 20 commands. Every command's output, every file's contents, all pile up in the `messages` list.\n\nThe context window is finite. Once full, the API outright rejects the call: `prompt_too_long`.\n\nWithout compression, an agent simply cannot work on large projects.\n\n---\n\n## The Solution\n\n![Compact Overview](/course-assets/s08_context_compact/compact-overview.en.svg)\n\nThe hook structure, skill loading, and sub-Agent from s07 are preserved, with some tools omitted to focus on compaction. The core change: insert three pre-processors (0 API calls) before each LLM call, trigger an LLM summary (1 API call) when tokens still exceed the threshold, and emergency-trim if the API throws an error.\n\nCore design: cheap first, expensive last.\n\n---\n\n## How It Works\n\n![Four-layer compression pipeline](/course-assets/s08_context_compact/compaction-layers.en.svg)\n\n### L1: snip_compact — Trim Irrelevant Old Conversation\n\nThe agent ran 80 turns of conversation, accumulating 160 `messages`. 
The very first \"help me create hello.py\" is barely relevant to current work, yet it still occupies space.\n\nMessage count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n placeholder = {\"role\": \"user\",\n \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:keep_head] + [placeholder] + messages[-keep_tail:]\n```\n\nEntire messages are trimmed, but `tool_result` content within remaining messages keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.\n\n### L2: micro_compact — Placeholder for Old Tool Results\n\n![Old results placeholder](/course-assets/s08_context_compact/micro-compact.en.svg)\n\nThe agent read 10 files consecutively. The full contents of reads 1–7 are still sitting in context, no longer needed, but hogging large amounts of space.\n\nKeep only the 3 most recent `tool_result` entries intact; replace older ones with a one-line placeholder:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\nOld results are cleared, but a single new result can be 500KB — one `cat` of a large file can max out the context. 
→ L3.\n\n### L3: tool_result_budget — Persist Large Results to Disk\n\n![Large results to disk](/course-assets/s08_context_compact/layer1-budget.en.svg)\n\nThe model read 5 large files in one go; all `tool_result` blocks in the last user message total 500KB.\n\nSum the size of all `tool_result` blocks in the last user message. If over 200KB → sort by size, starting from the largest, persist to `.task_outputs/tool-results/`, keeping only a `` marker + a 2000-character preview in context. The model sees the marker and knows the full content is on disk, re-reading it when needed.\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\nThe first three layers are all plain-text / structural operations — 0 API calls — but they cannot \"understand\" conversation content. Context may still be too large. → L4.\n\n### L4: compact_history — Full LLM Summary\n\n![Full LLM summary](/course-assets/s08_context_compact/auto-compact.en.svg)\n\nAll three previous layers have run, but after 30 minutes of continuous work on a huge project, tokens still exceed the threshold.\n\nThree-step process:\n\n1. **Save transcript**: Write the full conversation to `.transcripts/` in JSONL format. The transcript preserves a recoverable record, but the model's active context only contains the summary. For the model's current reasoning, the details are no longer in context. The teaching code does not provide a transcript retrieval tool.\n2. 
**LLM generates summary**: Send conversation history to the LLM, asking it to preserve key information: current goals, important findings, modified files, remaining work, user constraints, etc.\n3. **Replace message list**: All old messages are replaced with a single summary. The teaching version only keeps the summary; the real Claude Code re-attaches some recent files, plans, agent/skill/tool context after compaction.\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # Save full conversation first\n summary = summarize_history(messages) # LLM generates summary\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**Circuit breaker**: After 3 consecutive failures, stop retrying to prevent an infinite loop wasting API calls.\n\n### Reactive: reactive_compact\n\nSometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.\n\nThis triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, trimming to an API-acceptable size with byte-level precision, keeping only the last 5 messages + summary.\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail = messages[-5:]\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *tail]\n```\n\nReactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. 
Full error recovery is deferred to s11.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # Three pre-processors (0 API calls)\n # Order: budget first, so large content is persisted before placeholders\n messages[:] = tool_result_budget(messages) # L3: persist large results\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # Still too much? LLM summary (1 API call)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # Emergency\n reactive_retries += 1\n continue\n raise # retry limit exceeded, raise exception\n # ... tool execution ...\n\n # compact tool: when the model actively calls it, triggers compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n```\n\n**The order must not be swapped.** L3 (budget) runs before L2 (micro) because micro replaces old large tool_results with one-line placeholders — budget must persist the full content before that happens. 
This is why CC source puts `applyToolResultBudget` first.\n\n---\n\n## Changes From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Context management | None (context grows unbounded) | Four-layer compression pipeline + emergency |\n| New functions | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| Tools | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| Loop | LLM call → tool execution | Three pre-processors before each turn + threshold-triggered compact_history |\n| Design principle | — | Cheap first, expensive last |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md` (read multiple files consecutively, observe L2 compressing old results)\n2. `Read every file in s08_context_compact/` (read a large amount of content at once, observe L3 persisting to disk)\n3. Chat for 20+ turns, observe whether `[auto compact]` or `[reactive compact]` appears\n\nWhat to watch for: After each tool execution, are old `tool_result` entries compressed? When tokens exceed the threshold after extended conversation, is summarization triggered automatically?\n\n---\n\n## What's Next\n\nContext compression lets an agent run for a long time without crashing. But after each compression, the preferences and constraints the user told it are also lost. Can we let the agent selectively remember important things?\n\ns09 Memory → three subsystems: choosing what to remember, extracting key information, consolidating and organizing. Across compressions, across sessions.\n\n
\nDeep Dive Into CC Source Code\n\n> The following is based on analysis of CC source code `compact.ts`, `autoCompact.ts`, `microCompact.ts`, and `query.ts`.\n\n### Execution Order Comparison\n\nThe teaching version labels layers L1/L2/L3/L4 for pedagogical clarity, but actual execution order does not match the numbering:\n\n| Dimension | Teaching Version | Claude Code |\n|-----------|-----------------|-------------|\n| Execution order | budget → snip → micro → auto | budget → snip → micro → collapse → auto (`query.ts:379-468`) |\n| snip_compact | Keep head 3 + tail 47 | CC only enables on main thread; implementation not in open-source repo (`HISTORY_SNIP` feature gate), but interface is visible: `snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`, also exposes `SnipTool` for model-initiated snipping. Teaching version's 3/47 are simplified parameters |\n| micro_compact | Text placeholder replacement | Two paths: time-based clears content directly, cached uses API `cache_edits` (legacy path removed) |\n| micro_compact whitelist | By position (most recent 3) | time-based triggers by time threshold; cached triggers by count (`microCompact.ts`) |\n| tool_result_budget | 200KB characters | 200,000 characters (`toolLimits.ts:49`) |\n| compact_history threshold | Character count estimate | Precise tokens: `contextWindow - maxOutputTokens - 13_000` |\n| Summary requirements | 5 categories of info | 9 sections + ``/`` dual tags |\n| Compression prompt | Simple prompt | Double-ended hard guardrails forbidding tool calls |\n| PTL retry | Yes (simplified) | `truncateHeadForPTLRetry()` retreats by message groups (`compact.ts:243-290`) |\n| Post-compaction recovery | None (teaching version only keeps summary) | Auto re-read recent files, plans, agent/skill/tool context |\n| Circuit breaker | 3 times | 3 times (`autoCompact.ts:70`) |\n| Reactive retry | 1 time | CC has more granular tiered retries |\n\n### Execution Order Details\n\nThe real order in CC 
source `query.ts`:\n\n1. `applyToolResultBudget` (L379): persist large results first, ensuring full content is saved\n2. `snipCompact` (L403): trim middle messages\n3. `microcompact` (L414): old result placeholders\n4. `contextCollapse` (L441): independent context management system (not in teaching version)\n5. `autoCompact` (L454): LLM full summary\n\nThe teaching version's budget → snip → micro order matches this. The teaching version does not have the contextCollapse mechanism.\n\n### Full Constant Reference\n\n| Constant | Value | Source File |\n|----------|-------|-------------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| Time micro_compact interval | 60 minutes | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse and sessionMemoryCompact\n\nCC source code has two additional mechanisms not covered in this teaching version:\n\n- **contextCollapse**: An independent context management system that, when enabled, suppresses proactive autocompact (`autoCompact.ts:215-222`), with collapse's commit/blocking flow taking over context management. Manual `/compact` and reactive fallback remain independent paths, unaffected by contextCollapse.\n- **sessionMemoryCompact**: Before compact_history, CC first attempts a lightweight summary using existing session memory (covered in s09) without calling the LLM. This mechanism becomes clearer after learning s09.\n\n### What Does the Compression Prompt Look Like?\n\nCC's compression prompt has two hard requirements:\n\n1. **Absolutely no tool calls**: It begins with `CRITICAL: Respond with TEXT ONLY. 
Do NOT call any tools.`, and appends another REMINDER at the end\n2. **Analyze first, then summarize**: The model must first reason in an `<analysis>` tag, then output the formal summary in a `<summary>` tag. The analysis is stripped during formatting\n\n### Teaching Version Simplifications Are Intentional\n\n- micro_compact uses text placeholders → we don't have API-level `cache_edits` access\n- Tokens estimated via character count → precise tokenizers are out of scope\n- Post-compaction recovery omitted → teaching version only keeps summary, does not auto re-attach files\n- Two auxiliary mechanisms not covered → they fall in the 10% detail category\n\nThe core design principle, cheap first, expensive last, is fully preserved.\n\n
\n\n\n" + }, + { + "version": "s08", + "locale": "zh", + "title": "s08: Context Compact — 上下文总会满,要有办法腾地方", + "content": "# s08: Context Compact — 上下文总会满,要有办法腾地方\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/zh/s09) → s10 → ... → s20\n> *\"上下文总会满, 要有办法腾地方\"* — 四层压缩策略, 便宜的先跑贵的后跑。\n>\n> **Harness 层**: 压缩 — 干净的记忆, 无限的会话。\n\n---\n\n## 问题\n\nAgent 跑着跑着,不动了。\n\n手里有 bash、有 read、有 write,能力是够的。但它读了一个 1000 行的文件(~4000 token),又读了 30 个文件,跑了 20 条命令。每条命令的输出、每个文件的内容,全都堆在 `messages` 列表里。\n\n上下文窗口是有限的。满了之后,API 直接拒绝:`prompt_too_long`。\n\n不压缩,Agent 根本没法在大项目里干活。\n\n---\n\n## 解决方案\n\n![Compact Overview](/course-assets/s08_context_compact/compact-overview.svg)\n\n保留 s07 的 hook 结构、技能加载、子 Agent 等骨架,省略部分工具细节以聚焦压缩。核心变动:每轮 LLM 调用前插入三层预处理器(0 API),token 仍超阈值时触发 LLM 摘要(1 API),API 报错时应急裁剪。\n\n核心设计:便宜的先跑,贵的后跑。\n\n---\n\n## 工作原理\n\n![四层压缩管线](/course-assets/s08_context_compact/compaction-layers.svg)\n\n### L1: snip_compact — 裁掉无关的旧对话\n\nAgent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的\"帮我创建 hello.py\"和当前工作几乎无关了,但全占着位置。\n\n消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n placeholder = {\"role\": \"user\",\n \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:keep_head] + [placeholder] + messages[-keep_tail:]\n```\n\n裁掉了整条消息,但剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。\n\n### L2: micro_compact — 旧工具结果占位\n\n![旧结果占位](/course-assets/s08_context_compact/micro-compact.svg)\n\nAgent 连续读了 10 个文件。第 1-7 次的完整内容还躺在上下文里,早就不需要了,但占着大量空间。\n\n只保留最近 3 条 `tool_result` 的完整内容,更旧的替换为一行占位符:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if 
len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n旧结果清掉了,但单条新结果可能就有 500KB——一个 `cat` 大文件的输出就能打满上下文。→ L3。\n\n### L3: tool_result_budget — 大结果落盘\n\n![大结果落盘](/course-assets/s08_context_compact/layer1-budget.svg)\n\n模型一次读了 5 个大文件,单条 user 消息里所有 `tool_result` 加起来 500KB。\n\n统计最后一条 user 消息里所有 `tool_result` 的总大小。超过 200KB → 按大小排序,从最大的开始落盘到 `.task_outputs/tool-results/`,上下文里只留 `` 标记 + 前 2000 字符预览。模型看到标记后知道完整内容在磁盘上,需要时可以重新读。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n前三层都是纯文本/结构操作,0 API 调用,但也无法\"理解\"对话内容。上下文可能仍然太大。→ L4。\n\n### L4: compact_history — LLM 全量摘要\n\n![LLM 全量摘要](/course-assets/s08_context_compact/auto-compact.svg)\n\n前三层全跑完了,但在超大项目中连续工作 30 分钟后,token 仍然超过阈值。\n\n三步流程:\n\n1. **保存 transcript**:完整对话写入 `.transcripts/`,JSONL 格式。transcript 保留了可恢复记录,但模型的活跃上下文里只剩摘要。对模型当下推理来说,细节已经不在上下文中了。教学代码没有提供 transcript 检索工具。\n2. **LLM 生成摘要**:把对话历史发给 LLM,要求保留当前目标、重要发现、已改文件、剩余工作、用户约束等关键信息。\n3. 
**替换消息列表**:所有旧消息被替换为一条摘要。教学版只保留摘要;真实 Claude Code 会在 compact 后重新附加部分最近文件、计划、agent/skill/tool 等上下文。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先保存完整对话\n summary = summarize_history(messages) # LLM 生成摘要\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**熔断器**:连续失败 3 次后停止重试,防止死循环浪费 API 调用。\n\n### 应急: reactive_compact\n\n有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。\n\n这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,以字节级精度裁剪到 API 可接受的大小,只保留最后 5 条消息 + 摘要。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail = messages[-5:]\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *tail]\n```\n\nreactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 三个预处理器(0 API 调用)\n # 顺序:budget 先跑,确保大内容落盘后再做占位和裁剪\n messages[:] = tool_result_budget(messages) # L3: 大结果落盘\n messages[:] = snip_compact(messages) # L1: 裁中间\n messages[:] = micro_compact(messages) # L2: 旧结果占位\n\n # 还不够?LLM 摘要(1 API 调用)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 应急\n reactive_retries += 1\n continue\n raise # 超过重试上限,抛出异常\n # ... 工具执行 ...\n\n # compact 工具:模型主动调用时触发 compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 结束当前 turn,用压缩后的上下文开始新一轮\n```\n\n**顺序不能换。** L3(budget)在 L2(micro)前面,因为 micro 会把旧的大 tool_result 替换成一行占位符,budget 必须在那之前把完整内容落盘。这也是为什么 CC 源码把 `applyToolResultBudget` 放在最前面。\n\n---\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|------|-----------|-----------|\n| 上下文管理 | 无(上下文无限膨胀) | 四层压缩管线 + 应急 |\n| 新函数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| 循环 | LLM 调用 → 工具执行 | 每轮前跑三层预处理器 + 阈值触发 compact_history |\n| 设计原则 | — | 便宜的先跑,贵的后跑 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(连续读多个文件,观察 L2 压缩旧结果)\n2. `Read every file in s08_context_compact/`(一次性读大量内容,观察 L3 落盘)\n3. 反复对话 20+ 轮,观察是否出现 `[auto compact]` 或 `[reactive compact]`\n\n观察重点:每次工具执行后,旧 tool_result 是否被压缩?连续对话后 token 超阈值时,是否自动触发了摘要?\n\n---\n\n## 接下来\n\n上下文压缩让 Agent 能跑很久不会崩。但每次压缩后,用户之前告诉它的偏好、约束也跟着丢了。能不能让 Agent 有选择地记住重要的事?\n\ns09 Memory → 三个子系统:选择记什么、提取关键信息、整理巩固。跨压缩、跨会话。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` 的分析。\n\n### 执行顺序对照\n\n教学版为了讲解方便按 L1/L2/L3/L4 编号,但实际执行顺序和编号不完全对应:\n\n| 维度 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 执行顺序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 保留头 3 + 尾 47 | CC 仅主线程启用;实现不在开源仓库中(`HISTORY_SNIP` feature gate),但接口可见:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`,还暴露了 `SnipTool` 工具让模型主动调用。教学版的 3/47 是简化参数 |\n| micro_compact | 文本占位符替换 | 两条路径:time-based 直接清内容,cached 走 API `cache_edits`(legacy path 已移除) |\n| micro_compact 白名单 | 按位置(最近 3 条) | time-based 按时间阈值触发;cached 按计数触发(`microCompact.ts`) |\n| tool_result_budget | 200KB 字符 | 200,000 字符(`toolLimits.ts:49`) |\n| compact_history 阈值 | 字符数估算 | 精确 token:`contextWindow - maxOutputTokens - 13_000` |\n| 摘要要求 | 5 类信息 | 9 个部分 + ``/`` 双标签 |\n| 压缩 prompt | 简单 prompt | 首尾双重防呆禁止调工具 |\n| PTL retry | 有(简化) | `truncateHeadForPTLRetry()` 按消息组回退(`compact.ts:243-290`) |\n| 后压缩恢复 | 无(教学版只保留摘要) | 自动重新读取最近文件、计划、agent/skill/tool 等 |\n| 熔断器 | 3 次 | 3 次(`autoCompact.ts:70`) |\n| reactive 重试 | 1 次 | CC 有更精细的分级重试 |\n\n### 执行顺序详解\n\nCC 源码 `query.ts` 中的真实顺序:\n\n1. `applyToolResultBudget`(L379):先处理大结果,确保完整内容落盘\n2. `snipCompact`(L403):裁中间消息\n3. `microcompact`(L414):旧结果占位\n4. `contextCollapse`(L441):独立的上下文管理系统(教学版无)\n5. 
`autoCompact`(L454):LLM 全量摘要\n\n教学版的 budget → snip → micro 顺序与此一致。教学版没有 contextCollapse 机制。\n\n### 完整常量参考\n\n| 常量 | 值 | 源文件 |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 时间 micro_compact 间隔 | 60 分钟 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse 和 sessionMemoryCompact\n\nCC 源码中还有两个机制本教学版没有展开:\n\n- **contextCollapse**:独立的上下文管理系统,启用时抑制 proactive autocompact(`autoCompact.ts:215-222`),由 collapse 的 commit/blocking 流程接管上下文管理。但 manual `/compact` 和 reactive fallback 仍是独立路径,不受 contextCollapse 影响。\n- **sessionMemoryCompact**:compact_history 之前,CC 会先尝试用已有的 session memory(s09 会讲到)做轻量摘要,不调 LLM。这个机制等学完 s09 之后回头看会更清楚。\n\n### 压缩 prompt 长什么样?\n\nCC 的压缩 prompt 有两个硬性要求:\n\n1. **绝对禁止调用工具**:开头就是 `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`,末尾还会再 REMINDER 一次\n2. **先分析再总结**:模型需要先在 `<analysis>` 标签里理清思路,然后在 `<summary>` 标签里输出正式摘要。analysis 在格式化时被剥离\n\n### 教学版的简化是刻意的\n\n- micro_compact 用文本占位 → 我们没有 API 层的 `cache_edits` 权限\n- token 用字符数估算 → 精确 tokenizer 不在教学范围内\n- 后压缩恢复省略 → 教学版只保留摘要,不自动重新附加文件\n- 两个辅助机制不展开 → 属于 10% 的细节\n\n核心设计思想,便宜的先跑贵的后跑,完整保留。\n\n
\n\n\n" }, { "version": "s08", "locale": "ja", - "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 
エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "title": "s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要", + "content": "# s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/ja/s09) → s10 → ... 
→ s20\n> *\"Context will fill up — have a way to make room\"* — 4層圧縮戦略、安価なものを先に、高価なものを後に実行。\n>\n> **Harness レイヤー**: 圧縮 — クリーンな記憶、無限のセッション。\n\n---\n\n## 課題\n\nAgent が動いている途中で、止まってしまう。\n\nbash、read、write は揃っており、能力は十分。しかし 1000 行のファイル(~4000 token)を読み、さらに 30 のファイルを読み、20 のコマンドを実行したとします。各コマンドの出力、各ファイルの内容がすべて `messages` リストに蓄積されます。\n\nコンテキストウィンドウには上限があります。満杯になると、API は即座に拒否します:`prompt_too_long`。\n\n圧縮しなければ、Agent は大規模プロジェクトではまともに動けません。\n\n---\n\n## ソリューション\n\n![Compact Overview](/course-assets/s08_context_compact/compact-overview.ja.svg)\n\ns07 のフック構造、スキルロード、サブ Agent の骨格を維持し、圧縮に焦点を当てるため一部のツールは省略。コアの変更点:各 LLM 呼び出し前に 3 層のプリプロセッサ(0 API)を挿入し、token が閾値を超えた場合は LLM 要約(1 API)をトリガー、API エラー時には緊急トリムを実行。\n\nコア設計:安価なものを先に、高価なものを後に。\n\n---\n\n## 仕組み\n\n![4層圧縮パイプライン](/course-assets/s08_context_compact/compaction-layers.ja.svg)\n\n### L1: snip_compact — 無関係な古い会話を切り捨て\n\nAgent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。\n\nメッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持し、中間を切り捨て:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n placeholder = {\"role\": \"user\",\n \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:keep_head] + [placeholder] + messages[-keep_tail:]\n```\n\nメッセージ全体は切り捨てたが、残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。\n\n### L2: micro_compact — 古いツール結果をプレースホルダに置換\n\n![古い結果のプレースホルダ](/course-assets/s08_context_compact/micro-compact.ja.svg)\n\nAgent が連続して 10 個のファイルを読んだ。1〜7 回目の完全な内容はまだコンテキストに残っており、もう不要だが、大量のスペースを占有している。\n\n直近 3 件の `tool_result` の完全な内容のみを保持し、それより古いものは 1 行のプレースホルダに置換:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, 
block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n古い結果はクリーンアップされたが、1 件の新しい結果だけで 500KB の可能性がある。大きなファイルを `cat` するだけでコンテキストがいっぱいになる。→ L3。\n\n### L3: tool_result_budget — 大きな結果をディスクに退避\n\n![大きな結果のディスク退避](/course-assets/s08_context_compact/layer1-budget.ja.svg)\n\nモデルが一度に 5 つの大きなファイルを読み、1 つの user メッセージ内の全 `tool_result` の合計が 500KB に達した。\n\n最後の user メッセージ内のすべての `tool_result` の合計サイズを集計。200KB を超えた場合 → サイズ順にソートし、最大のものから順に `.task_outputs/tool-results/` に退避。コンテキストには `` マーカー + 先頭 2000 文字のプレビューのみを残す。モデルはマーカーを見て完全な内容がディスク上にあることを認識し、必要に応じて再読み込みできる。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n最初の 3 層はすべて純粋なテキスト/構造操作(0 API 呼び出し)だが、会話内容を「理解」することはできない。コンテキストがまだ大きすぎる可能性がある。→ L4。\n\n### L4: compact_history — LLM 全量要約\n\n![LLM 全量要約](/course-assets/s08_context_compact/auto-compact.ja.svg)\n\n最初の 3 層がすべて実行されたが、超大規模プロジェクトで 30 分間連続作業すると、token がまだ閾値を超えている。\n\n3 ステップのフロー:\n\n1. **transcript を保存**:完全な会話を `.transcripts/` に JSONL 形式で書き出す。transcript は回復可能な記録として保存されるが、モデルのアクティブなコンテキストには要約しか残らない。モデルの現在の推論にとって、詳細はすでにコンテキストにない。教学コードは transcript 検索ツールを提供しない。\n2. **LLM で要約を生成**:会話履歴を LLM に送り、現在の目標、重要な発見、変更済みファイル、残りの作業、ユーザーの制約などの重要な情報を保持するよう指示。\n3. 
**メッセージリストを置換**:すべての古いメッセージが 1 件の要約に置き換えられる。教学版は要約のみを保持する。実際の Claude Code は compact 後に直近のファイル、計画、agent/skill/tool などのコンテキストを再付加する。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先に完全な会話を保存\n summary = summarize_history(messages) # LLM で要約を生成\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**サーキットブレーカー**:連続 3 回失敗したらリトライを停止し、無限ループによる API 呼び出しの浪費を防止。\n\n### 緊急: reactive_compact\n\nAPI がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。\n\nこの時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的で、末尾からバイト単位の精度で API が受け入れ可能なサイズまで切り詰め、最後の 5 件のメッセージ + 要約のみを保持。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail = messages[-5:]\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *tail]\n```\n\nreactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。\n\n### 合わせて実行\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 3 つのプリプロセッサ(0 API 呼び出し)\n # 順序:budget を先に実行し、大きな内容をプレースホルダ化する前に退避\n messages[:] = tool_result_budget(messages) # L3: 大きな結果を退避\n messages[:] = snip_compact(messages) # L1: 中間を切り捨て\n messages[:] = micro_compact(messages) # L2: 古い結果をプレースホルダに\n\n # まだ足りない?LLM 要約(1 API 呼び出し)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 緊急対応\n reactive_retries += 1\n continue\n raise # リトライ上限超過、例外をスロー\n # ... ツール実行 ...\n\n # compact ツール:モデルが能動的に呼び出した場合、compact_history をトリガー\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 現在のターンを終了し、圧縮後のコンテキストで新しく開始\n```\n\n**順序は変えられない。** L3(budget)が L2(micro)の前に実行される理由:micro は古い大きな tool_result を 1 行のプレースホルダに置換するため、budget はその前に完全な内容を退避させる必要がある。CC ソースが `applyToolResultBudget` を最初に配置する理由も同じ。\n\n---\n\n## s07 からの変更点\n\n| コンポーネント | 変更前 (s07) | 変更後 (s08) |\n|------|-----------|-----------|\n| コンテキスト管理 | なし(コンテキストが無限に膨張) | 4 層圧縮パイプライン + 緊急対応 |\n| 新規関数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| ツール | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| ループ | LLM 呼び出し → ツール実行 | 各ラウンド前に 3 層プリプロセッサを実行 + 閾値で compact_history をトリガー |\n| 設計原則 | — | 安価なものを先に、高価なものを後に |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(連続して複数のファイルを読み、L2 の古い結果圧縮を観察)\n2. `Read every file in s08_context_compact/`(一度に大量の内容を読み込み、L3 のディスク退避を観察)\n3. 20+ ラウンドの対話を繰り返し、`[auto compact]` または `[reactive compact]` が表示されるか観察\n\n観察のポイント:ツール実行のたびに、古い tool_result は圧縮されているか?連続対話で token が閾値を超えたとき、要約が自動的にトリガーされたか?\n\n---\n\n## 次へ\n\nコンテキスト圧縮により、Agent は長時間クラッシュせずに動けるようになった。しかし、圧縮のたびにユーザーが以前に伝えた好みや制約も一緒に失われてしまう。Agent が重要なことを選択的に記憶できるようにできないか?\n\ns09 Memory → 3 つのサブシステム:何を記憶するかの選択、重要情報の抽出、整理と統合。圧縮を越え、セッションを越えて。\n\n
\nCC ソースコードの詳細\n\n> 以下は CC ソースコード `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` の分析に基づく。\n\n### 実行順序の対応\n\n教学版は説明の便宜上 L1/L2/L3/L4 と番号を振っているが、実際の実行順序は番号と完全には一致しない:\n\n| 項目 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 実行順序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 先頭 3 + 末尾 47 を保持 | CC はメインスレッドのみ有効;実装はオープンソースリポジトリにない(`HISTORY_SNIP` feature gate)、インターフェースは確認可能:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`、`SnipTool` もモデルが能動的に呼び出し可能。教学版の 3/47 は簡略パラメータ |\n| micro_compact | テキストプレースホルダで置換 | 2 つのパス:time-based は直接内容をクリア、cached は API の `cache_edits` を使用(legacy パスは削除済み) |\n| micro_compact ホワイトリスト | 位置による(直近 3 件) | time-based は時間閾値でトリガー、cached はカウントでトリガー(`microCompact.ts`) |\n| tool_result_budget | 200KB 文字 | 200,000 文字(`toolLimits.ts:49`) |\n| compact_history 閾値 | 文字数で推定 | 精密な token 数:`contextWindow - maxOutputTokens - 13_000` |\n| 要約の要求 | 5 種類の情報 | 9 つのセクション + `<analysis>`/`<summary>` デュアルタグ |\n| 圧縮プロンプト | シンプルなプロンプト | 先頭と末尾に二重の安全ガードでツール呼び出しを禁止 |\n| PTL retry | あり(簡略版) | `truncateHeadForPTLRetry()` がメッセージグループ単位でロールバック(`compact.ts:243-290`) |\n| 圧縮後のリカバリ | なし(教学版は要約のみ保持) | 直近のファイル、計画、agent/skill/tool などの自動再付加 |\n| サーキットブレーカー | 3 回 | 3 回(`autoCompact.ts:70`) |\n| reactive リトライ | 1 回 | CC にはより精緻な段階別リトライがある |\n\n### 実行順序の詳細\n\nCC ソース `query.ts` での実際の順序:\n\n1. `applyToolResultBudget`(L379):まず大きな結果を処理し、完全な内容を退避\n2. `snipCompact`(L403):中間メッセージを切り捨て\n3. `microcompact`(L414):古い結果のプレースホルダ化\n4. `contextCollapse`(L441):独立したコンテキスト管理システム(教学版にはなし)\n5. 
`autoCompact`(L454):LLM 全量要約\n\n教学版の budget → snip → micro の順序はこれと一致する。教学版には contextCollapse メカニズムがない。\n\n### 完全な定数リファレンス\n\n| 定数 | 値 | ソースファイル |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 時間ベース micro_compact 間隔 | 60 分 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse と sessionMemoryCompact\n\nCC ソースコードには、この教学版では展開していない 2 つのメカニズムが存在する:\n\n- **contextCollapse**:独立したコンテキスト管理システム。有効時には proactive autocompact を抑制し(`autoCompact.ts:215-222`)、collapse の commit/blocking フローがコンテキスト管理を引き継ぐ。ただし manual `/compact` と reactive fallback は独立パスのままで、contextCollapse の影響を受けない。\n- **sessionMemoryCompact**:compact_history の前に、CC は既存の session memory(s09 で解説)を使った軽量要約を先に試みる。LLM を呼び出さない。このメカニズムは s09 を学んだ後に振り返るとより理解しやすい。\n\n### 圧縮プロンプトの中身\n\nCC の圧縮プロンプトには 2 つの厳格な要件がある:\n\n1. **ツール呼び出しの絶対禁止**:冒頭が `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.` で、末尾にも再度 REMINDER がある\n2. **先に分析してから要約**:モデルはまず `<analysis>` タグで思考を整理し、その後 `<summary>` タグで正式な要約を出力する。analysis はフォーマット時に除去される\n\n### 教学版の簡略化は意図的\n\n- micro_compact でテキストプレースホルダを使用 → API 層の `cache_edits` 権限がないため\n- token を文字数で推定 → 精密な tokenizer は教学の対象外\n- 圧縮後のリカバリを省略 → 教学版は要約のみを保持し、ファイルの自動再付加を行わない\n- 2 つの補助メカニズムを展開しない → 10% の細部に属する\n\nコア設計思想、安価なものを先に高価なものを後に、は完全に保持されている。\n\n
\n\n\n" + }, + { + "version": "s09", + "locale": "en", + "title": "s09: Memory — Compression Loses Details, Keep a Layer That Doesn't", + "content": "# s09: Memory — Compression Loses Details, Keep a Layer That Doesn't\n\ns01 → ... → s07 → s08 → `s09` → [s10](/en/s10) → s11 → ... → s20\n> *\"Compression loses details, keep a layer that doesn't\"* — File store + index + on-demand loading, across compactions, across sessions.\n>\n> **Harness Layer**: Memory — knowledge that survives compaction and sessions.\n\n---\n\n## The Problem\n\ns08's autoCompact preserves current goals, remaining work, and user constraints in the summary, but details get lost: \"use tabs not spaces\" might get simplified to \"user has code style preferences\". And when you start a new session, even the summary is gone.\n\nLLMs have no persistent state; all information lives in the context window. When context fills up, it gets compressed, and compression is lossy. What's needed is a storage layer that doesn't participate in compression and persists across sessions.\n\n---\n\n## The Solution\n\n![Memory Overview](/course-assets/s09_memory/memory-overview.en.svg)\n\nThe s08 compression pipeline is preserved, focusing on memory. Storage uses the filesystem: a `.memory/` directory where each memory is a `.md` file with YAML frontmatter (`name` / `description` / `type`). When files accumulate, an index is needed: `MEMORY.md` holds one link per line and gets injected into the SYSTEM.\n\nKey design: the index stays in SYSTEM prompt (cacheable by prompt cache), file content is injected on demand (matched by filename/description to the current conversation, without breaking the cache). Writing has two paths: the user explicitly says \"remember\", or extraction runs in the background after each turn. 
When files accumulate, periodic consolidation deduplicates.\n\nFour memory types, each answering a different question:\n\n| Type | Answers | Example |\n|------|---------|---------|\n| user | Who you are | \"Use tabs not spaces\" |\n| feedback | How to work | \"Don't mock the database\" |\n| project | What's happening | \"Auth rewrite is compliance-driven\" |\n| reference | Where to find things | \"Pipeline bugs are in Linear INGEST\" |\n\n---\n\n## How It Works\n\n![Memory Subsystems](/course-assets/s09_memory/memory-subsystems.en.svg)\n\n### Storage: Markdown Files + Index\n\nEach memory is a `.md` file with YAML frontmatter for metadata:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` is the index, one link per line:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\nWriting a new memory automatically rebuilds the index:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### Loading: Two Paths\n\n**Path 1: Index in SYSTEM.** `build_system()` reads `MEMORY.md` every turn and injects the memory catalog into the SYSTEM prompt. The index in SYSTEM can be cached by prompt cache, avoiding resending it every turn.\n\n**Path 2: Relevant memories on demand.** Before each LLM call, `load_memories()` sends the recent conversation and the memory catalog (name + description) to the LLM as a lightweight side-query, selects relevant filenames, then reads and injects their contents. 
Capped at 5 to control cost.\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nIf the side-query fails (API error, JSON parse failure), it falls back to keyword matching on name + description.\n\n### Writing: Extraction After Each Turn\n\nUsers don't always say \"remember this\". Preferences are usually scattered across normal dialogue: \"tabs are better than spaces\", \"let's use single quotes from now on\".\n\n`extract_memories()` runs when each turn ends, triggered when the model stops without a tool_use (indicating the conversation has reached a natural break):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # Extract new memories from recent dialogue\n consolidate_memories() # Check if consolidation is needed\n return\n```\n\nBefore extraction, existing memories are checked to avoid duplicates. 
The extraction prompt asks the LLM to return a JSON array of `{name, type, description, body}`, writing files only when genuinely new information is found.\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### Consolidation: Low-Frequency Deduplication\n\nMemory files accumulate. `consolidate_memories()` triggers when the file count reaches a threshold (default 10), asking the LLM to deduplicate, merge contradictions, and prune stale memories:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # Too few, not worth consolidating\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC calls this process **Dream**, with four gates in practice: time interval, scan throttle, session count, file lock. The teaching version simplifies to a file-count threshold.\n\n### What Memory Stores\n\nMemory stores information that remains useful across sessions: user preferences, recurring feedback, project background, common entry points, and investigation clues. It focuses on \"what will be useful later\" and brings that information back through an index plus on-demand loading.\n\nSession memory focuses on continuity inside one session: what context should survive after compaction. 
The two work together: Memory handles long-term knowledge; session memory handles the current session across compaction.\n\n---\n\n## Changes From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Memory capability | None (preferences degrade with compaction) | Storage + loading + extraction + consolidation |\n| New functions | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| Storage | — | .memory/MEMORY.md index + .memory/*.md files |\n| Tools | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| Loop | Only compression each turn | Memory injection + compression + post-turn extraction + periodic consolidation |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\nTry these prompts (enter across multiple turns, observe memory accumulation and loading):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py` (observe whether the Agent uses tabs)\n3. `What did I tell you about my preferences?` (observe whether the Agent remembers)\n4. `I also prefer single quotes over double quotes for strings.`\n\nWhat to watch for: Does `[Memory: extracted N new memories]` appear after each turn? Are `.md` files generated in `.memory/`? Is `MEMORY.md` index updated? Does the Agent automatically load previous memories in new conversations?\n\n---\n\n## What's Next\n\nMemory, compression, and tools are all in place. But the system prompt is still a hardcoded string. Adding a new tool means manually adding a description; switching projects means rewriting the whole prompt. Prompts should be assembled at runtime.\n\ns10 System Prompt → segments + runtime assembly. Different projects, different tools, different prompts.\n\n
\nDeep Dive Into CC Source Code\n\n> The following is based on analysis of CC source code under `src/` in `memdir/`, `services/`, `utils/`, `query/`. Line numbers verified against source.\n\n### Source Code Paths\n\n| File | Lines | Responsibility |\n|------|-------|---------------|\n| `memdir/memdir.ts` | 507 | Core: MEMORY.md definition (`34-38`), memory behavior instructions distinguishing memory/plan/tasks (`199-266`), `loadMemoryPrompt()` three paths (`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query memory selection (`18-24` system prompt, `97-122` call logic) |\n| `memdir/memoryTypes.ts` | 271 | Type definitions, frontmatter fields |\n| `memdir/memoryScan.ts` | — | Scan .md files, exclude MEMORY.md, read frontmatter, max 200 files, sorted by mtime desc (`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | Forked agent extraction, restricted permissions, `skipTranscript: true`, `maxTurns: 5` (`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream consolidation, four-layer gating (`63-66` defaults, `130-190` gating, `224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | Session-level memory management |\n| `services/compact/sessionMemoryCompact.ts` | — | Session memory lightweight summary, thresholds 10K/5/40K (`56-61`) |\n| `utils/attachments.ts` | — | Injection budget: 200 lines / 4096 bytes per file, 60KB per session (`269-288`); find relevant memory by query (`2196-2241`) |\n| `query.ts` | — | Memory prefetch at start of each user turn (`301-304`), non-blocking collection (`1592-1614`) |\n| `query/stopHooks.ts` | — | Stop hook fire-and-forget triggers extraction and Dream (`141-155`) |\n\n### Memory Selection: LLM, Not Embedding\n\nCC uses **Sonnet itself to select** (`findRelevantMemories.ts`), not embedding vector similarity:\n\n1. `memoryScan.ts` scans all `.md` files in `.memory/` (excluding MEMORY.md), max 200 files, sorted by mtime descending\n2. 
Lists all memory files' `name` + `description` as a catalog\n3. Sends to Sonnet side-query: \"Select truly useful memories by name and description (max 5). Skip if unsure.\"\n4. Sonnet returns `{ selected_memories: [\"file1.md\", ...] }`\n5. Selected files' full contents are read (≤ 200 lines / 4096 bytes per file) and injected. Total session budget: 60KB\n\nAt the start of each user turn, `query.ts:301-304` starts memory prefetch (async); after tool execution, `1592-1614` collects completed results non-blocking.\n\n### Extraction Timing: Stop Hook, Not After autoCompact\n\nTrigger location (`stopHooks.ts:141-155`): inside `handleStopHooks()`, fire-and-forget triggers extraction and Dream. The teaching version places extraction in the `stop_reason != \"tool_use\"` branch, matching the direction.\n\nCC's extraction runs via forked agent (`extractMemories.ts:371-427`): restricted permissions, `skipTranscript: true`, `maxTurns: 5`. Also has overlap protection: if the main Agent already wrote memory files, extraction is skipped.\n\n### Memory File Format\n\nCC uses Markdown + YAML frontmatter, consistent with the teaching version. Four types: `user`, `feedback`, `project`, `reference`.\n\n`memdir.ts:34-38` defines index constraints: `MEMORY.md` max 200 lines / 25KB. `memdir.ts:199-266` builds memory behavior instructions, explicitly distinguishing memory from plan and tasks. Storage location: `~/.claude/projects//memory/`.\n\n### Dream: Four-Layer Gating\n\nNot \"triggered when idle\" or \"consolidate when count is enough\", but four gates (`autoDream.ts`, defaults `63-66`, gating logic `130-190`):\n\n1. **Time gate**: ≥ 24 hours since last consolidation\n2. **Scan throttle**: Avoid frequent filesystem scans\n3. **Session gate**: ≥ 5 session transcripts modified since last consolidation\n4. 
**Lock gate**: No other process currently consolidating (`.consolidate-lock` file)\n\nThe merge itself runs via forked agent (`224-233`): locate → collect recent signals → merge and write files → prune and update index. Lock file mtime serves as lastConsolidatedAt. Crash recovery: lock auto-expires after 1 hour.\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| Persistence | Cross-session | Single session |\n| Storage | Multiple .md files in `memory/` | `session-memory//memory.md` |\n| Loaded into | system prompt | compact summary |\n| Purpose | Cross-session knowledge accumulation | Cross-compact context continuity |\n\nsessionMemoryCompact (mentioned in s08) uses Session Memory: before autoCompact, it reads the session memory file and, if sufficient (≥ 10K tokens, ≥ 5 text messages, ≤ 40K tokens, `sessionMemoryCompact.ts:56-61`), uses it as a summary without calling the LLM.\n\n### Where the Real Implementation Is More Complex\n\n- **Feature flags**: Memory features have multiple feature gate layers\n- **Team memory**: Shared team memories, `loadMemoryPrompt()` has a dedicated path (not covered in teaching version)\n- **KAIROS**: Timing-aware memory extraction strategy, daily-log mode in `loadMemoryPrompt()`\n- **Prompt cache**: Memory injection must account for prompt cache TTL, avoiding full system prompt rewrites each turn\n- **File locks**: Concurrency control for multi-process scenarios\n- **Memory prefetch**: Async prefetch, non-blocking main flow\n\n### Teaching Version Simplifications Are Intentional\n\n- LLM side-query → LLM side-query + keyword fallback: teaching version keeps LLM selection, adds fallback path\n- Memory JSON → Markdown + frontmatter: teaching version matches CC\n- Stop hook trigger → `stop_reason != \"tool_use\"` branch: same direction\n- Four-layer gating → file-count threshold: teaching version lacks transcript system and multi-session concepts\n- Forked agent + restricted permissions → 
direct call: teaching version has no subprocess isolation\n\n
\n\n\n" + }, + { + "version": "s09", + "locale": "zh", + "title": "s09: Memory — 压缩会丢细节,要有一层不丢的", + "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n![Memory Overview](/course-assets/s09_memory/memory-overview.svg)\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说\"记住\",或者每轮结束后后台提取。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n![Memory Subsystems](/course-assets/s09_memory/memory-subsystems.svg)\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n 
_rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。\n\n**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 从最近对话提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects/<project>/memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory/<session-id>/memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n
\n\n\n" }, { "version": "s09", "locale": "ja", - "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. 
`/inbox`と入力してリーダーのインボックスを手動確認する\n" + "title": "s09: Memory — 圧縮は詳細を失う、失わない層が必要", + "content": "# s09: Memory — 圧縮は詳細を失う、失わない層が必要\n\ns01 → ... → s07 → s08 → `s09` → [s10](/ja/s10) → s11 → ... → s20\n> *\"圧縮は詳細を失う、失わない層が必要\"* — ファイルストア + インデックス + オンデマンド読み込み。圧縮を越え、セッションを越えて。\n>\n> **Harness レイヤー**: 記憶 — 圧縮とセッションを越える知識の蓄積。\n\n---\n\n## 課題\n\ns08 の autoCompact は現在の目標、残りの作業、ユーザーの制約をサマリに保持するが、詳細は失われる:「タブでインデント、スペース不可」が「ユーザーにコードスタイルの好みあり」と簡略化される。そして新しいセッションを開始すると、サマリすらない。\n\nLLM には永続状態がなく、すべての情報はコンテキストウィンドウ内にある。コンテキストが満杯になれば圧縮され、圧縮は非可逆。圧縮に参加せず、セッションを越えて保持されるストレージ層が必要。\n\n---\n\n## ソリューション\n\n![Memory Overview](/course-assets/s09_memory/memory-overview.ja.svg)\n\ns08 の圧縮パイプラインを維持し、記憶に焦点を当てる。ストレージにはファイルシステムを採用:`.memory/` ディレクトリに各記憶を `.md` ファイルとして保存、YAML frontmatter(`name` / `description` / `type`)付き。ファイルが増えたらインデックスが必要:`MEMORY.md` に 1 行 1 リンクを記録し、SYSTEM に注入。\n\n重要な設計:インデックスは SYSTEM prompt に常駐(prompt cache でキャッシュ可能)、ファイル内容はオンデマンド注入(filename/description で現在の会話にマッチ、cache を破壊しない)。書き込みは 2 つのパス:ユーザーが明示的に「覚えて」と言うか、毎ターン終了後にバックグラウンドで抽出。ファイルが蓄積されたら、定期的に整理して重複排除。\n\n4 種類の記憶、それぞれ異なる質問に答える:\n\n| タイプ | 何に答えるか | 例 |\n|--------|-------------|-----|\n| user | あなたは誰か | \"タブでスペース不可\" |\n| feedback | どう作業するか | \"DB をモックしない\" |\n| project | 何が起きているか | \"auth 書き直しはコンプライアンス主導\" |\n| reference | どこで探すか | \"パイプラインのバグは Linear INGEST\" |\n\n---\n\n## 仕組み\n\n![Memory Subsystems](/course-assets/s09_memory/memory-subsystems.ja.svg)\n\n### ストレージ:Markdown ファイル + インデックス\n\n各記憶は `.md` ファイル、YAML frontmatter でメタデータを記録:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` はインデックス、1 行に 1 リンク:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n新しい記憶を書き込むとインデックスを自動再構築:\n\n```python\ndef 
write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 読み込み:2 つのパス\n\n**パス 1:インデックスを SYSTEM に常駐。** `build_system()` は毎ターン SYSTEM を再構築する際に `MEMORY.md` を読み込み、記憶カタログを注入。SYSTEM prompt 内のインデックスは prompt cache でキャッシュ可能で、毎ターン再送不要。\n\n**パス 2:関連記憶をオンデマンド注入。** 各 LLM 呼び出し前、`load_memories()` は最近の会話と記憶カタログ(name + description)を LLM に軽量 side-query として送信し、関連するファイル名を選択、ファイル内容を読み込んで注入。上限 5 件でコストを制御。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. 
Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nside-query が失敗した場合(API エラー、JSON パース失敗)、name + description のキーワードマッチにフォールバック。\n\n### 書き込み:毎ターン終了後の抽出\n\nユーザーが毎回「これを覚えて」と言うわけではない。好みは通常、通常の会話の中に散らばっている:「タブの方がスペースより良い」「これからはシングルクォートにしよう」。\n\n`extract_memories()` は各ターン終了時に実行、モデルが tool_use なしで停止した場合にトリガー(会話が自然な区切りに達したことを示す):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 最近の会話から新しい記憶を抽出\n consolidate_memories() # 整理が必要かチェック\n return\n```\n\n抽出前に既存の記憶を確認し、重複を回避。抽出プロンプトは LLM に `{name, type, description, body}` の JSON 配列を要求、本当に新しい情報がある場合のみファイルに書き込む。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低頻度の重複排除\n\n記憶ファイルは蓄積される。`consolidate_memories()` はファイル数が閾値(デフォルト 10)に達した時にトリガー、LLM に重複排除、矛盾の統合、古い記憶の剪定を依頼:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 少なすぎる、整理する価値なし\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC はこのプロセスを **Dream** と呼び、実際には 4 層のゲートがある:時間間隔、スキャンスロットル、セッション数、ファイルロック。教学版はファイル数閾値に簡略化。\n\n### Memory に保存するもの\n\nMemory はセッションを越えて有用な情報を保存する:ユーザーの好み、繰り返し出るフィードバック、プロジェクト背景、よく使う入口、調査の手がかりなど。「あとでまた使うもの」を対象にし、インデックス + オンデマンド読み込みで現在の会話に戻す。\n\nsession memory は 1 つのセッション内の連続性を扱う:compact 後も現在の会話に残すべき文脈を保持する。両者は役割が分かれている。Memory は長期知識を扱い、session memory は現在のセッションを compact 越しにつなぐ。\n\n---\n\n## s08 からの変更点\n\n| コンポーネント | 変更前 (s08) | 変更後 (s09) |\n|-----------|-------------|-------------|\n| 記憶能力 | なし(圧縮後、好みはサマリと共に劣化) | ストレージ + 読み込み + 抽出 + 整理 |\n| 新規関数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| ストレージ | — | .memory/MEMORY.md インデックス + .memory/*.md ファイル |\n| ツール | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| ループ | 毎ターン圧縮のみ | 記憶注入 + 圧縮 + ターン終了後の抽出 + 定期整理 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n以下のプロンプトを試してみてください(複数ターンに分けて入力し、記憶の蓄積と読み込みを観察):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(Agent がタブを使用したか観察)\n3. `What did I tell you about my preferences?`(Agent が覚えているか観察)\n4. 
`I also prefer single quotes over double quotes for strings.`\n\n観察のポイント:各ターン終了後に `[Memory: extracted N new memories]` が表示されるか?`.memory/` ディレクトリに `.md` ファイルが生成されたか?`MEMORY.md` インデックスが更新されたか?新しい会話で Agent が以前の記憶を自動的に読み込んだか?\n\n---\n\n## 次へ\n\n記憶、圧縮、ツールはすべて揃った。しかし system prompt はまだハードコードされた文字列。新しいツールを追加するには手動で説明を書き、プロジェクトを変えるにはプロンプト全体を書き直す。プロンプトは実行時に組み立てられるべき。\n\ns10 System Prompt → セグメント + 実行時組み立て。異なるプロジェクト、異なるツール、異なるプロンプト。\n\n
\nCC ソースコードの詳細\n\n> 以下は CC ソースコード `src/` 下の `memdir/`、`services/`、`utils/`、`query/` の分析に基づく。行番号はソースコードと照合済み。\n\n### ソースコードパス\n\n| ファイル | 行数 | 職責 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定義(`34-38`)、記憶動作指示で memory/plan/tasks を区別(`199-266`)、`loadMemoryPrompt()` 3 パス(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query で記憶選択(`18-24` システムプロンプト、`97-122` 呼び出しロジック) |\n| `memdir/memoryTypes.ts` | 271 | 型定義、frontmatter フィールド |\n| `memdir/memoryScan.ts` | — | .md ファイルをスキャン、MEMORY.md を除外、frontmatter を読み取り、最大 200 ファイル、mtime 降順(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent で記憶を抽出、制限付き権限、`skipTranscript: true`、`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理、4 層ゲート(`63-66` デフォルト値、`130-190` ゲート、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | セッションレベルの記憶管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 軽量サマリ、閾値 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入予算:200 行 / 4096 バイト/ファイル、60KB/セッション(`269-288`);query で関連記憶を検索(`2196-2241`) |\n| `query.ts` | — | memory prefetch を毎ターン開始時に起動(`301-304`)、非ブロッキング収集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget で抽出と Dream をトリガー(`141-155`) |\n\n### 記憶選択:embedding ではなく LLM\n\nCC は **Sonnet 自身で選択**(`findRelevantMemories.ts`)、embedding ベクトル類似度ではない:\n\n1. `memoryScan.ts` が `.memory/` 下のすべての `.md` ファイルをスキャン(MEMORY.md を除外)、最大 200 ファイル、mtime 降順\n2. `name` + `description` をカタログとしてリスト化\n3. Sonnet side-query に送信:「名前と説明から本当に有用な記憶を選択(最大 5 件)。不明ならスキップ。」\n4. Sonnet が `{ selected_memories: [\"file1.md\", ...] }` を返却\n5. 
選択されたファイルの完全な内容を読み込み(≤ 200 行 / 4096 バイト/ファイル)、注入。セッション総予算:60KB\n\n毎ターンのユーザー turn 開始時、`query.ts:301-304` が memory prefetch を起動(非同期);ツール実行後、`1592-1614` が非ブロッキングで結果を収集。\n\n### 抽出タイミング:stop hook、autoCompact 後ではない\n\nトリガー位置(`stopHooks.ts:141-155`):`handleStopHooks()` 内で、fire-and-forget で抽出と Dream をトリガー。教学版は `stop_reason != \"tool_use\"` 分岐に抽出を配置、方向は一致。\n\nCC の抽出は forked agent で実行(`extractMemories.ts:371-427`):制限付き権限、`skipTranscript: true`、`maxTurns: 5`。重複保護もある:メイン Agent が既に記憶ファイルを書き込んだ場合、抽出をスキップ。\n\n### 記憶ファイル形式\n\nCC は Markdown + YAML frontmatter を使用、教学版と一致。4 種類:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` がインデックス制約を定義:`MEMORY.md` 最大 200 行 / 25KB。`memdir.ts:199-266` が記憶動作指示を構築、memory と plan と tasks を明確に区別。保存場所:`~/.claude/projects/<project>/memory/`。\n\n### Dream:4 層ゲート\n\n「アイドル時にトリガー」や「数が足りたら統合」ではなく、4 層のゲート(`autoDream.ts`、デフォルト値 `63-66`、ゲートロジック `130-190`):\n\n1. **時間ゲート**:前回の統合から ≥ 24 時間\n2. **スキャンスロットル**:頻繁なファイルシステムスキャンを回避\n3. **セッションゲート**:前回の統合以降 ≥ 5 セッションの transcript が変更された\n4. **ロックゲート**:他のプロセスが統合中でない(`.consolidate-lock` ファイル)\n\n統合自体は forked agent で実行(`224-233`):定位 → 直近のシグナル収集 → 統合してファイル書き込み → 剪定してインデックス更新。ロックファイルの mtime が lastConsolidatedAt。クラッシュリカバリ:1 時間後にロックが自動期限切れ。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 永続性 | セッション間 | 単一セッション |\n| ストレージ | `memory/` 下の複数 .md ファイル | `session-memory/<session-id>/memory.md` |\n| 注入先 | system prompt | compact サマリ |\n| 目的 | セッション間の知識蓄積 | compact を越えたコンテキストの連続性 |\n\nsessionMemoryCompact(s08 で触れた仕組み)は Session Memory を活用:autoCompact の前に session memory ファイルを読み込み、内容が十分であれば(≥ 10K token、≥ 5 テキストメッセージ、≤ 40K token、`sessionMemoryCompact.ts:56-61`)、LLM を呼び出さずにサマリとして使用。\n\n### 実際の実装が教学版より複雑な点\n\n- **Feature flags**:記憶関連機能には複数の feature gate 層がある\n- **Team memory**:チーム共有記憶、`loadMemoryPrompt()` に専用パスあり(教学版では未カバー)\n- **KAIROS**:タイミング認識型の記憶抽出戦略、`loadMemoryPrompt()` の daily-log モード\n- **Prompt cache**:記憶注入は prompt cache の TTL を考慮する必要があり、毎ターン system prompt の大部分を書き直すことを避ける\n- 
**ファイルロック**:マルチプロセス時の並行制御\n- **Memory prefetch**:非同期プレフェッチ、メインフローをブロックしない\n\n### 教学版の簡略化は意図的\n\n- LLM side-query → LLM side-query + キーワードフォールバック:教学版は LLM 選択を維持し、フォールバックパスを追加\n- 記憶 JSON → Markdown + frontmatter:教学版は CC と一致\n- stop hook トリガー → `stop_reason != \"tool_use\"` 分岐:方向は一致\n- 4 層ゲート → ファイル数閾値:教学版には transcript システムやマルチセッションの概念がない\n- forked agent + 制限付き権限 → 直接呼び出し:教学版にはサブプロセス分離がない\n\n
\n\n\n" + }, + { + "version": "s10", + "locale": "en", + "title": "s10: System Prompt — Assembled at Runtime, Never Hardcoded", + "content": "# s10: System Prompt — Assembled at Runtime, Never Hardcoded\n\ns01 → ... → s08 → s09 → `s10` → [s11](/en/s11) → s12 → ... → s20\n> *\"prompt is assembled, not hardcoded\"* — Sections + on-demand assembly + caching.\n>\n> **Harness Layer**: Prompt — assembled at runtime, never hardcoded.\n\n---\n\n## The Problem\n\nFrom s01 to s09, the system prompt was always one hardcoded line:\n\n```python\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n```\n\nThat worked for s01 — only bash, read, write. But by s09, the agent has memory, compression, skill loading. The prompt needs to describe more and more capabilities:\n\n```python\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use tools to solve tasks. Act, don't explain. \"\n \"Before starting any multi-step task, use todo_write. \"\n \"Skills are available via list_skills and load_skill. \"\n \"Relevant memories are injected below when available. \"\n # ... add a capability, add a line\n)\n```\n\nThree problems:\n\n1. **Switching projects requires rewriting the entire prompt** — no way to know what to change and what to keep\n2. **One change can break others** — adding a tool description might conflict with earlier instructions\n3. **Every request carries everything** — even when the current conversation doesn't need certain sections, they waste tokens\n\nThe system prompt should be a configuration assembled at runtime based on current state: which tools are enabled, which context is visible, which memories are relevant, and which content must remain stable to hit prompt cache.\n\n---\n\n## The Solution\n\n![System Prompt Overview](/course-assets/s10_system_prompt/system-prompt-overview.en.svg)\n\ns10 focuses on prompt assembly. It builds on the s08-s09 capabilities but doesn't re-implement compression or memory. 
The core change: split the hardcoded `SYSTEM` into independent sections, assemble them at runtime based on real state, and cache the result.\n\nFour sections, two loading strategies:\n\n| Section | Strategy | Content | Condition |\n|---------|----------|---------|-----------|\n| identity | always | who you are, how to work | always present |\n| tools | always | available tool list | `enabled_tools` |\n| workspace | always | working directory | always present |\n| memory | on-demand | relevant memory content | whether `.memory/MEMORY.md` exists |\n\nKey design: whether a section loads depends on real state (tools exist, files exist), not keywords in messages.\n\n---\n\n## How It Works\n\n### PROMPT_SECTIONS: Topic-Keyed Fragments\n\nSplit the monolithic string into a dictionary, each key is a topic:\n\n```python\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n```\n\nEach section is maintained independently. Changing `tools` doesn't affect `identity`; adding `memory` doesn't touch `workspace`.\n\n### assemble_system_prompt: On-Demand Assembly\n\nNot every section is needed every turn. No memory files? Loading the memory section just wastes tokens. Assembly is based on real state in context:\n\n```python\ndef assemble_system_prompt(context: dict) -> str:\n sections = []\n\n # Always loaded\n sections.append(PROMPT_SECTIONS[\"identity\"])\n sections.append(PROMPT_SECTIONS[\"tools\"])\n sections.append(PROMPT_SECTIONS[\"workspace\"])\n\n # On-demand — based on real state, not keywords\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n\n return \"\\n\\n\".join(sections)\n```\n\n\"Always loaded\" sections are needed every turn: identity, tools, workspace. 
\"On-demand\" sections are only useful under specific conditions.\n\nWhy not load everything? Tokens have cost (system prompt is billed every turn), and fewer instructions means more focused output (irrelevant instructions are noise).\n\n### get_system_prompt: Cache to Avoid Re-Assembly\n\nWhen context hasn't changed (multiple LLM calls in the same turn with the same context), re-assembling is wasteful. Use deterministic serialization to detect changes and return cached result:\n\n```python\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n```\n\n`json.dumps` instead of `hash()`: Python's built-in `hash()` has process randomization (unsuitable for stable cache keys) and throws `unhashable type` on nested dicts/lists.\n\nNote: this cache only avoids redundant string assembly within a process. It's not the same as CC's API prompt cache, which uses `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` to separate static and dynamic parts — the static parts hit global cache and don't invalidate when dynamic content changes.\n\n### context: Real State, Not Keyword Guessing\n\nContext reflects the actual runtime state:\n\n```python\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n```\n\n`enabled_tools` lists actually registered tools. `memories` checks whether `.memory/MEMORY.md` exists. 
Section loading is based on this real state, not searching for keywords in messages.\n\n### Putting It Together\n\n```python\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n # ... tool execution ...\n context = update_context(context, messages)\n system = get_system_prompt(context)\n```\n\nAt the start of each loop iteration, get the system prompt. If context changed, re-assemble; if not, return cached version.\n\n---\n\n## Changes From s09\n\n| Component | Before (s09) | After (s10) |\n|-----------|-------------|-------------|\n| prompt | Hardcoded SYSTEM string | PROMPT_SECTIONS + assemble_system_prompt |\n| caching | None | get_system_prompt (json.dumps detection + cache) |\n| new functions | — | assemble_system_prompt, get_system_prompt, update_context |\n| tools | bash, read_file, write_file (3) | bash, read_file, write_file (3) — unchanged |\n| loop | Uses fixed SYSTEM | Uses get_system_prompt(context) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s10_system_prompt/code.py\n```\n\nWhat to watch for:\n\n1. Output shows which sections were loaded (`[assembled] sections: ...` label)\n2. Cache hits show `[cache hit]` during continued conversation\n3. Creating `.memory/MEMORY.md` makes the memory section appear on the next turn\n\nTry these prompts:\n\n1. `Read the file README.md` (observe the three always-loaded sections)\n2. `Create a file called .memory/MEMORY.md with content \"- [test](test.md) — test memory\"` (write a memory index)\n3. `Read the file code.py` (observe whether the memory section appears)\n\n---\n\n## What's Next\n\nSystem prompts can now be assembled at runtime. But the agent still crashes on errors. Network hiccups, API rate limits, truncated output, context overflow — these aren't bugs, they're normal.\n\ns11 Error Recovery → four recovery paths. 
Upgrade tokens, compress context, exponential backoff, switch models.\n\n
\nDeep Dive Into CC Source Code\n\n> The following is based on analysis of CC source code `constants/prompts.ts` (914 lines), `constants/systemPromptSections.ts` (68 lines), `context.ts` (189 lines), `utils/api.ts` (718 lines), `utils/systemPrompt.ts` (123 lines), and `bootstrap/state.ts`.\n\n### How many sections does CC's system prompt have?\n\nThe count varies based on feature flags, output style, KAIROS/Proactive mode, user type, token budget, etc. Roughly two categories:\n\n**Static sections** (always loaded): identity, system, doing_tasks, actions, using_tools, tone_style, output_efficiency, etc.\n\n**Dynamic sections** (loaded by state): session_guidance, memory, ant_model_override, env_info_simple, language, output_style, mcp_instructions, scratchpad, frc, summarize_tool_results, numeric_length_anchors, token_budget, brief, etc.\n\n`mcp_instructions` is the only volatile section (created via `DANGEROUS_uncachedSystemPromptSection()`), because MCP servers can connect and disconnect between turns.\n\n### Assembly Function\n\n```typescript\ngetSystemPrompt(tools, model, additionalWorkingDirs?, mcpClients?): Promise<string[]>\n```\n\nReturns `string[]` (each element is a section), separated by `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` between static and dynamic parts.\n\n### cache scope\n\nWhen global cache boundary is enabled, static sections are merged into one global cache block, and dynamic sections don't use global cache (`cacheScope: null`). Only paths without boundary or skipping global cache fall back to org scope.\n\nThe teaching version's cache only avoids redundant string assembly. CC's three-layer cache:\n\n1. **lodash memoize**: `getSystemContext` and `getUserContext` cached per session (`context.ts`)\n2. **Section registry cache**: `STATE.systemPromptSectionCache` caches dynamic section results, cleared on `/clear` or `/compact`\n3. 
**API-level cache**: `splitSysPromptPrefix()` (`api.ts`) splits prompt into blocks with different cache scopes via boundary\n\n### getUserContext vs getSystemContext\n\n| | getSystemContext | getUserContext |\n|---|---|---|\n| Content | gitStatus, cacheBreaker | CLAUDE.md content, currentDate |\n| Injection | appended to system prompt array | prepended as a `<system-reminder>` user message |\n| When skipped | custom system prompt | always runs |\n\n### How modes change the prompt\n\n- **CLAUDE_CODE_SIMPLE**: entire prompt is 2 lines\n- **Proactive/KAIROS**: compact prompt replaces all standard sections\n- **Coordinator**: coordinator-specific prompt fully replaces default\n- **Agent mode**: agent-defined prompt replaces or appends to default\n\n### Total size\n\nStandard interactive mode system prompt core is ~20-30KB text. CLAUDE_CODE_SIMPLE is ~150 characters. User context (CLAUDE.md) and system context (git status) add on top.\n\n
\n\n\n" + }, + { + "version": "s10", + "locale": "zh", + "title": "s10: System Prompt — 运行时组装,不硬编码", + "content": "# s10: System Prompt — 运行时组装,不硬编码\n\ns01 → ... → s08 → s09 → `s10` → [s11](/zh/s11) → s12 → ... → s20\n> *\"prompt 是组装出来的, 不是写死的\"* — 分段 + 按需拼接 + 缓存。\n>\n> **Harness 层**: 提示 — 运行时组装, 不硬编码。\n\n---\n\n## 问题\n\n从 s01 到 s09,system prompt 都是一行硬编码:\n\n```python\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n```\n\ns01 够用,只有 bash、read、write 三个工具。但到 s09,Agent 已经有记忆、有压缩、有技能加载。prompt 该提的能力越来越多:\n\n```python\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use tools to solve tasks. Act, don't explain. \"\n \"Before starting any multi-step task, use todo_write. \"\n \"Skills are available via list_skills and load_skill. \"\n \"Relevant memories are injected below when available. \"\n # ... 加一个能力就多一段\n)\n```\n\n三个问题:\n\n1. **换项目要重写整个 prompt**,不知道哪些该改、哪些该留\n2. **修改一处可能影响全局**,加一段工具描述可能跟前面的指令冲突\n3. **每次请求都带全部内容**,即使当前对话用不到某些段落也浪费 token\n\nSystem prompt 应该是运行时根据当前状态组装的配置:哪些工具启用、哪些上下文可见、哪些记忆相关、哪些内容必须保持稳定以命中 prompt cache。\n\n---\n\n## 解决方案\n\n![System Prompt Overview](/course-assets/s10_system_prompt/system-prompt-overview.svg)\n\ns10 聚焦 prompt 组装机制。以 s08-s09 的能力为背景,但不重复实现压缩和记忆系统。核心变动:把硬编码的 `SYSTEM` 拆成独立段落(section),运行时根据真实状态按需拼接,缓存结果避免重复组装。\n\n四个 section,两种加载策略:\n\n| Section | 加载策略 | 内容 | 判断依据 |\n|---------|---------|------|---------|\n| identity | 始终 | 你是谁、怎么做事 | 始终存在 |\n| tools | 始终 | 可用工具列表 | `enabled_tools` |\n| workspace | 始终 | 工作目录 | 始终存在 |\n| memory | 按需 | 相关记忆内容 | `.memory/MEMORY.md` 是否存在 |\n\n关键设计:section 是否加载取决于真实状态(工具是否存在、文件是否存在),不是消息里的关键词。\n\n---\n\n## 工作原理\n\n### PROMPT_SECTIONS: 分段定义\n\n把一大段字符串拆成字典,每个 key 是一个主题:\n\n```python\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n```\n\n每个 section 独立维护。修改 `tools` 不影响 `identity`,新增 `memory` 不动 `workspace`。\n\n### assemble_system_prompt: 按需拼接\n\n不是所有 section 每次都需要。当前没有记忆文件,加载 memory section 只是浪费 token。根据 context 的真实状态决定加载哪些:\n\n```python\ndef assemble_system_prompt(context: dict) -> str:\n sections = []\n\n # 始终加载\n sections.append(PROMPT_SECTIONS[\"identity\"])\n sections.append(PROMPT_SECTIONS[\"tools\"])\n sections.append(PROMPT_SECTIONS[\"workspace\"])\n\n # 按需加载 — 基于真实状态,不是关键词\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n\n return \"\\n\\n\".join(sections)\n```\n\n\"始终加载\"的是每轮都需要的:身份、工具、工作目录。\"按需加载\"的只在特定条件下才有用。\n\n为什么不全加载?token 有成本(system prompt 每轮计费),信息越少 LLM 越专注(无关指令是噪音)。\n\n### get_system_prompt: 缓存避免重复拼接\n\n上下文没变时(同一轮对话的多次 LLM 调用,context 相同),重新拼接是浪费。用确定性序列化检测变化,命中缓存直接返回:\n\n```python\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n```\n\n用 `json.dumps` 而不是 `hash()`:Python 内置 `hash()` 有进程随机化,不适合做稳定 cache key,而且遇到 list/dict 会报 `unhashable type`。\n\n注意:这里的缓存只是\"避免重复拼接字符串\",和 CC 的 API prompt cache 不是一回事。CC 的 prompt cache 通过 `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` 分隔静态和动态部分,静态部分命中 global cache,不因动态内容变化而失效。\n\n### context: 真实状态,不是关键词猜测\n\ncontext 反映当前运行态的真实状态:\n\n```python\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": 
str(WORKDIR),\n \"memories\": memories,\n }\n```\n\n`enabled_tools` 列出实际注册的工具。`memories` 检查 `.memory/MEMORY.md` 是否存在。section 加载基于这些真实状态,不在消息里搜关键词。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n # ... 工具执行 ...\n context = update_context(context, messages)\n system = get_system_prompt(context)\n```\n\n每轮循环开头拿一次 system prompt。context 变了就重新组装,没变就返回缓存。\n\n---\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|------|-----------|-----------|\n| prompt | 硬编码 SYSTEM 字符串 | PROMPT_SECTIONS + assemble_system_prompt |\n| 缓存 | 无 | get_system_prompt(json.dumps 检测 + 缓存) |\n| 新函数 | — | assemble_system_prompt, get_system_prompt, update_context |\n| 工具 | bash, read_file, write_file (3) | bash, read_file, write_file (3) — 不变 |\n| 循环 | 用固定 SYSTEM | 用 get_system_prompt(context) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s10_system_prompt/code.py\n```\n\n观察重点:\n\n1. 输出中能看到哪些 section 被加载了(`[assembled] sections: ...` 标签)\n2. 连续对话时,缓存命中显示 `[cache hit]`\n3. 创建 `.memory/MEMORY.md` 文件后,下一轮 memory section 自动加载\n\n试试这些 prompt:\n\n1. `Read the file README.md`(观察始终加载的三个 section)\n2. `Create a file called .memory/MEMORY.md with content \"- [test](test.md) — test memory\"`(写入记忆索引)\n3. `Read the file code.py`(观察 memory section 是否出现)\n\n---\n\n## 接下来\n\nSystem prompt 可以运行时组装了,但 Agent 碰到错误还是会崩。网络抖动、API 限流、输出被截断、上下文超限,这些不是 bug,是常态。\n\ns11 Error Recovery → 四条恢复路径。升级 token、压缩上下文、指数退避、切换模型。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `constants/prompts.ts`(914 行)、`constants/systemPromptSections.ts`(68 行)、`context.ts`(189 行)、`utils/api.ts`(718 行)、`utils/systemPrompt.ts`(123 行)、`bootstrap/state.ts` 的分析。\n\n### CC 的 system prompt 有多少 section?\n\n数量不固定,受 feature flag、output style、KAIROS/Proactive 模式、用户类型、token 预算等影响。大致分两类:\n\n**静态 section**(始终加载):identity、system、doing_tasks、actions、using_tools、tone_style、output_efficiency 等。\n\n**动态 section**(按状态加载):session_guidance、memory、ant_model_override、env_info_simple、language、output_style、mcp_instructions、scratchpad、frc、summarize_tool_results、numeric_length_anchors、token_budget、brief 等。\n\n`mcp_instructions` 是唯一的易失性 section(通过 `DANGEROUS_uncachedSystemPromptSection()` 创建),因为 MCP server 可以在轮次间连接和断开。\n\n### 组装函数\n\n```typescript\ngetSystemPrompt(tools, model, additionalWorkingDirs?, mcpClients?): Promise\n```\n\n返回 `string[]`(每个元素是一个 section),由 `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` 分隔静态和动态部分。\n\n### cache scope\n\n启用 global cache boundary 时,静态 section 合并成一个 global cache block,动态 section 不使用 global cache(`cacheScope: null`)。没有 boundary 或跳过 global cache 的路径才会走 org scope。\n\n教学版的缓存只避免重复拼接字符串。CC 的三层缓存:\n\n1. **lodash memoize**:`getSystemContext` 和 `getUserContext` 在会话中缓存(`context.ts`)\n2. **section 注册缓存**:`STATE.systemPromptSectionCache` 缓存动态 section 结果,`/clear` 或 `/compact` 时清除\n3. **API 级缓存**:`splitSysPromptPrefix()`(`api.ts`)把 prompt 按 boundary 分成不同 cache scope 的块\n\n### getUserContext vs getSystemContext\n\n| | getSystemContext | getUserContext |\n|---|---|---|\n| 内容 | gitStatus、cacheBreaker | CLAUDE.md 内容、currentDate |\n| 注入方式 | 追加到 system prompt 数组 | 前置为 `` 用户消息 |\n| 何时跳过 | 自定义 system prompt 时 | 始终运行 |\n\n### 模式如何改变 prompt\n\n- **CLAUDE_CODE_SIMPLE**:整个 prompt 只有 2 行\n- **Proactive/KAIROS**:用紧凑版 prompt 替换所有标准 section\n- **Coordinator**:用协调器专用 prompt 完全替换\n- **Agent 模式**:Agent 定义的 prompt 替换或追加到默认 prompt\n\n### 总大小\n\n标准交互模式下 system prompt 核心约 20-30KB 文本。CLAUDE_CODE_SIMPLE 约 150 字符。用户上下文(CLAUDE.md)和系统上下文(git status)在此基础上累加。\n\n
\n\n\n" }, { "version": "s10", "locale": "ja", - "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. `/team`と入力してステータスを監視する\n" + "title": "s10: System Prompt — 実行時アセンブリ、ハードコードなし", + "content": "# s10: System Prompt — 実行時アセンブリ、ハードコードなし\n\ns01 → ... → s08 → s09 → `s10` → [s11](/ja/s11) → s12 → ... → s20\n> *\"prompt は組み立てるもの、固定するものではない\"* — セグメント + オンデマンド結合 + キャッシュ。\n>\n> **Harness レイヤー**: プロンプト — 実行時組み立て、ハードコードなし。\n\n---\n\n## 課題\n\ns01 から s09 まで、system prompt は常に 1 行のハードコード:\n\n```python\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n```\n\ns01 では十分だった。bash、read、write の 3 ツールのみ。しかし s09 では、Agent に記憶、圧縮、スキル読み込みがある。prompt が説明すべき能力が増え続ける:\n\n```python\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use tools to solve tasks. Act, don't explain. \"\n \"Before starting any multi-step task, use todo_write. 
\"\n \"Skills are available via list_skills and load_skill. \"\n \"Relevant memories are injected below when available. \"\n # ... 能力を追加するたびに 1 行増える\n)\n```\n\n3 つの問題:\n\n1. **プロジェクトを変えるには prompt 全体を書き直す**必要がある。何を変え、何を残すべきか不明\n2. **一箇所の変更が全体に影響する**。ツール説明を追加すると、前の指示と矛盾する可能性\n3. **毎回のリクエストが全内容を送信する**。現在の会話で不要なセクションも token を無駄に消費\n\nSystem prompt は、実行時の現在状態に基づいて組み立てられる設定であるべき:どのツールが有効か、どのコンテキストが可視か、どの記憶が関連するか、どの内容を prompt cache に命中させるために安定させるべきか。\n\n---\n\n## ソリューション\n\n![System Prompt Overview](/course-assets/s10_system_prompt/system-prompt-overview.ja.svg)\n\ns10 は prompt アセンブリ機構に焦点を当てる。s08-s09 の能力を背景とするが、圧縮や記憶システムは再実装しない。核心の変更:ハードコードされた `SYSTEM` を独立セクションに分割し、実行時に実際の状態に基づいてオンデマンドで組み立て、結果をキャッシュして再組み立てを回避。\n\n4 つのセクション、2 つの読み込み戦略:\n\n| セクション | 戦略 | 内容 | 判断基準 |\n|-----------|------|------|---------|\n| identity | 常に | あなたは誰か、どう作業するか | 常に存在 |\n| tools | 常に | 利用可能ツール一覧 | `enabled_tools` |\n| workspace | 常に | 作業ディレクトリ | 常に存在 |\n| memory | オンデマンド | 関連記憶内容 | `.memory/MEMORY.md` が存在するか |\n\n重要な設計:セクションをロードするかどうかは実際の状態(ツールが存在するか、ファイルが存在するか)で決まり、メッセージ内のキーワードではない。\n\n---\n\n## 仕組み\n\n### PROMPT_SECTIONS: トピック別フラグメント\n\n単一の文字列を辞書に分割、各キーがトピック:\n\n```python\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n```\n\n各セクションは独立して管理。`tools` を変更しても `identity` に影響しない。`memory` を追加しても `workspace` はそのまま。\n\n### assemble_system_prompt: オンデマンド組み立て\n\nすべてのセクションが毎ターン必要なわけではない。記憶ファイルがなければ、memory セクションをロードしても token の無駄。context の実際の状態に基づいて組み立てる:\n\n```python\ndef assemble_system_prompt(context: dict) -> str:\n sections = []\n\n # 常にロード\n sections.append(PROMPT_SECTIONS[\"identity\"])\n sections.append(PROMPT_SECTIONS[\"tools\"])\n sections.append(PROMPT_SECTIONS[\"workspace\"])\n\n # オンデマンド — 実際の状態に基づく、キーワードではない\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n\n return \"\\n\\n\".join(sections)\n```\n\n「常にロード」は毎ターン必要なもの:アイデンティティ、ツール、作業ディレクトリ。「オンデマンド」は特定条件下でのみ有用。\n\nなぜ全部ロードしないのか?token にはコストがあり(system prompt は毎ターン課金)、情報が少ないほど LLM は集中する(無関係な指示はノイズ)。\n\n### get_system_prompt: キャッシュで再組み立てを回避\n\nコンテキストが変わっていない時(同じターン内で複数の LLM 呼び出し、context が同じ)、再組み立ては無駄。確定的シリアライズで変化を検出し、キャッシュヒット時は即座に返却:\n\n```python\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n```\n\n`hash()` ではなく `json.dumps` を使用:Python 組み込みの `hash()` にはプロセスランダム化があり(安定したキャッシュキーに不適切)、list/dict で `unhashable type` エラーになる。\n\n注意:このキャッシュは「プロセス内での文字列再組み立ての回避」のみ。CC の API prompt cache とは別物。CC の prompt cache は `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` で静的/動的部分を分離し、静的部分が global cache に命中する。動的内容が変化しても静的部分は無効化されない。\n\n### context: 実際の状態、キーワード推測ではない\n\ncontext は現在の実行時状態の実際の状態を反映:\n\n```python\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n 
content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n```\n\n`enabled_tools` は実際に登録されたツールを一覧。`memories` は `.memory/MEMORY.md` が存在するかを確認。セクションの読み込みはこの実際の状態に基づき、メッセージ内のキーワード検索ではない。\n\n### 組み合わせて実行\n\n```python\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n # ... ツール実行 ...\n context = update_context(context, messages)\n system = get_system_prompt(context)\n```\n\n各ループ反復の開始時に system prompt を取得。context が変わっていれば再組み立て、変わっていなければキャッシュを返却。\n\n---\n\n## s09 からの変更点\n\n| コンポーネント | 変更前 (s09) | 変更後 (s10) |\n|-----------|-------------|-------------|\n| prompt | ハードコード SYSTEM 文字列 | PROMPT_SECTIONS + assemble_system_prompt |\n| キャッシュ | なし | get_system_prompt(json.dumps 検出 + キャッシュ) |\n| 新規関数 | — | assemble_system_prompt, get_system_prompt, update_context |\n| ツール | bash, read_file, write_file (3) | bash, read_file, write_file (3) — 変更なし |\n| ループ | 固定 SYSTEM を使用 | get_system_prompt(context) を使用 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s10_system_prompt/code.py\n```\n\n観察のポイント:\n\n1. 出力にロードされたセクションが表示される(`[assembled] sections: ...` ラベル)\n2. 継続会話でキャッシュヒット時は `[cache hit]` と表示\n3. `.memory/MEMORY.md` を作成すると、次のターンで memory セクションが自動ロード\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md`(常にロードされる 3 つのセクションを観察)\n2. `Create a file called .memory/MEMORY.md with content \"- [test](test.md) — test memory\"`(記憶インデックスを書き込み)\n3. `Read the file code.py`(memory セクションが表示されるか観察)\n\n---\n\n## 次へ\n\nSystem prompt を実行時に組み立てられるようになった。しかし Agent はエラーでまだクラッシュする。ネットワークの不安定性、API レート制限、出力の切り詰め、コンテキスト超過、これらはバグではなく日常。\n\ns11 Error Recovery → 4 つのリカバリパス。token のアップグレード、コンテキスト圧縮、指数バックオフ、モデル切り替え。\n\n
\nCC ソースコードの詳細\n\n> 以下は CC ソースコード `constants/prompts.ts`(914 行)、`constants/systemPromptSections.ts`(68 行)、`context.ts`(189 行)、`utils/api.ts`(718 行)、`utils/systemPrompt.ts`(123 行)、`bootstrap/state.ts` の分析に基づく。\n\n### CC の system prompt にはいくつのセクションがあるか?\n\n数は固定されておらず、feature flag、output style、KAIROS/Proactive モード、ユーザータイプ、token 予算などに影響される。大まかに 2 つのカテゴリ:\n\n**静的セクション**(常にロード):identity、system、doing_tasks、actions、using_tools、tone_style、output_efficiency など。\n\n**動的セクション**(状態に応じてロード):session_guidance、memory、ant_model_override、env_info_simple、language、output_style、mcp_instructions、scratchpad、frc、summarize_tool_results、numeric_length_anchors、token_budget、brief など。\n\n`mcp_instructions` は唯一の揮発性セクション(`DANGEROUS_uncachedSystemPromptSection()` で作成)。MCP server はターン間で接続・切断可能なため。\n\n### 組み立て関数\n\n```typescript\ngetSystemPrompt(tools, model, additionalWorkingDirs?, mcpClients?): Promise\n```\n\n`string[]`(各要素がセクション)を返却。`SYSTEM_PROMPT_DYNAMIC_BOUNDARY` で静的/動的部分を分離。\n\n### cache scope\n\nglobal cache boundary が有効な場合、静的セクションは 1 つの global cache block にマージされ、動的セクションは global cache を使用しない(`cacheScope: null`)。boundary なしまたは global cache をスキップするパスでのみ org scope にフォールバック。\n\n教学版のキャッシュは文字列の再組み立てを回避するのみ。CC の 3 層キャッシュ:\n\n1. **lodash memoize**: `getSystemContext` と `getUserContext` がセッション中キャッシュ(`context.ts`)\n2. **セクション登録キャッシュ**: `STATE.systemPromptSectionCache` が動的セクションの結果をキャッシュ、`/clear` や `/compact` でクリア\n3. 
**API レベルキャッシュ**: `splitSysPromptPrefix()`(`api.ts`)が boundary を通じて異なる cache scope のブロックに分割\n\n### getUserContext vs getSystemContext\n\n| | getSystemContext | getUserContext |\n|---|---|---|\n| 内容 | gitStatus、cacheBreaker | CLAUDE.md 内容、currentDate |\n| 注入方式 | system prompt 配列に追加 | `` ユーザーメッセージとして先頭に配置 |\n| スキップ条件 | カスタム system prompt 時 | 常に実行 |\n\n### モードによる prompt の変化\n\n- **CLAUDE_CODE_SIMPLE**: prompt 全体が 2 行のみ\n- **Proactive/KAIROS**: コンパクト版 prompt が標準セクション全体を置換\n- **Coordinator**: コーディネータ専用 prompt がデフォルトを完全に置換\n- **Agent モード**: Agent 定義の prompt がデフォルトを置換または追加\n\n### 総サイズ\n\n標準インタラクティブモードの system prompt コアは約 20-30KB テキスト。CLAUDE_CODE_SIMPLE は約 150 文字。ユーザーコンテキスト(CLAUDE.md)とシステムコンテキスト(git status)がこれに加算。\n\n
\n\n\n" + }, + { + "version": "s11", + "locale": "en", + "title": "s11: Error Recovery — Errors aren't the end, they're the start of a retry", + "content": "# s11: Error Recovery — Errors aren't the end, they're the start of a retry\n\ns01 → ... → s09 → s10 → `s11` → [s12](/en/s12) → s13 → ... → s20\n> *\"Errors aren't the end, they're the start of a retry\"* — escalate tokens, compact context, switch models.\n>\n> **Harness layer**: Resilience — classify and recover when the main loop hits errors.\n\n---\n\n## The Problem\n\nThe Agent is running along and then errors out:\n\n```\nError: 529 overloaded\n```\n\nThe Agent crashes. It doesn't retry, doesn't switch models, doesn't reduce context — it just crashes.\n\nIn production, API errors are the norm. The three most common failure modes: **truncated output** (the model runs out of tokens mid-sentence), **context overflow** (still too long even after compaction), and **transient failures** (429 rate limiting / 529 overload). An Agent that doesn't handle errors is like a car that stalls at the slightest touch.\n\n---\n\n## Solution\n\n![Error Recovery Overview](/course-assets/s11_error_recovery/error-recovery-overview.en.svg)\n\nThe loop and prompt assembly from s10 are fully preserved. The only change: the LLM call is wrapped in try/except, with different recovery paths based on error type. After recovery, `continue` loops back to the top to call the LLM again.\n\nThe three most common recovery patterns (the teaching version only handles 429/529; real systems also cover connection errors, timeouts, cloud vendor credential caches, etc. 
CC actually has 13+ reason codes; see the Deep Dive for the rest):\n\n| Pattern | Trigger | Recovery Action |\n|----------|---------|-----------------|\n| Output truncated | `max_tokens` | Escalate 8K→64K / continuation prompt |\n| Context overflow | `prompt_too_long` | Reactive compact → retry |\n| Transient failure | 429 / 529 | Exponential backoff + jitter, fallback model on consecutive 529 |\n\n---\n\n## How It Works\n\n### Path 1: Output Truncated\n\nThe model runs out of tokens mid-sentence — `max_tokens` is exhausted. The default 8000 tokens isn't enough for a complete response.\n\nOn the first occurrence, escalate `max_tokens` from 8K to 64K (8x the space) and retry the same request — the truncated output is NOT appended to messages, keeping the original request intact. If 64K is still not enough, save the truncated output and inject a continuation prompt telling the model to pick up where it left off, up to 3 times:\n\n```python\nif response.stop_reason == \"max_tokens\":\n # First escalation: don't append truncated output, retry same request\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n continue # messages unchanged, same request with more tokens\n # 64K still truncated: save output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\":\n \"Output token limit hit. Resume directly — \"\n \"no apology, no recap. Pick up mid-thought.\"})\n state.recovery_count += 1\n continue\n return # still truncated after 3 continuations\n# Normal: append after max_tokens check\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\nEscalation gets one chance; continuation gets up to 3. 
After that, exit — further continuations won't produce meaningful output.\n\n### Path 2: Context Overflow\n\nThe LLM says \"your context is too long\" (`prompt_too_long`). All four compaction layers from s08 have already run, and it's still over the limit.\n\nTrigger reactive compact — more aggressive than auto compact. The teaching version keeps only the last 5 messages to simulate compaction; real CC generates a compact summary via LLM. Either way, the request is retried once with the compacted message list. If it's still over the limit after that one compaction, the only option is to exit — compacting again won't make it any smaller:\n\n```python\nexcept PromptTooLongError:\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return # Already compacted and still over limit — must exit\n```\n\n### Path 3: Transient Failures\n\nNetwork blips, 429 rate limiting, 529 overload — these aren't bugs, they're normal in distributed systems.\n\nBoth 429 and 529 use exponential backoff + jitter: wait 0.5 seconds on the first attempt, 1 second on the second, 2 seconds on the third, up to 10 retries. Random jitter prevents concurrent requests from all retrying at the same instant. 
Three consecutive 529 overload errors → switch to the fallback model (if `FALLBACK_MODEL_ID` environment variable is configured):\n\n```python\ndef retry_delay(attempt, retry_after=None):\n if retry_after:\n return retry_after\n base = min(500 * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\ndef with_retry(fn, state, max_retries=10):\n for attempt in range(max_retries):\n try:\n return fn()\n except (RateLimitError, OverloadedError):\n delay = retry_delay(attempt)\n time.sleep(delay)\n if is_overloaded:\n state.consecutive_529 += 1\n if state.consecutive_529 >= 3 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n raise MaxRetriesExceeded()\n```\n\nBackoff formula: `min(500 × 2^attempt, 32000) + random(0~25%)`. If the server returns a `Retry-After` header, that value takes priority.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages, context):\n system = get_system_prompt(context)\n state = RecoveryState()\n max_tokens = 8000\n\n while True:\n try:\n response = with_retry(\n lambda: client.messages.create(\n model=state.current_model, system=system,\n messages=messages, tools=TOOLS,\n max_tokens=max_tokens),\n state)\n except Exception as e:\n if is_prompt_too_long_error(e):\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return\n log_error(e)\n return\n\n # max_tokens check BEFORE appending to messages\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = 64000\n state.has_escalated = True\n continue # retry same request, messages unchanged\n # save truncated output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n continue\n # Normal completion\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason 
!= \"tool_use\":\n return\n # ... tool execution ...\n```\n\nThe outer try/except catches API exceptions (prompt_too_long, etc.), `with_retry` handles transient errors (429/529), and `stop_reason` checks handle truncation. Three recovery mechanisms, each handling its own error type.\n\n---\n\n## Changes from s10\n\n| Component | Before (s10) | After (s11) |\n|-----------|-------------|-------------|\n| Error handling | None (crashes on any error) | Three recovery patterns + exponential backoff |\n| New constants | — | ESCALATED_MAX_TOKENS=64000, MAX_RETRIES=10, BASE_DELAY_MS=500, FALLBACK_MODEL |\n| New functions | — | with_retry, retry_delay, reactive_compact, is_prompt_too_long_error, RecoveryState |\n| Tools | bash, read_file, write_file (3) | bash, read_file, write_file (3) — unchanged |\n| Loop | Bare LLM call | Wrapped in try/except + continue retry |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s11_error_recovery/code.py\n```\n\nTry these prompts:\n\n1. Ask the Agent to generate a very long piece of code, and observe whether it automatically continues after truncation (look for the `[max_tokens] escalating` log)\n2. Read many files consecutively to bloat the context, and observe reactive compact\n3. If you encounter 429/529, observe the exponential backoff log output\n\n---\n\n## What's Next\n\nThe Agent can now automatically recover from errors. But the tasks it handles are still one-shot — you give it a task, it finishes, it's done.\n\nWhat if the Agent could manage a **task list** — with dependencies, persisted to disk, resumable across sessions? A TODO list is not a task system.\n\ns12 Task System → Tasks form a dependency graph with state and persistence. This is the foundation for multi-Agent collaboration.\n\n
\nDeep Dive into CC Source\n\n> The following is based on CC source code: `query.ts` (1729 lines), `services/api/withRetry.ts` (822 lines), `query/tokenBudget.ts` (93 lines), and `utils/tokenBudget.ts` (73 lines).\n\n### 1. A Dozen-Plus Reason/Transition Codes (Not Just 3)\n\nThe teaching version covers 3 of the most common recovery patterns. CC actually has a dozen-plus reason/transition codes, evaluated after every LLM call:\n\n| Reason/Transition | Teaching Version | CC Behavior |\n|---|---|---|\n| `completed` | Normal completion | Return result |\n| `next_turn` | Normal tool call | Continue to next tool execution round |\n| `max_output_tokens_escalate` | Path 1 | 8K→64K escalation |\n| `max_output_tokens_recovery` | Path 1 continuation | Continuation prompt (up to 3 times) |\n| `reactive_compact_retry` | Path 2 | Reactive compact → retry |\n| `prompt_too_long` | Path 2 | Same as above |\n| `collapse_drain_retry` | Not covered | Context collapse — commit staged content first |\n| `model_error` | Not covered | Retry |\n| `image_error` | Not covered | `ImageSizeError` / `ImageResizeError` handled specifically |\n| `aborted_streaming` | Not covered | Streaming abort recovery |\n| `aborted_tools` | Not covered | Tool abort |\n| `stop_hook_blocking` | Not covered | Inject blocking error → model self-corrects |\n| `stop_hook_prevented` | Not covered | Hooks prevent execution |\n| `hook_stopped` | Not covered | Hook stopped execution |\n| `token_budget_continuation` | Not covered | Continue when token usage < 90% |\n| `blocking_limit` | Not covered | Blocking limit reached |\n| `max_turns` | Not covered | Maximum turns reached |\n\nThe teaching version only expands on the first 6 (most common); each of the rest has its own dedicated handling logic.\n\n### 2. 
Precise Exponential Backoff Formula\n\nCC's backoff delay (`withRetry.ts:530-548`):\n\n```\ndelay = min(500 × 2^(attempt-1), 32000) + random(0~25%)\n```\n\n| Attempt | Base Delay | + Jitter |\n|---------|-----------|----------|\n| 1 | 500ms | 0-125ms |\n| 2 | 1000ms | 0-250ms |\n| 4 | 4000ms | 0-1000ms |\n| 7+ | 32000ms (cap) | 0-8000ms |\n\nIf the server returns a `Retry-After` header, that value takes priority.\n\n### 3. Original CONTINUATION Prompt\n\nCC's continuation prompt (`query.ts:1225-1227`):\n\n```\nOutput token limit hit. Resume directly — no apology, no recap of what\nyou were doing. Pick up mid-thought if that is where the cut happened.\nBreak remaining work into smaller pieces.\n```\n\nToken budget nudge prompt (`tokenBudget.ts:72`):\n\n```\nStopped at {pct}% of token target. Keep working — do not summarize.\n```\n\n### 4. Streaming Error Handling\n\nIn CC's streaming path, recoverable errors (413, max_tokens, media errors) are **withheld from display** during streaming (`query.ts:788-822`) — SDK consumers don't see them, only the recovery logic does. After streaming ends, the system determines whether recovery is needed.\n\n### 5. 529 → Fallback Model Switch\n\nAfter 3 consecutive 529 overload errors (`MAX_529_RETRIES = 3`), CC automatically switches to the fallback model (e.g., Opus → Sonnet). On switch, all pending messages and tool results are cleared, and the user sees \"Switched to {model} due to high demand\".\n\n### 6. Diminishing Returns Detection\n\nToken budget \"continuations\" aren't unlimited. When there are 3 consecutive continuations with a token increment < 500, the system determines \"continuing won't produce meaningful output\" and stops continuation (`tokenBudget.ts:60-62`).\n\n
\n\n\n" + }, + { + "version": "s11", + "locale": "zh", + "title": "s11: Error Recovery — 错误不是结束,是重试的开始", + "content": "# s11: Error Recovery — 错误不是结束,是重试的开始\n\ns01 → ... → s09 → s10 → `s11` → [s12](/zh/s12) → s13 → ... → s20\n> *\"错误不是终点, 是重试的起点\"* — 升级 token、压缩上下文、切换模型。\n>\n> **Harness 层**: 韧性 — 主循环遇到错误时分类并恢复。\n\n---\n\n## 问题\n\nAgent 跑着跑着报错了:\n\n```\nError: 529 overloaded\n```\n\nAgent 崩溃了。它没有重试,没有换模型,没有减少上下文——直接崩溃。\n\n生产环境中 API 错误是常态。三种最常见的故障模式:**输出被截断**(模型话说一半 token 用完了)、**上下文超限**(压缩后还是太长)、**临时故障**(429 限流 / 529 过载)。一个不处理错误的 Agent 就像一个一碰就熄火的车。\n\n---\n\n## 解决方案\n\n![Error Recovery Overview](/course-assets/s11_error_recovery/error-recovery-overview.svg)\n\ns10 的循环、prompt 组装全部保留。唯一的变动:LLM 调用包裹在 try/except 里,根据错误类型走不同的恢复路径。恢复后 `continue` 回到循环开头重新调用 LLM。\n\n三种最常见的恢复模式(教学版只处理 429/529;真实系统还覆盖连接错误、超时、云厂商认证缓存等。CC 实际有 13+ reason code,其余见 Deep dive):\n\n| 模式 | 触发 | 恢复动作 |\n|------|------|---------|\n| 输出截断 | `max_tokens` | 升级 8K→64K / 续写提示 |\n| 上下文超限 | `prompt_too_long` | reactive compact → 重试 |\n| 临时故障 | 429 / 529 | 指数退避 + 抖动,连续 529 可切换备用模型 |\n\n---\n\n## 工作原理\n\n### 路径 1: 输出被截断\n\n模型话说一半,`max_tokens` 用完了。默认 8000 token 不够它输出完整回答。\n\n第一次发生时,直接把 `max_tokens` 从 8K 升级到 64K(8 倍空间),重试同一请求——此时不追加截断输出到 messages,保持原始请求不变。如果 64K 还是不够,才保存截断输出并注入续写提示让模型接着刚才的话继续说,最多 3 次:\n\n```python\nif response.stop_reason == \"max_tokens\":\n # First escalation: don't append truncated output, retry same request\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n continue # messages unchanged, same request with more tokens\n # 64K still truncated: save output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\":\n \"Output token limit hit. Resume directly — \"\n \"no apology, no recap. 
Pick up mid-thought.\"})\n state.recovery_count += 1\n continue\n return # still truncated after 3 continuations\n# Normal: append after max_tokens check\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n升级只有一次机会,续写最多 3 次。超过就退出——继续续写也不会有实质产出。\n\n### 路径 2: 上下文超限\n\nLLM 说\"你的上下文太长了\"(`prompt_too_long`)。s08 的四层压缩全跑过了,还是超。\n\n触发 reactive compact——比 auto compact 更激进。教学版只保留最后 5 条消息模拟压缩效果;真实实现会调用 LLM 生成 compact 摘要再重试。压缩后重试。但如果压缩过一次还是超限,只能退出——再压缩也不会变小:\n\n```python\nexcept PromptTooLongError:\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return # 压缩过了还是超限,只能退出\n```\n\n### 路径 3: 临时故障\n\n网络抖动、429 限流、529 过载——这些不是 bug,是分布式系统的常态。\n\n429 和 529 统一走指数退避 + 抖动:第一次等 0.5 秒,第二次等 1 秒,第三次等 2 秒,最多 10 次。加随机抖动让并发请求不在同一时刻重试。连续 3 次 529 过载 → 切换到备用模型(若配置了 `FALLBACK_MODEL_ID` 环境变量):\n\n```python\ndef retry_delay(attempt, retry_after=None):\n if retry_after:\n return retry_after\n base = min(500 * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\ndef with_retry(fn, state, max_retries=10):\n for attempt in range(max_retries):\n try:\n return fn()\n except (RateLimitError, OverloadedError):\n delay = retry_delay(attempt)\n time.sleep(delay)\n if is_overloaded:\n state.consecutive_529 += 1\n if state.consecutive_529 >= 3 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n raise MaxRetriesExceeded()\n```\n\n退避公式:`min(500 × 2^attempt, 32000) + random(0~25%)`。如果服务器返回 `Retry-After` header,优先用那个值。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages, context):\n system = get_system_prompt(context)\n state = RecoveryState()\n max_tokens = 8000\n\n while True:\n try:\n response = with_retry(\n lambda: client.messages.create(\n model=state.current_model, system=system,\n messages=messages, tools=TOOLS,\n max_tokens=max_tokens),\n state)\n except Exception as e:\n if is_prompt_too_long_error(e):\n if not 
state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return\n log_error(e)\n return\n\n # max_tokens check BEFORE appending to messages\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = 64000\n state.has_escalated = True\n continue # retry same request, messages unchanged\n # save truncated output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n continue\n # Normal completion\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n # ... tool execution ...\n```\n\n外层 try/except 捕获 API 异常(prompt_too_long 等),`with_retry` 处理瞬态错误(429/529),`stop_reason` 检查处理截断。三种恢复机制各管各的错误类型。\n\n---\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|------|-----------|-----------|\n| 错误处理 | 无(一碰就崩溃) | 三种恢复模式 + 指数退避 |\n| 新常量 | — | ESCALATED_MAX_TOKENS=64000, MAX_RETRIES=10, BASE_DELAY_MS=500, FALLBACK_MODEL |\n| 新函数 | — | with_retry, retry_delay, reactive_compact, is_prompt_too_long_error, RecoveryState |\n| 工具 | bash, read_file, write_file (3) | bash, read_file, write_file (3) — 不变 |\n| 循环 | 裸调用 LLM | try/except 包裹 + continue 重试 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s11_error_recovery/code.py\n```\n\n试试这些 prompt:\n\n1. 让 Agent 生成一段很长的代码,观察截断后是否自动续写(看 `[max_tokens] escalating` 日志)\n2. 连续读取大量文件撑大上下文,观察 reactive compact\n3. 如果遇到 429/529,观察指数退避的日志输出\n\n---\n\n## 接下来\n\nAgent 现在能在错误中自动恢复了。但它处理的任务仍然是\"一次性\"的——你给它一个任务,它做完,结束。\n\n能不能让 Agent 管理一个**任务列表**——有依赖关系、持久化到磁盘、跨会话能恢复?TODO 列表不是任务系统。\n\ns12 Task System → 任务是有依赖、有状态、持久化的图。这是多 Agent 协作的基础。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `query.ts`(1729 行)、`services/api/withRetry.ts`(822 行)、`query/tokenBudget.ts`(93 行)、`utils/tokenBudget.ts`(73 行)的分析。\n\n### 一、十几种 reason/transition(不只是 3 条)\n\n教学版讲了 3 种最常见的恢复模式。CC 实际有十几种 reason/transition,每轮 LLM 调用后都会判断:\n\n| reason/transition | 教学版对应 | CC 行为 |\n|---|---|---|\n| `completed` | 正常完成 | 返回结果 |\n| `next_turn` | 正常工具调用 | 继续下一轮工具执行 |\n| `max_output_tokens_escalate` | 路径 1 | 8K→64K 升级 |\n| `max_output_tokens_recovery` | 路径 1 续写 | 续写提示(最多 3 次) |\n| `reactive_compact_retry` | 路径 2 | reactive compact → 重试 |\n| `prompt_too_long` | 路径 2 | 同上 |\n| `collapse_drain_retry` | 未展开 | context collapse 先提交暂存 |\n| `model_error` | 未展开 | 重试 |\n| `image_error` | 未展开 | `ImageSizeError` / `ImageResizeError` 专门处理 |\n| `aborted_streaming` | 未展开 | 流式中止恢复 |\n| `aborted_tools` | 未展开 | 工具中止 |\n| `stop_hook_blocking` | 未展开 | 注入 blocking error → 模型自纠 |\n| `stop_hook_prevented` | 未展开 | hooks 阻止 |\n| `hook_stopped` | 未展开 | hook 停止执行 |\n| `token_budget_continuation` | 未展开 | token 用量 < 90% 时继续 |\n| `blocking_limit` | 未展开 | 阻塞限制 |\n| `max_turns` | 未展开 | 达到最大轮次 |\n\n教学版只展开了前 5 种(最常见的),其余各有专门处理逻辑。\n\n### 二、指数退避的精确公式\n\nCC 的退避延迟(`withRetry.ts:530-548`):\n\n```\ndelay = min(500 × 2^(attempt-1), 32000) + random(0~25%)\n```\n\n| 尝试 | 基础延迟 | + 抖动 |\n|------|---------|--------|\n| 1 | 500ms | 0-125ms |\n| 2 | 1000ms | 0-250ms |\n| 4 | 4000ms | 0-1000ms |\n| 7+ | 32000ms(上限) | 0-8000ms |\n\n如果服务器返回 `Retry-After` header,优先用那个值。\n\n### 三、CONTINUATION 提示原文\n\nCC 的续写提示(`query.ts:1225-1227`):\n\n```\nOutput token limit hit. Resume directly — no apology, no recap of what\nyou were doing. Pick up mid-thought if that is where the cut happened.\nBreak remaining work into smaller pieces.\n```\n\nToken budget 的 nudge 提示(`tokenBudget.ts:72`):\n\n```\nStopped at {pct}% of token target. 
Keep working — do not summarize.\n```\n\n### 四、流式错误处理\n\nCC 的流式路径中,可恢复的错误(413、max_tokens、media error)在 streaming 期间**被暂扣不展示**(`query.ts:788-822`)——SDK 消费者看不到,只有恢复逻辑能看到。等 streaming 结束后才判断是否需要恢复。\n\n### 五、529 → Fallback Model 切换\n\n连续 3 次 529 过载错误后(`MAX_529_RETRIES = 3`),CC 自动切换到 fallback model(如 Opus → Sonnet)。切换时清除所有 pending 消息和 tool 结果,给用户展示 \"Switched to {model} due to high demand\"。\n\n### 六、Diminishing Returns 检测\n\nToken budget 的\"继续\"不是无限的。当连续 3 次 continuation 且 token 增量 < 500 时,系统判断\"继续也没有实质性产出\",停止 continuation(`tokenBudget.ts:60-62`)。\n\n
\n\n\n" }, { "version": "s11", "locale": "ja", - "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. 
`Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n" + "title": "s11: Error Recovery — エラーは終わりではなく、リトライの始まり", + "content": "# s11: Error Recovery — エラーは終わりではなく、リトライの始まり\n\ns01 → ... → s09 → s10 → `s11` → [s12](/ja/s12) → s13 → ... → s20\n> *\"エラーは終わりではなく、リトライの始まり\"* — トークン拡張、コンテキスト圧縮、モデル切り替え。\n>\n> **Harness 層**: 耐障害性 — メインループのエラーを分類し復旧。\n\n---\n\n## 課題\n\nAgent が動いている途中でエラーが出た:\n\n```\nError: 529 overloaded\n```\n\nAgent がクラッシュした。リトライもしない、モデルも切り替えない、コンテキストも減らさない——そのままクラッシュ。\n\n本番環境では API エラーが日常茶飯事。最も一般的な 3 つの障害パターン:**出力の切り詰め**(モデルが途中まで出力して token が尽きた)、**コンテキスト超過**(圧縮後も長すぎる)、**一時的障害**(429 レート制限 / 529 過負荷)。エラーを処理しない Agent は、一度触れただけで止まる車のようなものだ。\n\n---\n\n## 解決策\n\n![Error Recovery Overview](/course-assets/s11_error_recovery/error-recovery-overview.ja.svg)\n\ns10 のループ、prompt 組み立てはすべてそのまま。唯一の変更点:LLM 呼び出しを try/except で包み、エラータイプに応じて異なる復旧パスに振り分ける。復旧後は `continue` でループ先頭に戻り、再度 LLM を呼び出す。\n\n最も一般的な 3 つの復旧パターン(教学版は 429/529 のみ対応;実際のシステムは接続エラー、タイムアウト、クラウドベンダーの認証キャッシュ等もカバー。CC には実際 13 以上の reason code があるが、残りは Deep dive で解説):\n\n| パターン | トリガー | 復旧アクション |\n|----------|----------|---------------|\n| 出力切り詰め | `max_tokens` | 8K→64K に拡張 / 続きのプロンプト注入 |\n| コンテキスト超過 | `prompt_too_long` | reactive compact → リトライ |\n| 一時的障害 | 429 / 529 | 指数バックオフ + ジッター、連続 529 でフォールバックモデルに切り替え可能 |\n\n---\n\n## 仕組み\n\n### パス 1: 出力が切り詰められた\n\nモデルが途中まで出力して、`max_tokens` に達した。デフォルトの 8000 token では完全な回答を出力しきれない。\n\n初回発生時、`max_tokens` を 8K から 64K に拡張(8 倍の空間)し、同じリクエストをリトライする——この時、切り詰められた出力は messages に追加せず、元のリクエストをそのまま維持する。64K でも足りない場合にのみ、切り詰められた出力を保存し、続きのプロンプトを注入してモデルに先ほどの続きを出力させる。最大 3 回まで:\n\n```python\nif response.stop_reason == \"max_tokens\":\n # First escalation: don't append truncated output, retry same request\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n continue # messages unchanged, same request with more tokens\n # 64K still truncated: save output + 
continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\":\n \"Output token limit hit. Resume directly — \"\n \"no apology, no recap. Pick up mid-thought.\"})\n state.recovery_count += 1\n continue\n return # still truncated after 3 continuations\n# Normal: append after max_tokens check\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n拡張は 1 回だけ、続きの出力は最大 3 回。超過したら終了——これ以上続けても実質的な出力は得られない。\n\n### パス 2: コンテキスト超過\n\nLLM が「コンテキストが長すぎる」と返す(`prompt_too_long`)。s08 の 4 層圧縮をすべて実行したのに、まだ超えている。\n\nreactive compact をトリガー——auto compact よりも積極的。教学版は最後の 5 メッセージだけを残して圧縮をシミュレート;実際の CC は LLM で compact サマリを生成してからリトライする。圧縮後にリトライ。ただし、一度圧縮してもまだ超過している場合は終了するしかない——再度圧縮しても小さくはならない:\n\n```python\nexcept PromptTooLongError:\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return # 圧縮済みでも超過、終了するしかない\n```\n\n### パス 3: 一時的障害\n\nネットワークの揺らぎ、429 レート制限、529 過負荷——これらはバグではなく、分散システムの日常だ。\n\n429 と 529 は統一して指数バックオフ + ジッターを使用:1 回目は 0.5 秒待機、2 回目は 1 秒、3 回目は 2 秒、最大 10 回。ランダムジッターを加えることで、並行リクエストが同時にリトライするのを防ぐ。3 回連続で 529 過負荷 → フォールバックモデルに切り替え(`FALLBACK_MODEL_ID` 環境変数が設定されている場合):\n\n```python\ndef retry_delay(attempt, retry_after=None):\n if retry_after:\n return retry_after\n base = min(500 * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\ndef with_retry(fn, state, max_retries=10):\n for attempt in range(max_retries):\n try:\n return fn()\n except (RateLimitError, OverloadedError):\n delay = retry_delay(attempt)\n time.sleep(delay)\n if is_overloaded:\n state.consecutive_529 += 1\n if state.consecutive_529 >= 3 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n raise MaxRetriesExceeded()\n```\n\nバックオフの公式:`min(500 × 2^attempt, 32000) + random(0~25%)`。サーバーが `Retry-After` ヘッダーを返した場合、その値を優先して使用する。\n\n### 
統合して実行\n\n```python\ndef agent_loop(messages, context):\n system = get_system_prompt(context)\n state = RecoveryState()\n max_tokens = 8000\n\n while True:\n try:\n response = with_retry(\n lambda: client.messages.create(\n model=state.current_model, system=system,\n messages=messages, tools=TOOLS,\n max_tokens=max_tokens),\n state)\n except Exception as e:\n if is_prompt_too_long_error(e):\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n return\n log_error(e)\n return\n\n # max_tokens check BEFORE appending to messages\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = 64000\n state.has_escalated = True\n continue # retry same request, messages unchanged\n # save truncated output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n continue\n # Normal completion\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n # ... tool execution ...\n```\n\n外側の try/except が API 例外(prompt_too_long 等)を捕捉し、`with_retry` が一時的エラー(429/529)を処理し、`stop_reason` のチェックが切り詰めを処理する。3 つの復旧メカニズムがそれぞれ異なるエラータイプを担当する。\n\n---\n\n## s10 からの変更点\n\n| コンポーネント | 変更前 (s10) | 変更後 (s11) |\n|---------------|-------------|-------------|\n| エラー処理 | なし(エラーで即クラッシュ) | 3 つの復旧パターン + 指数バックオフ |\n| 新規定数 | — | ESCALATED_MAX_TOKENS=64000, MAX_RETRIES=10, BASE_DELAY_MS=500, FALLBACK_MODEL |\n| 新規関数 | — | with_retry, retry_delay, reactive_compact, is_prompt_too_long_error, RecoveryState |\n| ツール | bash, read_file, write_file (3) | bash, read_file, write_file (3) — 変更なし |\n| ループ | LLM を直接呼び出し | try/except で包み + continue でリトライ |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s11_error_recovery/code.py\n```\n\n以下の prompt を試してみよう:\n\n1. 
Agent に長いコードを生成させ、切り詰め後に自動で続きが出力されるか観察する(`[max_tokens] escalating` ログを確認)\n2. 連続して大量のファイルを読み込みコンテキストを肥大化させ、reactive compact の動作を観察する\n3. 429/529 が発生した場合、指数バックオフのログ出力を観察する\n\n---\n\n## 次のステップ\n\nAgent はエラーから自動的に復旧できるようになった。しかし、まだ処理するタスクは「使い捨て」だ——タスクを与えると実行し、終わる。\n\nAgent に**タスクリスト**を管理させられないだろうか——依存関係があり、ディスクに永続化され、セッションをまたいで復旧できる?TODO リストはタスクシステムではない。\n\ns12 Task System → タスクとは依存関係があり、状態があり、永続化されたグラフだ。これはマルチ Agent 協調の基盤となる。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `query.ts`(1729 行)、`services/api/withRetry.ts`(822 行)、`query/tokenBudget.ts`(93 行)、`utils/tokenBudget.ts`(73 行)の分析に基づく。\n\n### 一、十数種の reason/transition(3 つだけではない)\n\n教学版では最も一般的な 3 つの復旧パターンを解説した。CC には実際十数種の reason/transition があり、毎回の LLM 呼び出し後に判定される:\n\n| reason/transition | 教学版の対応 | CC の動作 |\n|---|---|---|\n| `completed` | 正常終了 | 結果を返す |\n| `next_turn` | 通常のツール呼び出し | 次のツール実行ラウンドへ |\n| `max_output_tokens_escalate` | パス 1 | 8K→64K に拡張 |\n| `max_output_tokens_recovery` | パス 1 続き出力 | 続きのプロンプト注入(最大 3 回) |\n| `reactive_compact_retry` | パス 2 | reactive compact → リトライ |\n| `prompt_too_long` | パス 2 | 同上 |\n| `collapse_drain_retry` | 未展開 | context collapse 時にまず保留中の内容をコミット |\n| `model_error` | 未展開 | リトライ |\n| `image_error` | 未展開 | `ImageSizeError` / `ImageResizeError` の専用処理 |\n| `aborted_streaming` | 未展開 | ストリーミング中断の復旧 |\n| `aborted_tools` | 未展開 | ツール中断 |\n| `stop_hook_blocking` | 未展開 | blocking error を注入 → モデルが自己修正 |\n| `stop_hook_prevented` | 未展開 | hooks によるブロック |\n| `hook_stopped` | 未展開 | hook による実行停止 |\n| `token_budget_continuation` | 未展開 | token 使用量 < 90% の時に継続 |\n| `blocking_limit` | 未展開 | ブロック制限 |\n| `max_turns` | 未展開 | 最大ターン数に到達 |\n\n教学版では最初の 5 つ(最も一般的なもの)だけを展開した。残りはそれぞれ専用の処理ロジックを持つ。\n\n### 二、指数バックオフの正確な公式\n\nCC のバックオフ遅延(`withRetry.ts:530-548`):\n\n```\ndelay = min(500 × 2^(attempt-1), 32000) + random(0~25%)\n```\n\n| 試行 | 基本遅延 | + ジッター |\n|------|---------|-----------|\n| 1 | 500ms | 0-125ms |\n| 2 | 1000ms | 0-250ms |\n| 4 | 4000ms | 0-1000ms |\n| 7+ | 32000ms(上限) | 0-8000ms |\n\nサーバーが `Retry-After` ヘッダーを返した場合、その値を優先して使用する。\n\n### 三、CONTINUATION プロンプト原文\n\nCC の続き出力プロンプト(`query.ts:1225-1227`):\n\n```\nOutput token limit hit. Resume directly — no apology, no recap of what\nyou were doing. Pick up mid-thought if that is where the cut happened.\nBreak remaining work into smaller pieces.\n```\n\nToken budget のナッジプロンプト(`tokenBudget.ts:72`):\n\n```\nStopped at {pct}% of token target. 
Keep working — do not summarize.\n```\n\n### 四、ストリーミングエラー処理\n\nCC のストリーミングパスでは、復旧可能なエラー(413、max_tokens、media error)はストリーミング中**表示を保留される**(`query.ts:788-822`)——SDK コンシューマーには見えず、復旧ロジックだけが認識できる。ストリーミング終了後に復旧が必要かどうかを判断する。\n\n### 五、529 → フォールバックモデル切り替え\n\n3 回連続で 529 過負荷エラーが発生した後(`MAX_529_RETRIES = 3`)、CC は自動的にフォールバックモデルに切り替える(例:Opus → Sonnet)。切り替え時にすべての保留中のメッセージと tool 結果をクリアし、ユーザーに \"Switched to {model} due to high demand\" と表示する。\n\n### 六、収穫逓減の検出\n\nToken budget の「継続」は無限ではない。連続 3 回の continuation で token 増分が 500 未満の場合、システムは「続けても実質的な出力は得られない」と判断し、continuation を停止する(`tokenBudget.ts:60-62`)。\n\n
\n\n\n" + }, + { + "version": "s12", + "locale": "en", + "title": "s12: Task System — Break Big Goals into Small Tasks", + "content": "# s12: Task System — Break Big Goals into Small Tasks\n\ns01 → ... → s10 → s11 → `s12` → [s13](/en/s13) → s14 → ... → s20\n\n> *\"Break big goals into small tasks, order them, persist\"* — File-persisted task graph, the foundation for multi-agent collaboration.\n>\n> **Harness Layer**: Tasks — Persisted goals, recoverable progress.\n\n---\n\n## The Problem\n\nThe agent receives a project: set up a database, write APIs, add tests. It uses s05's TodoWrite to create a checklist, then starts writing the API first, gets halfway through and realizes there are no database tables, goes back to fix them; when adding tests, discovers the API interface signatures have changed again...\n\nYou can't build the roof before laying the foundation. Tasks have ordering. Task dependencies should form a Directed Acyclic Graph (DAG); the teaching version only demonstrates `blockedBy` checking, without cycle detection.\n\ns05's TodoWrite is an execution checklist for the current task, kept in session memory. What you need here is a **task system**: each task is a JSON file, tasks have `blockedBy` dependencies, and they persist across sessions on disk.\n\n---\n\n## The Solution\n\n![Task System Overview](/course-assets/s12_task_system/task-system-overview.en.svg)\n\nTeaching code keeps a basic agent loop, omitting S11's full error recovery (RecoveryState, backoff, escalation, reactive compact, fallback model) to stay focused on the task system. Added: 5 new task tools + `.tasks/` directory for persistence + `blockedBy` dependency checking. 
The task system and error recovery are independent layers: in CC source, `utils/tasks.ts` only handles CRUD, while `query.ts`'s with_retry/RecoveryState handles error recovery, with no coupling between them.\n\nTodoWrite vs Task System:\n\n| | TodoWrite (s05) | Task System (s12) |\n|---|---|---|\n| Role | Execution checklist for the current task | Recoverable task system |\n| Storage | In-process / session state | `.tasks/{id}.json` |\n| Dependencies | None | `blockedBy` / `blocks` graph |\n| Lifecycle | Current session / current task | Cross-session |\n| Coordination | No task claiming | `owner` / claim |\n| Status | pending / in_progress / completed | pending / in_progress / completed |\n| Granularity | The agent's own steps | Tasks that can be claimed, tracked, and unblocked |\n\n---\n\n## How It Works\n\n![Task DAG](/course-assets/s12_task_system/task-dag.en.svg)\n\n### Task: Data Structure\n\nEach task is a JSON file, stored in the `.tasks/` directory:\n\n```python\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None # Agent name (multi-agent scenarios)\n blockedBy: list[str] # List of dependency task IDs\n```\n\nIDs are generated with `timestamp + random hex`, simple but sufficient. CC uses sequential IDs + a highwatermark file to prevent ID reuse, which is a more rigorous design.\n\n### create_task: Create Tasks\n\n```python\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random_hex(4)}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n```\n\nAutomatically calls `save_task` on creation to write `.tasks/{id}.json`. 
`blockedBy` declares dependencies, for example \"write API\" has `blockedBy: [\"task_schema\"]`.\n\n### can_start: Dependency Check\n\nA task can only start after all its `blockedBy` dependencies are **completed**:\n\n```python\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False # missing dependency = blocked\n dep = load_task(dep_id)\n if dep.status != \"completed\":\n return False\n return True\n```\n\n`can_start` is a prerequisite check for `claim_task`: if any `blockedBy` dependency is not completed, the task cannot be claimed. Missing dependencies are treated as blocked, avoiding crashes from referencing wrong IDs.\n\n### claim_task: Claim a Task\n\nWhen the agent starts working on a task, it calls `claim_task`: sets `owner`, changes status from `pending` → `in_progress`. The `owner` field records who is working on the task, preventing duplicate claims in multi-agent scenarios:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task_id} ({task.subject})\"\n```\n\nIf the task is already claimed by someone else (`status != \"pending\"`), or dependencies aren't met (`can_start` returns False), the claim is rejected.\n\n### complete_task: Complete and Unblock\n\nWhen a task is done, set it to `completed`. 
Simultaneously scan all other tasks to find downstream tasks that were **just unblocked**:\n\n```python\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n task.status = \"completed\"\n save_task(task)\n # Find newly unblocked downstream tasks\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy\n and can_start(t.id)]\n msg = f\"Completed {task_id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n```\n\nAfter completing \"schema\", `can_start` returns True for \"endpoints\" and \"docs\"; they can begin.\n\n### get_task: View Full Details\n\n`list_tasks` only shows a one-line summary. `get_task` returns the full task JSON, including description and dependency details. When recovering across sessions, the agent needs to read the full description to continue work:\n\n```python\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n```\n\n### State Machine: Two Actions, Three States\n\n```\npending ──claim──→ in_progress ──complete──→ completed\n```\n\nHere `claim` / `complete` are actions, while `pending` / `in_progress` / `completed` are states:\n\n- **claim_task**: `pending` → `in_progress`. Sets owner, begins work.\n- **complete_task**: `in_progress` → `completed`. Marks the task done and unblocks downstream.\n\nCC has no `in_progress → pending` release path. If a teammate terminates or shuts down, CC unassigns its unfinished tasks (clears owner) and resets status to `pending`, allowing other agents to reclaim them. 
The teaching version omits this recovery path.\n\n### Putting It Together\n\n```python\n# Create tasks with dependencies\nschema = create_task(\"setup database schema\")\nendpoints = create_task(\"create API endpoints\", blockedBy=[schema.id])\ntests = create_task(\"write tests\", blockedBy=[endpoints.id])\ndocs = create_task(\"write docs\", blockedBy=[schema.id])\n\n# Agent claims the first available task\nclaim_task(schema.id) # ✓ Claimed (no dependencies)\ncomplete_task(schema.id) # ✓ Completed → unblocks endpoints, docs\n\nclaim_task(endpoints.id) # ✓ Claimed (schema completed)\ncomplete_task(endpoints.id) # ✓ Completed → unblocks tests\n\nclaim_task(docs.id) # ✓ Claimed (schema completed)\ncomplete_task(docs.id) # ✓ Completed\n\nclaim_task(tests.id) # ✓ Claimed (endpoints completed)\ncomplete_task(tests.id) # ✓ Completed\n```\n\nEach `create_task` writes a JSON file, each `claim_task` / `complete_task` updates the file. Across sessions, the `.tasks/` directory persists — the agent reads the files to recover progress.\n\n---\n\n## Changes from s11\n\n| Component | Before (s11) | After (s12) |\n|-----------|-------------|-------------|\n| Task management | None | Task dataclass + 5 tools |\n| New types | — | Task (id, subject, description, status, owner, blockedBy) |\n| Storage | No persistence | `.tasks/{id}.json` cross-session |\n| Dependencies | None | `blockedBy` graph + `can_start` check |\n| Tools | bash, read_file, write_file (3) | + create_task, list_tasks, get_task, claim_task, complete_task (8) |\n| Lifecycle | — | pending → in_progress → completed (no release rollback) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s12_task_system/code.py\n```\n\nTry these prompts:\n\n1. `Create tasks: setup database schema, create API endpoints (depends on schema), write tests (depends on endpoints), write docs (depends on schema)`\n2. `List all tasks and their statuses`\n3. `Claim the first unblocked task and complete it`\n4. 
`List tasks again — which ones are now unblocked?`\n\nWhat to observe: Are JSON files generated in the `.tasks/` directory? After completing a task, are the blocked tasks unblocked?\n\n---\n\n## What's Next\n\nThe task graph is in place. But some tasks take a long time — like running full test suites or deploying to a server. The agent's LLM calls are billed by token, so it can't afford to sit idle waiting on a slow operation.\n\ns13 Background Tasks → Slow operations go to the background. The agent continues processing other tasks, and gets notified when the background work is done.\n\n
\nDeep Dive into CC Source\n\n> The following is a complete analysis based on CC source code `utils/tasks.ts` (862 lines), `tools/TaskCreateTool/TaskCreateTool.ts` (138 lines), `tools/TaskUpdateTool/TaskUpdateTool.ts` (406 lines), `tools/TaskGetTool/TaskGetTool.ts` (128 lines), `tools/TaskListTool/TaskListTool.ts` (116 lines), `hooks/useTaskListWatcher.ts` (221 lines).\n\n### 1. TaskRecord's Full Fields\n\nThe tutorial only covers id, subject, status, owner, blockedBy. CC actually has 9 fields (`utils/tasks.ts:76-89`):\n\n| Field | Type | Purpose |\n|------|------|---------|\n| `id` | string | Incrementing integer ID |\n| `subject` | string | Short title |\n| `description` | string | Free-form description |\n| `activeForm` | string? | Present tense form, shown in spinner when in_progress |\n| `owner` | string? | Assigned agent ID |\n| `status` | pending/in_progress/completed | Lifecycle |\n| `blocks` | string[] | Task IDs blocked by this task (downstream) |\n| `blockedBy` | string[] | Task IDs blocking this task (upstream) |\n| `metadata` | Record? | Arbitrary extension key-value pairs |\n\nStorage location: `~/.claude/tasks/{taskListId}/{id}.json`. One file per task.\n\n### 2. Not a TodoWrite Upgrade — Two Independent Systems\n\nIn CC, Task System and TodoWrite **coexist**, toggled by `isTodoV2Enabled()` (`utils/tasks.ts:133`) — interactive sessions default to Task (V2), non-interactive/SDK sessions default to TodoWrite. The `CLAUDE_CODE_ENABLE_TASKS` env var can force-enable Task. Task has what TodoWrite lacks: file-lock concurrency protection, dependency enforcement, ownership, fs.watch reactive monitoring, lifecycle hooks.\n\n### 3. Concurrent Claim Locking\n\n`claimTask()` (`utils/tasks.ts:541-612`) uses dual locking to prevent races:\n\n**Task file lock**: `proper-lockfile` locks `{taskId}.json` (up to 30 retries, exponential backoff 5-100ms). Inside the lock:\n1. Re-read task (prevent TOCTOU)\n2. Check already claimed by another → `already_claimed`\n3. 
Check already completed → `already_resolved`\n4. Check upstream not completed → `blocked`\n5. Set owner\n\n**List-level lock** (agent busy check): `.lock` file, atomic scan of all tasks to check if the agent already has other open tasks.\n\nNote: The teaching version combines claiming and starting work into one step (claim = set owner + in_progress); real CC's `claimTask` primarily resolves owner competition — it only sets owner without changing status. Status updates are handled by `TaskUpdate`.\n\n### 4. High-Water Mark to Prevent ID Reuse\n\nThe `.highwatermark` file records the highest task ID ever assigned. Even if a task is deleted, its ID won't be reused.\n\n### 5. Four Task Tools\n\nCC's task system has four tools — `TaskCreate`, `TaskGet`, `TaskUpdate`, `TaskList` — whereas the teaching version has five (`create_task`, `list_tasks`, `get_task`, `claim_task`, `complete_task`). All four CC tools set `isConcurrencySafe: true` and `shouldDefer: true` (tool schemas aren't in the initial prompt; only visible after ToolSearch).\n\nThe teaching version's `create_task(blockedBy=...)` declares dependencies at creation time, which is a reasonable simplification. Real CC's `TaskCreate` only accepts subject/description/activeForm/metadata — dependencies are maintained via `TaskUpdate`'s `addBlocks/addBlockedBy`.\n\n
\n\n\n" + }, + { + "version": "s12", + "locale": "zh", + "title": "s12: Task System — 目标太大,拆成小任务", + "content": "# s12: Task System — 目标太大,拆成小任务\n\ns01 → ... → s10 → s11 → `s12` → [s13](/zh/s13) → s14 → ... → s20\n\n> *\"大目标拆成小任务, 排好序, 持久化\"* — 文件持久化的任务图, 多 agent 协作的基础。\n>\n> **Harness 层**: 任务 — 持久化的目标, 可恢复的进度。\n\n---\n\n## 问题\n\nAgent 接到一个项目:搭数据库、写 API、加测试。它用 s05 的 TodoWrite 列了一张清单,然后开始写 API,写到一半发现没数据库表,回头补;加测试时发现 API 接口签名又变了...\n\n盖房子不能先盖屋顶再打地基。任务之间有先后。任务依赖应该形成有向无环图(DAG);教学版只演示 `blockedBy` 检查,没有实现环检测。\n\ns05 的 TodoWrite 是当前任务的执行清单,保存在会话内存中。这里需要的是**任务系统**:每个任务是一个 JSON 文件,任务之间有 `blockedBy` 依赖,跨会话持久化在磁盘上。\n\n---\n\n## 解决方案\n\n![Task System Overview](/course-assets/s12_task_system/task-system-overview.svg)\n\n教学代码保留基础 agent loop,为聚焦任务系统省略了 S11 的完整错误恢复(RecoveryState、退避、升级、reactive compact、fallback model)。新增 5 个任务工具 + `.tasks/` 目录持久化 + `blockedBy` 依赖检查。任务系统与错误恢复是独立层:CC 源码中 `utils/tasks.ts` 只管 CRUD,`query.ts` 的 with_retry/RecoveryState 管错误恢复,互不耦合。\n\nTodoWrite vs Task System:\n\n| | TodoWrite (s05) | Task System (s12) |\n|---|---|---|\n| 定位 | 当前任务的执行清单 | 可恢复的任务系统 |\n| 存储 | 进程内 / 会话状态 | `.tasks/{id}.json` |\n| 依赖 | 无 | `blockedBy` / `blocks` 依赖图 |\n| 生命周期 | 当前会话 / 当前任务 | 跨会话保留 |\n| 分工 | 不负责任务认领 | `owner` / claim |\n| 状态 | pending / in_progress / completed | pending / in_progress / completed |\n| 粒度 | Agent 自己的步骤 | 可被认领、追踪、解锁的任务 |\n\n---\n\n## 工作原理\n\n![Task DAG](/course-assets/s12_task_system/task-dag.svg)\n\n### Task: 数据结构\n\n每个任务是一个 JSON 文件,存于 `.tasks/` 目录:\n\n```python\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None # Agent 名(多 Agent 场景)\n blockedBy: list[str] # 依赖的任务 ID 列表\n```\n\nID 用 `timestamp + random hex` 生成,简单但够用。CC 用顺序 ID + highwatermark 文件防止 ID 重用,是更严谨的设计。\n\n### create_task: 创建任务\n\n```python\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random_hex(4)}\",\n 
subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n```\n\n创建时自动 `save_task` 到 `.tasks/{id}.json`。`blockedBy` 声明依赖,比如 \"写 API\" 的 `blockedBy` 是 `[\"task_schema\"]`。\n\n### can_start: 依赖检查\n\n一个任务只能在它的 `blockedBy` **全部 completed** 之后才能开始:\n\n```python\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False # missing dependency = blocked\n dep = load_task(dep_id)\n if dep.status != \"completed\":\n return False\n return True\n```\n\n`can_start` 是 `claim_task` 的前置检查:`blockedBy` 里有任何一个不是 completed,就不能认领。不存在的依赖视为 blocked,避免引用错误 ID 时崩溃。\n\n### claim_task: 认领任务\n\nAgent 开始做一个任务时,调用 `claim_task`:设置 `owner`,状态从 `pending` → `in_progress`。`owner` 字段记录谁在做这个任务,多 Agent 场景下防止重复认领:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task_id} ({task.subject})\"\n```\n\n如果任务已被别人认领(`status != \"pending\"`),或者依赖没完成(`can_start` 返回 False),拒绝认领。\n\n### complete_task: 完成与解锁\n\n任务做完后,设为 `completed`。同时扫描所有其他任务,找出**刚刚被解锁**的下游任务:\n\n```python\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n task.status = \"completed\"\n save_task(task)\n # 找出被解锁的下游任务\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy\n and can_start(t.id)]\n msg = f\"Completed {task_id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n```\n\n完成 \"schema\" 后,\"endpoints\" 和 \"docs\" 的 `can_start` 返回 True,它们可以开始。\n\n### get_task: 查看完整细节\n\n`list_tasks` 
只显示一行摘要。`get_task` 返回完整的任务 JSON,包括 description 和依赖细节。跨会话恢复时,Agent 需要读取完整描述才能继续工作:\n\n```python\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n```\n\n### 状态机: 两个动作,三个状态\n\n```\npending ──claim──→ in_progress ──complete──→ completed\n```\n\n这里的 `claim` / `complete` 是动作,`pending` / `in_progress` / `completed` 是状态:\n\n- **claim_task**: `pending` → `in_progress`。设置 owner,开始工作。\n- **complete_task**: `in_progress` → `completed`。把任务标记为完成,并解锁下游。\n\nCC 没有供 agent 主动调用的 `in_progress → pending` release 动作;不过如果 teammate 终止或 shutdown,CC 会把它未完成的任务 unassign(清除 owner),并将 status 重置为 `pending`,方便其他 agent 重新认领。教学版省略了这一恢复路径,因此状态只会单向前进。\n\n### 合起来跑\n\n```python\n# 创建有依赖的任务\nschema = create_task(\"setup database schema\")\nendpoints = create_task(\"create API endpoints\", blockedBy=[schema.id])\ntests = create_task(\"write tests\", blockedBy=[endpoints.id])\ndocs = create_task(\"write docs\", blockedBy=[schema.id])\n\n# Agent 认领第一个可做的任务\nclaim_task(schema.id) # ✓ Claimed (无依赖)\ncomplete_task(schema.id) # ✓ Completed → 解锁 endpoints, docs\n\nclaim_task(endpoints.id) # ✓ Claimed (schema 已完成)\ncomplete_task(endpoints.id) # ✓ Completed → 解锁 tests\n\nclaim_task(docs.id) # ✓ Claimed (schema 已完成)\ncomplete_task(docs.id) # ✓ Completed\n\nclaim_task(tests.id) # ✓ Claimed (endpoints 已完成)\ncomplete_task(tests.id) # ✓ Completed\n```\n\n每个 `create_task` 写一个 JSON 文件,每个 `claim_task` / `complete_task` 更新文件。跨会话时,`.tasks/` 目录还在,Agent 读文件就能恢复进度。\n\n---\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|------|-----------|-----------|\n| 任务管理 | 无 | Task dataclass + 5 个工具 |\n| 新类型 | — | Task(id, subject, description, status, owner, blockedBy) |\n| 存储 | 无持久化 | `.tasks/{id}.json` 跨会话 |\n| 依赖 | 无 | `blockedBy` 图 + `can_start` 检查 |\n| 工具 | bash, read_file, write_file (3) | + create_task, list_tasks, get_task, claim_task, complete_task (8) |\n| 生命周期 | — | pending → in_progress → completed(无 release 回退) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython 
s12_task_system/code.py\n```\n\n试试这些 prompt:\n\n1. `Create tasks: setup database schema, create API endpoints (depends on schema), write tests (depends on endpoints), write docs (depends on schema)`\n2. `List all tasks and their statuses`\n3. `Claim the first unblocked task and complete it`\n4. `List tasks again — which ones are now unblocked?`\n\n观察重点:`.tasks/` 目录下是否生成了 JSON 文件?完成任务后,被阻塞的任务是否解锁?\n\n---\n\n## 接下来\n\n任务图有了。但有些任务要跑很久——比如全量测试、部署到服务器。Agent 调 LLM 按量计费,不能干等一个慢操作。\n\ns13 Background Tasks → 慢操作放后台。Agent 继续处理其他任务,后台跑完了通知它。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `utils/tasks.ts`(862 行)、`tools/TaskCreateTool/TaskCreateTool.ts`(138 行)、`tools/TaskUpdateTool/TaskUpdateTool.ts`(406 行)、`tools/TaskGetTool/TaskGetTool.ts`(128 行)、`tools/TaskListTool/TaskListTool.ts`(116 行)、`hooks/useTaskListWatcher.ts`(221 行)的分析。\n\n### 一、TaskRecord 的完整字段\n\n教学版的 Task 只有 6 个字段:id、subject、description、status、owner、blockedBy。CC 实际有 9 个字段(`utils/tasks.ts:76-89`):\n\n| 字段 | 类型 | 用途 |\n|------|------|------|\n| `id` | string | 递增整数 ID |\n| `subject` | string | 简短标题 |\n| `description` | string | 自由格式描述 |\n| `activeForm` | string? | 进行时态,in_progress 时在 spinner 显示 |\n| `owner` | string? | 分配的 agent ID |\n| `status` | pending/in_progress/completed | 生命周期 |\n| `blocks` | string[] | 此任务阻塞的任务 ID(下游) |\n| `blockedBy` | string[] | 阻塞此任务的任务 ID(上游) |\n| `metadata` | Record? | 任意扩展键值对 |\n\n存储位置:`~/.claude/tasks/{taskListId}/{id}.json`。每个任务一个文件。\n\n### 二、不是 TodoWrite 的升级,是两个独立系统\n\nCC 中 Task System 和 TodoWrite **同时存在**,通过 `isTodoV2Enabled()` 切换(`utils/tasks.ts:133`)——交互式会话默认启用 Task(V2),非交互式/SDK 默认用 TodoWrite。环境变量 `CLAUDE_CODE_ENABLE_TASKS` 可强制启用 Task。Task 有 TodoWrite 没有的:文件锁并发保护、依赖强制执行、ownership、fs.watch 响应式监听、生命周期 hooks。\n\n### 三、并发认领的锁机制\n\n`claimTask()`(`utils/tasks.ts:541-612`)用双重锁防竞争:\n\n**任务文件锁**:`proper-lockfile` 锁住 `{taskId}.json`(最多重试 30 次,指数退避 5-100ms)。锁内:\n1. 重新读取任务(防 TOCTOU)\n2. 检查已被他人认领 → `already_claimed`\n3. 检查已完成 → `already_resolved`\n4. 检查上游未完成 → `blocked`\n5. 
设置 owner\n\n**列表级锁**(agent busy 检查时):`.lock` 文件,原子性扫描所有任务并检查该 agent 是否已有其他 open task。\n\n注意:教学版把 claim 和开始工作合成一步(claim = set owner + in_progress);真实 CC 的 `claimTask` 主要解决 owner 竞争,只设 owner 不改 status,状态更新由 `TaskUpdate` 完成。\n\n### 四、高水位标防 ID 重用\n\n`.highwatermark` 文件记录曾分配过的最高任务 ID。即使任务被删除,ID 也不会被重用。\n\n### 五、四个 Task 工具\n\nCC 的任务系统有四个工具(教学版则是五个:`create_task`、`list_tasks`、`get_task`、`claim_task`、`complete_task`):`TaskCreate`、`TaskGet`、`TaskUpdate`、`TaskList`。全部设置 `isConcurrencySafe: true` 和 `shouldDefer: true`(工具 schema 不在初始 prompt 中,需 ToolSearch 后才可见)。\n\n教学版的 `create_task(blockedBy=...)` 在创建时直接声明依赖,是合理简化。真实 CC 的 `TaskCreate` 只接受 subject/description/activeForm/metadata,依赖关系由 `TaskUpdate` 的 `addBlocks/addBlockedBy` 维护。\n\n
\n\n\n" }, { "version": "s12", "locale": "ja", - "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. 
**worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. 
`Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "title": "s12: Task System — 大きな目標を小さなタスクに分割", + "content": "# s12: Task System — 大きな目標を小さなタスクに分割\n\ns01 → ... → s10 → s11 → `s12` → [s13](/ja/s13) → s14 → ... → s20\n\n> *\"大きな目標を小さなタスクに分け、順序付け、永続化\"* — ファイル永続化タスクグラフ、マルチ Agent 協調の基盤。\n>\n> **Harness 層**: タスク — 永続化された目標、復旧可能な進捗。\n\n---\n\n## 課題\n\nAgent がプロジェクトを受けた:データベース構築、API 実装、テスト追加。s05 の TodoWrite でリストを作り、まず API を書き始め、途中でデータベーステーブルがないことに気づいて戻る。テスト追加時に API インターフェースのシグネチャがまた変わっている...\n\n屋根を先に建てて基礎を後から打つことはできない。タスクには順序がある。タスクの依存関係は有向非巡回グラフ(DAG)を形成すべき;教学版は `blockedBy` チェックのみをデモし、循環検出は実装していない。\n\ns05 の TodoWrite は現在のタスクの実行チェックリストで、セッションメモリに保持される。ここで必要なのは**タスクシステム**:各タスクは JSON ファイル、タスク間に `blockedBy` 依存関係、ディスク上でセッションをまたいで永続化。\n\n---\n\n## ソリューション\n\n![Task System Overview](/course-assets/s12_task_system/task-system-overview.ja.svg)\n\n教学版は基本 agent loop を維持し、タスクシステムに集中するため S11 の完全なエラーリカバリ(RecoveryState、バックオフ、エスカレーション、reactive compact、フォールバックモデル)を省略。追加:5 つの新規タスクツール + `.tasks/` ディレクトリによる永続化 + `blockedBy` 依存チェック。タスクシステムとエラーリカバリは独立したレイヤー:CC ソースコードでは `utils/tasks.ts` は CRUD のみ、`query.ts` の with_retry/RecoveryState がエラーリカバリを担当し、互いに非結合。\n\nTodoWrite vs Task System:\n\n| | TodoWrite (s05) | Task System (s12) |\n|---|---|---|\n| 位置づけ | 現在のタスクの実行チェックリスト | 復旧可能なタスクシステム |\n| ストレージ | プロセス内 / セッション状態 | `.tasks/{id}.json` |\n| 依存関係 | なし | `blockedBy` / `blocks` グラフ |\n| ライフサイクル | 現在のセッション / 現在のタスク | セッション横断 |\n| 分担 | タスク認識を扱わない | `owner` / claim |\n| ステータス | pending / in_progress / completed | pending / in_progress / completed |\n| 粒度 | Agent 自身の手順 | 認識・追跡・アンロックできるタスク |\n\n---\n\n## 仕組み\n\n![Task DAG](/course-assets/s12_task_system/task-dag.ja.svg)\n\n### Task: データ構造\n\n各タスクは JSON ファイル、`.tasks/` ディレクトリに保存:\n\n```python\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: 
str # pending | in_progress | completed\n owner: str | None # Agent 名(マルチ Agent シナリオ)\n blockedBy: list[str] # 依存タスク ID のリスト\n```\n\nID は `timestamp + random hex` で生成、シンプルだが十分。CC は順次 ID + highwatermark ファイルで ID 再利用を防止する、より厳密な設計。\n\n### create_task: タスク作成\n\n```python\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random_hex(4)}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n```\n\n作成時に自動的に `save_task` で `.tasks/{id}.json` に書き込み。`blockedBy` で依存を宣言、例えば \"API を書く\" の `blockedBy` は `[\"task_schema\"]`。\n\n### can_start: 依存チェック\n\nタスクは `blockedBy` が**すべて completed** になってからでないと開始できない:\n\n```python\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False # missing dependency = blocked\n dep = load_task(dep_id)\n if dep.status != \"completed\":\n return False\n return True\n```\n\n`can_start` は `claim_task` の事前チェック:`blockedBy` に一つでも completed でないものがあれば、認識不可。存在しない依存は blocked として扱い、誤った ID 参照時のクラッシュを防ぐ。\n\n### claim_task: タスク認識\n\nAgent がタスクに取り掛かる時、`claim_task` を呼び出し:`owner` を設定、ステータスを `pending` → `in_progress` に変更。`owner` フィールドは誰が作業中かを記録し、マルチ Agent シナリオで重複認識を防止:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task_id} ({task.subject})\"\n```\n\nタスクが既に他者に認識されている(`status != \"pending\"`)、または依存が未完了(`can_start` が False)の場合、認識を拒否。\n\n### complete_task: 完了とアンロック\n\nタスク完了後、`completed` 
に設定。同時に他の全タスクを走査し、**直前にアンロックされた**下流タスクを特定:\n\n```python\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n task.status = \"completed\"\n save_task(task)\n # アンロックされた下流タスクを検索\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy\n and can_start(t.id)]\n msg = f\"Completed {task_id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n```\n\n\"schema\" 完了後、\"endpoints\" と \"docs\" の `can_start` が True を返し、開始可能になる。\n\n### get_task: 完全な詳細を確認\n\n`list_tasks` は 1 行サマリのみ表示。`get_task` は description と依存関係の詳細を含む完全なタスク JSON を返す。セッションをまたいで復旧する際、Agent は完全な説明を読んで作業を継続する必要がある:\n\n```python\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n```\n\n### 状態マシン: 2 つのアクション、3 つの状態\n\n```\npending ──claim──→ in_progress ──complete──→ completed\n```\n\nここで `claim` / `complete` はアクション、`pending` / `in_progress` / `completed` は状態:\n\n- **claim_task**: `pending` → `in_progress`。owner を設定し、作業を開始。\n- **complete_task**: `in_progress` → `completed`。タスクを完了済みにし、下流をアンロック。\n\nCC には `in_progress → pending` の release パスがない。teammate が終了または shutdown した場合、CC は未完了タスクの owner をクリアし、status を `pending` にリセットし、他の agent が再認識できるようにする。教学版はこの復旧パスを省略。\n\n### 組み合わせて実行\n\n```python\n# 依存関係のあるタスクを作成\nschema = create_task(\"setup database schema\")\nendpoints = create_task(\"create API endpoints\", blockedBy=[schema.id])\ntests = create_task(\"write tests\", blockedBy=[endpoints.id])\ndocs = create_task(\"write docs\", blockedBy=[schema.id])\n\n# Agent が最初に実行可能なタスクを認識\nclaim_task(schema.id) # ✓ Claimed(依存なし)\ncomplete_task(schema.id) # ✓ Completed → endpoints, docs をアンロック\n\nclaim_task(endpoints.id) # ✓ Claimed(schema 完了済み)\ncomplete_task(endpoints.id) # ✓ Completed → tests をアンロック\n\nclaim_task(docs.id) # ✓ Claimed(schema 完了済み)\ncomplete_task(docs.id) # ✓ Completed\n\nclaim_task(tests.id) # ✓ Claimed(endpoints 完了済み)\ncomplete_task(tests.id) # ✓ 
Completed\n```\n\n各 `create_task` が JSON ファイルを書き込み、各 `claim_task` / `complete_task` がファイルを更新。セッションをまたいでも `.tasks/` ディレクトリが残り、Agent はファイルを読んで進捗を復旧。\n\n---\n\n## s11 からの変更\n\n| コンポーネント | 変更前 (s11) | 変更後 (s12) |\n|--------------|------------|------------|\n| タスク管理 | なし | Task dataclass + 5 ツール |\n| 新規型 | — | Task(id, subject, description, status, owner, blockedBy) |\n| ストレージ | 永続化なし | `.tasks/{id}.json` セッション横断 |\n| 依存関係 | なし | `blockedBy` グラフ + `can_start` チェック |\n| ツール | bash, read_file, write_file (3) | + create_task, list_tasks, get_task, claim_task, complete_task (8) |\n| ライフサイクル | — | pending → in_progress → completed(release ロールバックなし) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s12_task_system/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Create tasks: setup database schema, create API endpoints (depends on schema), write tests (depends on endpoints), write docs (depends on schema)`\n2. `List all tasks and their statuses`\n3. `Claim the first unblocked task and complete it`\n4. `List tasks again — which ones are now unblocked?`\n\n観察ポイント:`.tasks/` ディレクトリに JSON ファイルが生成されているか?タスク完了後、ブロックされていたタスクがアンロックされているか?\n\n---\n\n## 次の章\n\nタスクグラフができた。しかし、一部のタスクは長時間かかる — 全テスト実行やサーバーデプロイなど。Agent は LLM をトークン課金で呼び出しており、遅い操作を待つ余裕はない。\n\ns13 Background Tasks → 遅い操作はバックグラウンドへ。Agent は他のタスクの処理を続け、バックグラウンドの完了を通知で受け取る。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `utils/tasks.ts`(862 行)、`tools/TaskCreateTool/TaskCreateTool.ts`(138 行)、`tools/TaskUpdateTool/TaskUpdateTool.ts`(406 行)、`tools/TaskGetTool/TaskGetTool.ts`(128 行)、`tools/TaskListTool/TaskListTool.ts`(116 行)、`hooks/useTaskListWatcher.ts`(221 行)の完全分析に基づく。\n\n### 一、TaskRecord の完全フィールド\n\n教学版の Task は id、subject、description、status、owner、blockedBy の 6 フィールドのみ。CC は実際に 9 フィールドを持つ(`utils/tasks.ts:76-89`):\n\n| フィールド | 型 | 用途 |\n|------|------|------|\n| `id` | string | 連番の整数 ID |\n| `subject` | string | 短いタイトル |\n| `description` | string | 自由形式の説明 |\n| `activeForm` | string? | 現在進行形、in_progress 時にスピナーに表示 |\n| `owner` | string? | 割り当てられた agent ID |\n| `status` | pending/in_progress/completed | ライフサイクル |\n| `blocks` | string[] | このタスクがブロックするタスク ID(下流) |\n| `blockedBy` | string[] | このタスクをブロックするタスク ID(上流) |\n| `metadata` | Record? | 任意の拡張キーバリューペア |\n\n保存場所:`~/.claude/tasks/{taskListId}/{id}.json`。タスクごとに 1 ファイル。\n\n### 二、TodoWrite のアップグレードではなく、2 つの独立システム\n\nCC では Task System と TodoWrite が**共存**し、`isTodoV2Enabled()` で切り替え(`utils/tasks.ts:133`)— 対話セッションはデフォルトで Task (V2)、非対話/SDK セッションは TodoWrite。環境変数 `CLAUDE_CODE_ENABLE_TASKS` で Task を強制有効化可能。Task は TodoWrite にない機能を持つ:ファイルロック並行保護、依存関係強制、ownership、fs.watch リアクティブ監視、ライフサイクルフック。\n\n### 三、並行 claim(引き受け)のロック機構\n\n`claimTask()`(`utils/tasks.ts:541-612`)は二重ロックで競合を防止:\n\n**タスクファイルロック**:`proper-lockfile` で `{taskId}.json` をロック(最大 30 リトライ、指数バックオフ 5-100ms)。ロック内:\n1. タスクを再読込(TOCTOU 防止)\n2. 既に他者が claim 済み → `already_claimed`\n3. 既に完了済み → `already_resolved`\n4. 上流が未完了 → `blocked`\n5. 
owner を設定\n\n**リストレベルロック**(agent busy チェック時):`.lock` ファイル、全タスクを原子的に走査し該当 agent が他の open task を持つか確認。\n\n注意:教学版は claim(引き受け)と作業開始を 1 ステップに統合(claim = owner 設定 + in_progress);実際の CC の `claimTask` は主に owner 競合を解決し、owner のみを設定して status は変更しない。status の更新は `TaskUpdate` が担当。\n\n### 四、高水位標による ID 再利用防止\n\n`.highwatermark` ファイルが過去に割り当てられた最大タスク ID を記録。タスクが削除されても ID は再利用されない。\n\n### 五、4 つの Task ツール\n\nCC のタスクシステムは 4 つのツールを持つ(教学版は `create_task`、`list_tasks`、`get_task`、`claim_task`、`complete_task` の 5 つ):`TaskCreate`、`TaskGet`、`TaskUpdate`、`TaskList`。すべて `isConcurrencySafe: true` と `shouldDefer: true` が設定されている(ツールスキーマは初期プロンプトに含まれず、ToolSearch 後にのみ可視)。\n\n教学版の `create_task(blockedBy=...)` は作成時に直接依存を宣言する合理的な簡略化。実際の CC の `TaskCreate` は subject/description/activeForm/metadata のみを受け付け、依存関係は `TaskUpdate` の `addBlocks/addBlockedBy` で管理される。\n\n
\n\n\n" + }, + { + "version": "s13", + "locale": "en", + "title": "s13: Background Tasks — Slow Operations Go to the Background", + "content": "# s13: Background Tasks — Slow Operations Go to the Background\n\ns01 → ... → s11 → s12 → `s13` → [s14](/en/s14) → s15 → ... → s20\n\n> *\"Slow operations go to the background, agent continues processing\"* — Background threads run commands, inject notifications when done.\n>\n> **Harness Layer**: Background — Async execution, doesn't block the main loop.\n\n---\n\n## The Problem\n\nEver used a washing machine? Throw clothes in, press start, then go do other things — cook, reply to messages, read papers. 30 minutes later the machine beeps: done. You don't stand there waiting for 30 minutes.\n\nThe agent's bash tool is the same. `pip install torch` takes 10 minutes, `npm run build` takes 3 minutes. While these commands run, the agent waits for bash to return, unable to use that time to process other tasks.\n\nReading files is milliseconds, no wait. `git status` returns in under a second, no wait. But `npm install`? Minutes. The agent waits 10 minutes doing nothing, and LLM calls are billed by token — idle time is waste.\n\n---\n\n## The Solution\n\n![Background Tasks Overview](/course-assets/s13_background_tasks/background-tasks-overview.en.svg)\n\nTeaching code carries forward S12's simplified task system and prompt assembly; to stay focused on background tasks, it omits full error recovery, memory, and skill systems. 
The only change: slow operations go to background threads, the agent continues running the loop, and background results are injected as notifications.\n\nSync vs Background:\n\n| | Sync (s12) | Background (s13) |\n|---|---|---|\n| Slow operations | Agent waits | Background thread executes |\n| Agent idle | Yes | No, continues processing |\n| Result | Immediate return | Notification injected next turn |\n| Decision criteria | — | `run_in_background` param (model explicit request), heuristic fallback |\n\n---\n\n## How It Works\n\n### should_run_background: Explicit Request First, Heuristic Fallback\n\nThe model explicitly requests background execution via the bash tool's `run_in_background` parameter. If the model doesn't specify, the teaching version falls back to keyword heuristics:\n\n```python\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n```\n\nCC's bash tool schema has a `run_in_background: boolean` parameter (`BashTool.tsx:241`). The model decides which commands go to background, no keyword guessing. The teaching version keeps heuristics as fallback, but the primary path is explicit model request.\n\n### start_background_task: Background Execution and Lifecycle\n\nWraps the tool call in a worker function, dispatches to a daemon thread. 
Each background task gets a unique ID, with state tracked in the `background_tasks` dict:\n\n```python\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": block.input.get(\"command\", \"\"),\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n return bg_id\n```\n\nReturns `bg_id` instead of just `[Running in background...]`. `daemon=True` ensures threads exit when the agent process exits. 
The teaching version uses in-memory dicts for tracking; real CC has `LocalShellTaskState`, output redirected to files, with full lifecycle including stopping tasks and reading subsequent output.\n\n### collect_background_results: Notification Collection\n\nWhen background tasks complete, results are collected and formatted as `` messages:\n\n```python\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {output[:200]}\\n\"\n f\"\")\n return notifications\n```\n\nNotifications don't reuse the original `tool_use_id`. The original tool call was already answered with a placeholder `tool_result`; background completion is an independent event, injected in `task_notification` format. This respects Messages API tool pairing: one `tool_use` gets exactly one `tool_result`.\n\n### Loop Integration\n\nIn the agent loop, tool execution splits into two paths. 
Notifications and results merge into a single user message:\n\n```python\nresults = []\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n# Merge notifications and tool results into one user message\nuser_content = []\nbg_notifications = collect_background_results()\nif bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\nuser_content.extend(results)\nmessages.append({\"role\": \"user\", \"content\": user_content})\n```\n\nSlow operations get a placeholder tool_result with `bg_id`, so the LLM knows this command is still running and can do other things first. When background completes, the notification is injected as an independent text block alongside the current turn's tool_results in one user message.\n\nThe teaching version polls background results while the agent loop continues running. Real CC uses a notification queue (`messageQueueManager.ts`) to deliver background completion events to subsequent turns, without waiting for the tool loop.\n\n### Putting It Together\n\n```\nTurn 1:\n LLM → bash \"npm install\" (run_in_background=true)\n → start_background_task → bg_0001\n → tool_result: \"[Background task bg_0001 started]...\"\n → LLM: \"OK, I'll check later. Let me also read the config.\"\n\nTurn 2:\n LLM → read_file \"package.json\" (fast, sync)\n → tool_result: file content\n → collect: bg_0001 done! 
inject <task_notification>\n → LLM sees: config file + install notification in one message\n```\n\nThe agent didn't wait — while npm install ran in the background, it read the config file.\n\n---\n\n## Changes from s12\n\n| Component | Before (s12) | After (s13) |\n|-----------|-------------|-------------|\n| Execution model | All synchronous | Slow ops to background thread + notification injection |\n| bash schema | `command` | `command` + `run_in_background` |\n| New functions | — | `should_run_background`, `is_slow_operation`, `start_background_task`, `collect_background_results` |\n| New types | — | `background_tasks: dict`, `background_results: dict`, `background_lock: Lock` |\n| Notification format | — | `<task_notification>` (doesn't reuse tool_use_id) |\n| Loop behavior | Tools execute serially | Slow ops async, fast ops sync, notifications collected each turn |\n| Tools | 8 (s12) | 8 (unchanged, execution strategy changed) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s13_background_tasks/code.py\n```\n\nTry these prompts:\n\n1. `Run pip list in the background and find all Python files in this directory`\n2. `Run npm install (use run_in_background) and while waiting, read package.json`\n3. `Create a task to setup the project, then run pip list in the background`\n\nWhat to observe: Are slow operations dispatched to background? Is a `bg_id` returned? Are background notifications injected in `<task_notification>` format?\n\n---\n\n## What's Next\n\nBackground tasks solved \"slow operations don't block.\" But what if you want to do something on a schedule? Like \"run tests every morning at 9am\" or \"check server status every 5 minutes.\"\n\ns14 Cron Scheduler → Give the agent an alarm clock.\n\n
\nDeep Dive into CC Source\n\n> The following is a complete analysis based on CC source code `query.ts` (lines 211, 1054-1060, 1411-1482), `services/toolUseSummary/toolUseSummaryGenerator.ts` (L15 prompt text), `LocalShellTask.tsx` (L24-25 constants, L59-98 watchdog logic), `messageQueueManager.ts` (notification queue), `utils/task/framework.ts` (L267 `enqueueTaskNotification`).\n\n### 1. pendingToolUseSummary: Haiku Background Generation\n\nCC starts a Haiku side-query after each batch of tool executions to generate a tool use summary. Initiated at `query.ts:1411-1482`, prompt text defined at `services/toolUseSummary/toolUseSummaryGenerator.ts:15` (variable `TOOL_USE_SUMMARY_SYSTEM_PROMPT`). The prompt is \"Write a short summary label... think git-commit-subject, not sentence\", past tense, ~30 characters.\n\nHaiku summary (~1s) completes during the main model's streaming output (5-30s). Before the next turn starts, the summary is yielded. SDK consumers use these summaries for mobile progress display.\n\n### 2. Thread Model: No Real Threads\n\nCC runs on Node.js/Bun's single-threaded event loop. \"Background\" just means \"don't await\". `ShellCommand.background(taskId)` redirects stdout/stderr to files, letting the process run independently.\n\n### 3. Seven Background Task Types\n\nCC defines 7 background task types (`Task.ts:7-13`): `local_bash`, `local_agent`, `remote_agent`, `in_process_teammate`, `local_workflow`, `monitor_mcp`, `dream`. Each has its own registration, lifecycle, and notification mechanism.\n\n### 4. Notification Injection: Command Queue\n\nWhen a background task completes, it's enqueued via `enqueueTaskNotification` (`utils/task/framework.ts:267`) or `enqueuePendingNotification` (`messageQueueManager.ts`) into a shared command queue. The notification format is structured XML:\n\n```xml\n\n completed\n Background command \"npm test\" completed (exit code 0)\n\n```\n\nPriority is `next` > `later` (`messageQueueManager.ts`). 
Background tasks default to `later` (don't block user input). Consumption point at `query.ts:1566-1593`.\n\n### 5. Stall Watchdog\n\nBackground bash tasks have a watchdog (`LocalShellTask.tsx` L24-25 constants, L59-98 logic) that periodically checks if output has stalled. After 45 seconds with no growth, it detects interactive prompts (`(y/n)` etc.), preventing background tasks from getting stuck on unanswered interactive dialogs.\n\n### 6. Concurrency Limits\n\nForeground tool calls: `CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY` (default 10 concurrent safe tools). Background bash tasks: no hard limit, they're independent subprocesses.\n\n
\n\n\n" + }, + { + "version": "s13", + "locale": "zh", + "title": "s13: Background Tasks — 慢操作放后台", + "content": "# s13: Background Tasks — 慢操作放后台\n\ns01 → ... → s11 → s12 → `s13` → [s14](/zh/s14) → s15 → ... → s20\n\n> *\"慢操作丢后台, agent 继续处理\"* — 后台线程跑命令, 完成后注入通知。\n>\n> **Harness 层**: 后台 — 异步执行, 不阻塞主循环。\n\n---\n\n## 问题\n\n你用过洗衣机吗?把衣服扔进去,按下启动,然后去干别的——做饭、回消息、看论文。30 分钟后洗衣机\"滴滴滴\"提醒你:好了。你不会站在洗衣机前面干等 30 分钟。\n\nAgent 的 bash 工具也一样。`pip install torch` 要 10 分钟,`npm run build` 要 3 分钟。这些命令一跑,Agent 就在等 bash 工具返回,没法利用这段时间处理别的任务。\n\n读文件是毫秒级,不等。`git status` 一秒内返回,不等。但 `npm install`?分钟级。Agent 等 10 分钟什么都不做,而 LLM 按 token 计费,空转就是浪费。\n\n---\n\n## 解决方案\n\n![Background Tasks Overview](/course-assets/s13_background_tasks/background-tasks-overview.svg)\n\n教学代码沿用 S12 的简化任务系统和 prompt 组装;为了聚焦后台任务,省略完整错误恢复、记忆和技能系统。唯一的变动:慢操作扔到后台线程,Agent 继续跑循环,后台完成后把通知注入到对话里。\n\n同步 vs 后台:\n\n| | 同步 (s12) | 后台 (s13) |\n|---|---|---|\n| 慢操作 | Agent 干等 | 后台线程执行 |\n| Agent 空闲 | 是 | 否,继续处理 |\n| 结果 | 立即返回 | 下轮注入通知 |\n| 判断标准 | — | `run_in_background` 参数(模型显式请求),启发式兜底 |\n\n---\n\n## 工作原理\n\n### should_run_background: 显式请求优先,启发式兜底\n\n模型通过 bash 工具的 `run_in_background` 参数显式请求后台执行。如果模型没指定,教学版用关键词启发式兜底:\n\n```python\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n```\n\nCC 的 bash 工具 schema 里有 `run_in_background: boolean` 
参数(`BashTool.tsx:241`)。模型自己决定哪些命令丢后台,不靠关键词猜。教学版保留启发式作为兜底,但主路径是模型显式请求。\n\n### start_background_task: 后台执行与生命周期\n\n把工具调用包装成 worker 函数,扔到 daemon 线程里执行。每个后台任务有唯一 ID,状态存在 `background_tasks` 字典里:\n\n```python\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": block.input.get(\"command\", \"\"),\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n return bg_id\n```\n\n返回 `bg_id` 而不是只返回 `[Running in background...]`。`daemon=True` 确保 Agent 进程退出时线程跟着退出。教学版用内存字典追踪状态;真实 CC 有 `LocalShellTaskState`,输出重定向到文件,支持停止任务、读取后续输出等完整生命周期。\n\n### collect_background_results: 通知收集\n\n后台任务完成后,收集结果并格式化为 `` 通知:\n\n```python\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {output[:200]}\\n\"\n f\"\")\n return notifications\n```\n\n通知不复用原始 `tool_use_id`。原始 tool call 已经用占位 `tool_result` 回复了,后台完成是独立事件,用 `task_notification` 格式注入。这符合 Messages API 的工具配对语义:一个 `tool_use` 只对应一个 `tool_result`。\n\n### 
循环中的集成\n\nagent_loop 里,工具执行分两条路,通知和结果合并为一条 user 消息:\n\n```python\nresults = []\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n# 通知和工具结果合入同一条 user 消息\nuser_content = []\nbg_notifications = collect_background_results()\nif bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\nuser_content.extend(results)\nmessages.append({\"role\": \"user\", \"content\": user_content})\n```\n\n慢操作先回一个带 `bg_id` 的占位 tool_result,LLM 知道这个命令还在跑,可以先做别的事。后台完成后,通知作为独立 text block 和当前轮的 tool_result 一起组成 user 消息。\n\n教学版在 agent loop 继续运行时轮询后台结果。真实 CC 通过通知队列(`messageQueueManager.ts`)把后台完成事件送入后续 turn,不需要等工具循环。\n\n### 合起来跑\n\n```\nTurn 1:\n LLM → bash \"npm install\" (run_in_background=true)\n → start_background_task → bg_0001\n → tool_result: \"[Background task bg_0001 started]...\"\n → LLM: \"OK, I'll check later. Let me also read the config.\"\n\nTurn 2:\n LLM → read_file \"package.json\" (fast, sync)\n → tool_result: file content\n → collect: bg_0001 done! 
inject <task_notification>\n → LLM sees: config file + install notification in one message\n```\n\nAgent 没干等,npm install 跑后台的时候,它去读了配置文件。\n\n---\n\n## 相对 s12 的变更\n\n| 组件 | 之前 (s12) | 之后 (s13) |\n|------|-----------|-----------|\n| 执行模型 | 全部同步 | 慢操作后台线程 + 通知注入 |\n| bash schema | `command` | `command` + `run_in_background` |\n| 新函数 | — | `should_run_background`, `is_slow_operation`, `start_background_task`, `collect_background_results` |\n| 新类型 | — | `background_tasks: dict`, `background_results: dict`, `background_lock: Lock` |\n| 通知格式 | — | `<task_notification>`(不复用 tool_use_id) |\n| 循环行为 | 工具串行执行 | 慢操作异步,快操作同步,通知每轮收集 |\n| 工具 | 8 (s12) | 8(不变,执行策略变了) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s13_background_tasks/code.py\n```\n\n试试这些 prompt:\n\n1. `Run pip list in the background and find all Python files in this directory`\n2. `Run npm install (use run_in_background) and while waiting, read package.json`\n3. `Create a task to setup the project, then run pip list in the background`\n\n观察重点:慢操作有没有被送到后台?`bg_id` 是否返回?后台通知有没有以 `<task_notification>` 格式注入?\n\n---\n\n## 接下来\n\n后台任务解决了\"慢操作不阻塞\"。但如果想定时做某件事呢?比如\"每天早上 9 点跑测试\"、\"每 5 分钟检查一次服务器状态\"。\n\ns14 Cron Scheduler → 给 Agent 装一个闹钟。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `query.ts`(211, 1054-1060, 1411-1482 行)、`services/toolUseSummary/toolUseSummaryGenerator.ts`(L15 prompt 文本)、`LocalShellTask.tsx`(L24-25 常量, L59-98 看门狗逻辑)、`messageQueueManager.ts`(通知队列)、`utils/task/framework.ts`(L267 `enqueueTaskNotification`)的完整分析。\n\n### 一、pendingToolUseSummary:Haiku 后台生成\n\nCC 在每批工具执行完后,启动一个 Haiku side-query 生成工具使用摘要。发起代码在 `query.ts:1411-1482`,prompt 文本定义在 `services/toolUseSummary/toolUseSummaryGenerator.ts:15`(变量名 `TOOL_USE_SUMMARY_SYSTEM_PROMPT`)。提示是 \"Write a short summary label... think git-commit-subject, not sentence\",过去时态,约 30 字符。\n\nHaiku 摘要(~1s)在主模型流式生成(5-30s)期间完成。下一轮开始前,把摘要 yield 出去。SDK 消费这些摘要做移动端进度展示。\n\n### 二、线程模型:没有真正的线程\n\nCC 运行在 Node.js/Bun 单线程事件循环中。\"后台\"只是 \"不 await\"。`ShellCommand.background(taskId)` 把 stdout/stderr 重定向到文件,让进程独立运行。\n\n### 三、七种后台任务类型\n\nCC 定义了 7 种后台任务(`Task.ts:7-13`):`local_bash`、`local_agent`、`remote_agent`、`in_process_teammate`、`local_workflow`、`monitor_mcp`、`dream`。每种有自己的注册、生命周期和通知机制。\n\n### 四、通知注入:命令队列\n\n后台任务完成后通过 `enqueueTaskNotification`(`utils/task/framework.ts:267`)或 `enqueuePendingNotification`(`messageQueueManager.ts`)入队到共享命令队列。通知格式是结构化的 XML:\n\n```xml\n\n completed\n Background command \"npm test\" completed (exit code 0)\n\n```\n\n优先级分 `next` > `later`(`messageQueueManager.ts`)。后台任务默认 `later`(不阻塞用户输入)。消费点在 `query.ts:1566-1593`。\n\n### 五、停滞看门狗\n\n后台 bash 任务有一个看门狗(`LocalShellTask.tsx` L24-25 常量, L59-98 逻辑),定期检查输出是否停滞,45 秒无增长后检测交互式提示(`(y/n)` 等),防止后台任务卡在无人响应的交互式对话框。\n\n### 六、并发限制\n\n前台工具调用:`CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY`(默认 10 个并发安全工具)。后台 bash 任务:没有硬性限制,它们是独立的子进程。\n\n
\n\n\n" + }, + { + "version": "s13", + "locale": "ja", + "title": "s13: Background Tasks — 遅い操作はバックグラウンドへ", + "content": "# s13: Background Tasks — 遅い操作はバックグラウンドへ\n\ns01 → ... → s11 → s12 → `s13` → [s14](/ja/s14) → s15 → ... → s20\n\n> *\"遅い操作はバックグラウンドへ、agent は処理を継続\"* — バックグラウンドスレッドでコマンドを実行、完了時に通知を注入。\n>\n> **Harness 層**: バックグラウンド — 非同期実行、メインループをブロックしない。\n\n---\n\n## 課題\n\n洗濯機を使ったことがあるか?衣類を入れ、スタートを押し、他のことをする——料理、メッセージ返信、論文読み。30 分後に洗濯機が「ピッピッ」と知らせる:完了。30 分間立って待つ人はいない。\n\nAgent の bash ツールも同じ。`pip install torch` は 10 分、`npm run build` は 3 分かかる。これらのコマンドが実行中、Agent は bash の戻りを待ち、その時間を他のタスクの処理に使えない。\n\nファイル読み込みはミリ秒、待たない。`git status` は 1 秒以内に戻る、待たない。しかし `npm install` は?分単位。Agent は 10 分間何もせず待ち、LLM 呼び出しはトークン課金、アイドル時間は無駄。\n\n---\n\n## ソリューション\n\n![Background Tasks Overview](/course-assets/s13_background_tasks/background-tasks-overview.ja.svg)\n\n教学版は S12 の簡易タスクシステムとプロンプト組み立てを踏襲。バックグラウンドタスクに集中するため、完全なエラーリカバリ、メモリ、スキルシステムは省略。唯一の変更:遅い操作をバックグラウンドスレッドに投げ、Agent はループを継続、バックグラウンド完了時に通知を注入。\n\n同期 vs バックグラウンド:\n\n| | 同期 (s12) | バックグラウンド (s13) |\n|---|---|---|\n| 遅い操作 | Agent が待機 | バックグラウンドスレッドで実行 |\n| Agent アイドル | はい | いいえ、処理を継続 |\n| 結果 | 即時返却 | 次ターンで通知を注入 |\n| 判断基準 | — | `run_in_background` パラメータ(モデル明示的リクエスト)、ヒューリスティックフォールバック |\n\n---\n\n## 仕組み\n\n### should_run_background: 明示的リクエスト優先、ヒューリスティックフォールバック\n\nモデルは bash ツールの `run_in_background` パラメータで明示的にバックグラウンド実行をリクエストする。モデルが指定しない場合、教学版はキーワードヒューリスティックにフォールバック:\n\n```python\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to 
heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n```\n\nCC の bash ツールスキーマには `run_in_background: boolean` パラメータがある(`BashTool.tsx:241`)。モデルがどのコマンドをバックグラウンドにするかを決定、キーワード推測ではない。教学版はヒューリスティックをフォールバックとして残すが、主パスはモデルの明示的リクエスト。\n\n### start_background_task: バックグラウンド実行とライフサイクル\n\nツール呼び出しをワーカー関数にラップし、daemon スレッドにディスパッチ。各バックグラウンドタスクは一意 ID を持ち、`background_tasks` 辞書で状態を追跡:\n\n```python\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": block.input.get(\"command\", \"\"),\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n return bg_id\n```\n\n`[Running in background...]` ではなく `bg_id` を返す。`daemon=True` で Agent プロセス終了時にスレッドも終了。教学版はメモリ内辞書で追跡。実際の CC は `LocalShellTaskState` を持ち、出力をファイルにリダイレクト、タスク停止や継続出力読み取りを含む完全なライフサイクルを備える。\n\n### collect_background_results: 通知収集\n\nバックグラウンドタスク完了時、結果を収集して `` メッセージとしてフォーマット:\n\n```python\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" 
completed\\n\"\n f\" {task['command']}\\n\"\n f\" {output[:200]}\\n\"\n f\"\")\n return notifications\n```\n\n通知は元の `tool_use_id` を再利用しない。元のツール呼び出しはプレースホルダー `tool_result` で応答済み。バックグラウンド完了は独立したイベントで、`task_notification` 形式で注入する。これは Messages API のツールペアリングに従う:1 つの `tool_use` に対して正確に 1 つの `tool_result`。\n\n### ループ統合\n\nagent_loop でツール実行は 2 つのパスに分かれる。通知と結果は 1 つの user メッセージに統合:\n\n```python\nresults = []\nfor block in response.content:\n if block.type != \"tool_use\":\n continue\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n# 通知とツール結果を 1 つの user メッセージに統合\nuser_content = []\nbg_notifications = collect_background_results()\nif bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\nuser_content.extend(results)\nmessages.append({\"role\": \"user\", \"content\": user_content})\n```\n\n遅い操作は `bg_id` 付きプレースホルダー tool_result を返し、LLM はコマンドがまだ実行中だと知り、先に他のことをできる。バックグラウンド完了時、通知は独立した text block として現在のターンの tool_result と一緒に 1 つの user メッセージを構成する。\n\n教学版は agent loop が継続実行中にバックグラウンド結果をポーリングする。実際の CC は通知キュー(`messageQueueManager.ts`)でバックグラウンド完了イベントを後続ターンに配信、ツールループを待つ必要はない。\n\n### 組み合わせて実行\n\n```\nTurn 1:\n LLM → bash \"npm install\" (run_in_background=true)\n → start_background_task → bg_0001\n → tool_result: \"[Background task bg_0001 started]...\"\n → LLM: \"OK, I'll check later. Let me also read the config.\"\n\nTurn 2:\n LLM → read_file \"package.json\" (fast, sync)\n → tool_result: file content\n → collect: bg_0001 done! 
inject <task_notification>\n → LLM sees: config file + install notification in one message\n```\n\nAgent は待たなかった。npm install がバックグラウンドで実行中に、設定ファイルを読んだ。\n\n---\n\n## s12 からの変更\n\n| コンポーネント | 変更前 (s12) | 変更後 (s13) |\n|--------------|------------|------------|\n| 実行モデル | すべて同期 | 遅い操作はバックグラウンドスレッド + 通知注入 |\n| bash スキーマ | `command` | `command` + `run_in_background` |\n| 新規関数 | — | `should_run_background`, `is_slow_operation`, `start_background_task`, `collect_background_results` |\n| 新規型 | — | `background_tasks: dict`, `background_results: dict`, `background_lock: Lock` |\n| 通知形式 | — | `<task_notification>`(tool_use_id を再利用しない) |\n| ループ動作 | ツール直列実行 | 遅い操作は非同期、速い操作は同期、通知は毎ターン収集 |\n| ツール | 8 (s12) | 8(変更なし、実行戦略が変更) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s13_background_tasks/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Run pip list in the background and find all Python files in this directory`\n2. `Run npm install (use run_in_background) and while waiting, read package.json`\n3. `Create a task to setup the project, then run pip list in the background`\n\n観察ポイント:遅い操作はバックグラウンドにディスパッチされているか?`bg_id` は返されているか?バックグラウンド通知は `<task_notification>` 形式で注入されているか?\n\n---\n\n## 次の章\n\nバックグラウンドタスクは「遅い操作がブロックしない」を解決した。しかし、定期的に何かをしたい場合は?例えば「毎朝 9 時にテストを実行」「5 分ごとにサーバーステータスを確認」。\n\ns14 Cron Scheduler → Agent にアラームクロックを付ける。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `query.ts`(211, 1054-1060, 1411-1482 行)、`services/toolUseSummary/toolUseSummaryGenerator.ts`(L15 プロンプトテキスト)、`LocalShellTask.tsx`(L24-25 定数, L59-98 ウォッチドッグロジック)、`messageQueueManager.ts`(通知キュー)、`utils/task/framework.ts`(L267 `enqueueTaskNotification`)の完全分析に基づく。\n\n### 一、pendingToolUseSummary:Haiku バックグラウンド生成\n\nCC は各ツール実行バッチの後、Haiku サイドクエリを開始してツール使用サマリを生成。開始コードは `query.ts:1411-1482`、プロンプトテキストは `services/toolUseSummary/toolUseSummaryGenerator.ts:15`(変数 `TOOL_USE_SUMMARY_SYSTEM_PROMPT`)。プロンプトは \"Write a short summary label... think git-commit-subject, not sentence\"、過去形、約 30 文字。\n\nHaiku サマリ(~1s)はメインモデルのストリーミング出力(5-30s)中に完了。次のターン開始前にサマリを yield。SDK コンシューマーはこれらのサマリをモバイル進捗表示に使用。\n\n### 二、スレッドモデル:本当のスレッドはない\n\nCC は Node.js/Bun のシングルスレッドイベントループで動作。「バックグラウンド」は単に「await しない」こと。`ShellCommand.background(taskId)` は stdout/stderr をファイルにリダイレクトし、プロセスを独立実行。\n\n### 三、7 種のバックグラウンドタスク型\n\nCC は 7 種のバックグラウンドタスク型を定義(`Task.ts:7-13`):`local_bash`、`local_agent`、`remote_agent`、`in_process_teammate`、`local_workflow`、`monitor_mcp`、`dream`。それぞれ独自の登録、ライフサイクル、通知メカニズムを持つ。\n\n### 四、通知注入:コマンドキュー\n\nバックグラウンドタスク完了時、`enqueueTaskNotification`(`utils/task/framework.ts:267`)または `enqueuePendingNotification`(`messageQueueManager.ts`)で共有コマンドキューにエンキュー。通知形式は構造化 XML:\n\n```xml\n\n completed\n Background command \"npm test\" completed (exit code 0)\n\n```\n\n優先度は `next` > `later`(`messageQueueManager.ts`)。バックグラウンドタスクはデフォルト `later`(ユーザー入力をブロックしない)。消費点は `query.ts:1566-1593`。\n\n### 五、停滞ウォッチドッグ\n\nバックグラウンド bash タスクにはウォッチドッグがある(`LocalShellTask.tsx` L24-25 定数, L59-98 ロジック)。出力の停滞を定期チェックし、45 秒間増加がない場合にインタラクティブプロンプト(`(y/n)` 等)を検出、バックグラウンドタスクが無応答のインタラクティブダイアログでスタックするのを防ぐ。\n\n### 六、同時実行制限\n\nフォアグラウンドツール呼び出し:`CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY`(デフォルト 10 同時実行安全ツール)。バックグラウンド bash タスク:ハードリミットなし、独立したサブプロセス。\n\n
\n\n\n" + }, + { + "version": "s14", + "locale": "en", + "title": "s14: Cron Scheduler — Producing Work on a Schedule", + "content": "# s14: Cron Scheduler — Producing Work on a Schedule\n\ns01 → ... → s12 → s13 → `s14` → [s15](/en/s15) → s16 → ... → s20\n> *\"Produce work on a schedule, decouple scheduling from execution\"* — Cron scheduling, durable or session-level.\n>\n> **Harness Layer**: Scheduling — Independent thread checks time, queue delivers triggers.\n\n---\n\n## The Problem\n\nAn alarm clock doesn't need you to watch it. You set 7:00, it rings at 7:00 — you could be sleeping, showering, cooking, it rings regardless.\n\ns13 lets the agent run slow operations in the background, but every operation is still triggered manually. You say something, the agent acts. \"Run tests every morning at 9am\", \"Check CI status every 30 minutes\" — these recurring tasks shouldn't need a human to push them each time.\n\n---\n\n## The Solution\n\n![Cron Scheduler Overview](/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg)\n\nTeaching code carries forward S13's simplified task system, background execution, and prompt assembly; to stay focused on the scheduler, it omits full error recovery, memory, and skill systems. Added: an independent cron scheduler thread that polls every second, queues matching jobs into `cron_queue`, and a queue processor that delivers them when the agent is idle.\n\nManual vs Scheduled:\n\n| | Manual (s13) | Scheduled (s14) |\n|---|---|---|\n| Triggered by | User input | Scheduler thread |\n| Trigger timing | Anytime | Specified by cron expression |\n| Human involvement | Yes | No (scheduler auto-enqueues, idle agent auto-delivers) |\n| Persistence | — | Durable survives restart |\n\n---\n\n## How It Works\n\n### Four-Layer Model\n\nCron scheduling has four layers:\n\n1. **Scheduler**: daemon thread, polls every second, checks if it's time\n2. **Queue**: `cron_queue`, scheduler writes fired jobs\n3. 
**Queue Processor**: sees non-empty queue and idle agent, starts one agent_loop turn\n4. **Consumer**: agent_loop consumes queue and injects into messages\n\nThe teaching version implements a minimal queue processor: `agent_lock` tells whether the agent is idle, and queued cron work is delivered automatically. Real CC's `useQueueProcessor.ts` also handles UI blocking, queue priority, and different message modes.\n\n### CronJob: Data Structure\n\nEach cron task is a `CronJob` object:\n\n```python\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\" (5-field cron expression)\n prompt: str # Message injected to the agent when fired\n recurring: bool # True=recurring, False=one-shot\n durable: bool # True=write to disk, survives sessions\n```\n\nCron expression, 5 fields, used by Unix for 50 years:\n\n```\nmin hour dom month dow\n * * * * * Every minute\n 0 9 * * * Every day at 9:00\n*/5 * * * * Every 5 minutes\n 0 9 * * 1-5 Weekdays at 9:00\n```\n\nSupports `*`, `*/N`, `N`, `N-M`, `N,M,...`.\n\n### cron_matches: 5-Field Matching\n\nStandard cron semantics: minute, hour, month must all match; day-of-month (DOM) and day-of-week (DOW) use OR when both are constrained:\n\n```python\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n if not (m and h and month_ok):\n return False\n # DOM and DOW: both constrained → either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return 
dom_ok\n return dom_ok or dow_ok\n```\n\n### Independent Scheduler Thread: 1-Second Polling\n\nThe scheduler runs in an independent daemon thread, not dependent on whether agent_loop is executing. Individual job errors don't kill the entire thread:\n\n```python\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\"[cron error] {job.id}: {e}\")\n```\n\nKey design:\n- **Independent of agent_loop**: scheduler checks time in background even when agent_loop isn't running\n- **Date-aware minute_marker**: uses `\"YYYY-MM-DD HH:MM\"` to prevent same-minute double-fire while not skipping on the next day\n- **Per-job try/except**: one bad job doesn't crash the scheduler thread\n- **One-shot jobs**: auto-removed from scheduled_jobs after firing\n\n### Queue Processor + agent_loop: Delivery\n\nThe queue processor does not check time. It only starts a turn when queued work exists and the agent is idle:\n\n```python\ndef queue_processor_loop():\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if has_cron_queue():\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n```\n\nagent_loop also doesn't check time. 
It only takes fired tasks from `cron_queue` and injects them into messages:\n\n```python\nfired = consume_cron_queue()\nfor job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n```\n\nProducer (scheduler thread), deliverer (queue processor), and consumer (agent_loop) are decoupled via `cron_queue`, `cron_lock`, and `agent_lock`.\n\n### Validation: Prevent Bad Cron from Killing the Scheduler\n\n`schedule_job` validates the cron expression before registering, returning an error for invalid input:\n\n```python\ndef schedule_job(cron, prompt, recurring=True, durable=True):\n err = validate_cron(cron)\n if err:\n return err\n # ... register job\n```\n\nLoading durable jobs from disk also skips invalid expressions, preventing a single bad task from breaking startup.\n\n### Durable vs Session-only\n\n- **Durable**: Task definition written to `.scheduled_tasks.json`. Loaded on agent restart.\n- **Session-only**: In-memory only. Gone when the agent closes.\n\n> **Important caveat**: The cron scheduler must run inside the agent process. Process exits, scheduler stops. Durable only means the task definition survives restarts — next time the agent starts, the scheduler discovers \"it should fire\" and fires. If you need \"run even when the app is closed\", use system crontab or systemd timer.\n\n### Putting It Together\n\n```\n1. On startup:\n load_durable_jobs() → restore durable tasks from .scheduled_tasks.json\n Thread(cron_scheduler_loop, daemon=True).start() → scheduler begins polling\n Thread(queue_processor_loop, daemon=True).start() → processor waits to deliver\n\n2. Register a task:\n schedule_cron(cron=\"*/2 * * * *\", prompt=\"run date\", durable=True)\n → CronJob written to scheduled_jobs + .scheduled_tasks.json\n\n3. 
Every 2 minutes:\n Scheduler checks → cron_matches returns True → cron_queue.append(job)\n → queue processor sees idle agent → agent_loop consume_cron_queue\n → injects \"[Scheduled] run date\"\n → LLM receives message, runs date command\n\n4. Process shutdown:\n Scheduler thread stops (daemon=True)\n .scheduled_tasks.json stays on disk\n Next startup → load_durable_jobs → tasks restored\n```\n\n---\n\n## Changes from s13\n\n| Component | Before (s13) | After (s14) |\n|-----------|-------------|-------------|\n| Trigger method | User manual trigger | Scheduler thread auto-enqueues |\n| New types | — | CronJob dataclass (id, cron, prompt, recurring, durable) |\n| New functions | — | cron_matches, validate_cron, schedule_job, cancel_job, cron_scheduler_loop, queue_processor_loop |\n| New storage | — | .scheduled_tasks.json (durable) + memory (session-only) |\n| Threads | Background execution thread | + Scheduler thread (daemon, 1s polling) + queue processor thread |\n| Queue | background_results | + cron_queue (scheduler writes, queue processor delivers, agent_loop consumes) |\n| Tools | 8 (s12/s13) | + schedule_cron, list_crons, cancel_cron (11) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s14_cron_scheduler/code.py\n```\n\nTry these prompts:\n\n1. `Schedule a task to print the current date every 2 minutes`\n2. `List all cron jobs`\n3. `Create a one-shot reminder in 1 minute to check the build status`\n4. `Cancel the recurring job and verify with list_crons`\n\nWhat to observe: Is the scheduler thread running independently? Do cron tasks fire at the correct time? Without a new prompt, do you see `[queue processor]` and automatic execution? Is the durable job written to `.scheduled_tasks.json`?\n\n---\n\n## What's Next\n\nOne agent can do a lot now: plan, compress, background, schedule. But some tasks are too big for one agent.\n\n\"Refactor the entire backend\" — overhaul auth, database layer, API routes, and tests. One agent's attention is limited. 
This needs a team.\n\ns15 Agent Teams → One agent isn't enough, form a team. Persistent teammates + async inboxes.\n\n
\nDeep Dive into CC Source\n\n> The following is a complete analysis based on CC source code `CronCreateTool.ts`, `cronScheduler.ts`, `cron.ts`, `cronTasks.ts`, `cronTasksLock.ts`, `useScheduledTasks.ts` (139 lines).\n\n### 1. Three Cron Tools\n\nCC exposes three cron tools to the model: `CronCreate`, `CronDelete`, `CronList`. All controlled by compile-time gate `feature('AGENT_TRIGGERS')` and runtime GrowthBook flag `tengu_kairos_cron`. There's also a `CLAUDE_CODE_DISABLE_CRON` env var for local override.\n\n### 2. Storage: `.claude/scheduled_tasks.json`\n\n```json\n{ \"tasks\": [{ \"id\": \"abc12345\", \"cron\": \"0 9 * * *\", \"prompt\": \"...\", \"recurring\": true, \"durable\": true, \"createdAt\": 1714567890000 }] }\n```\n\nDurable tasks write to disk; session-only tasks live in `STATE.sessionCronTasks` memory array (lost on process restart). A `.scheduled_tasks.lock` file prevents duplicate firing across multiple sessions of the same project.\n\n### 3. Scheduler: 1-Second Polling\n\n`cronScheduler.ts` checks every second (`CHECK_INTERVAL_MS = 1000`). Whoever holds the lock triggers file tasks; all sessions trigger session-only tasks. A `chokidar` file watcher monitors `scheduled_tasks.json` changes.\n\n### 4. Cron Expression: Standard 5 Fields\n\nMinute hour day month weekday. Supports `*`, `*/N`, `N`, `N-M`, `N-M/S`, `N,M,...`. Doesn't support `L`, `W`, `?`. All times interpreted in local timezone. Day-of-month and day-of-week use OR semantics when both are constrained.\n\n### 5. Jitter (Thundering Herd Prevention)\n\n- Recurring tasks: trigger delay up to 10% of period (max 15 min), deterministic hash based on task ID\n- One-shot tasks: up to 90s early when firing time falls on `:00` or `:30`\n- Jitter config adjustable via GrowthBook, refreshed every 60 seconds\n\n### 6. Auto-Expiration\n\nRecurring tasks auto-expire after 7 days (configurable, max 30 days). Fire one last time before expiry, then auto-delete.\n\n### 7. 
Job Limit\n\n`MAX_JOBS = 50` (`CronCreateTool.ts:25`). Returns error when exceeded: \"Too many scheduled jobs (max 50). Cancel one first.\"\n\n### 8. Trigger Injection\n\nAfter firing, enqueued via `enqueuePendingNotification()` with `priority: 'later'` into the command queue. Tagged `workload: WORKLOAD_CRON` — API serves cron-initiated requests at lower QoS when capacity is tight.\n\n### 9. Queue Processor: Automatic Delivery\n\nReal CC auto-triggers processing through `useQueueProcessor.ts:48-60` when no query is active, UI isn't blocked, and queue is non-empty. `queueProcessor.ts:52-87` dispatches commands to `handlePromptSubmit()` by queue priority. The teaching version keeps the core behavior with `queue_processor_loop`: when queued work exists and the agent is idle, it starts one agent_loop turn automatically.\n\n
\n\n\n" + }, + { + "version": "s14", + "locale": "zh", + "title": "s14: Cron Scheduler — 按时间表生产工作", + "content": "# s14: Cron Scheduler — 按时间表生产工作\n\ns01 → ... → s12 → s13 → `s14` → [s15](/zh/s15) → s16 → ... → s20\n> *\"按时间表生产工作, 调度与执行解耦\"* — cron 调度, 持久化或会话级。\n>\n> **Harness 层**: 调度 — 独立线程判断时间, 队列传递触发。\n\n---\n\n## 问题\n\n闹钟不需要你盯着它才会响。你设好 7:00,到点它自己响,你在睡觉、在洗澡、在做饭,它都照响不误。\n\ns13 让 Agent 能后台执行慢操作,但所有操作仍然是你手动触发的。你说一句,Agent 动一下。\"每天早上 9 点跑测试\"、\"每 30 分钟检查 CI 状态\",这些周期性任务不该需要人每次来推。\n\n---\n\n## 解决方案\n\n![Cron Scheduler Overview](/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg)\n\n教学代码沿用 S13 的简化任务系统、后台执行和 prompt 组装;为了聚焦调度器,省略完整错误恢复、记忆和技能系统。新增:独立的 cron 调度线程,每秒检查一次,时间到了把任务塞进 `cron_queue`;再由 queue processor 在 Agent 空闲时自动交付。\n\n手动 vs 定时:\n\n| | 手动触发 (s13) | 定时触发 (s14) |\n|---|---|---|\n| 触发者 | 用户输入 | 调度线程 |\n| 触发时机 | 随时 | cron 表达式指定 |\n| 需要人参与 | 是 | 否(调度器自动入队,空闲时自动交付) |\n| 持久性 | — | durable 跨重启 |\n\n---\n\n## 工作原理\n\n### 四层模型\n\nCron 调度分四层:\n\n1. **Scheduler**:daemon 线程,每秒轮询,判断时间到了没有\n2. **Queue**:`cron_queue`,调度线程写入已触发任务\n3. **Queue Processor**:发现队列非空且 Agent 空闲,启动一轮 agent_loop\n4. 
**Consumer**:agent_loop 从队列消费,注入到 messages\n\n教学版实现的是最小 queue processor:用 `agent_lock` 判断 Agent 是否空闲,空闲时自动交付定时任务。真实 CC 的 `useQueueProcessor.ts` 还会处理 UI 阻塞、队列优先级和不同消息模式。\n\n### CronJob: 数据结构\n\n每个 cron 任务是一个 `CronJob` 对象:\n\n```python\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\" (五段式 cron 表达式)\n prompt: str # 触发时注入给 Agent 的消息\n recurring: bool # True=周期性,False=一次性\n durable: bool # True=写磁盘,跨会话保留\n```\n\nCron 表达式,五段式,Unix 用了 50 年:\n\n```\n分钟 小时 日 月 星期\n * * * * * 每分钟\n 0 9 * * * 每天早上 9:00\n */5 * * * * 每 5 分钟\n 0 9 * * 1-5 工作日早上 9:00\n```\n\n支持 `*`、`*/N`、`N`、`N-M`、`N,M,...`。\n\n### cron_matches: 五段式匹配\n\n标准 cron 语义:分钟、小时、月必须全部匹配;日(DOM)和星期(DOW)同时被约束时任一匹配即可(OR):\n\n```python\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n if not (m and h and month_ok):\n return False\n # DOM and DOW: both constrained → either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n```\n\n### 独立调度线程: 每秒轮询\n\n调度器跑在独立的 daemon 线程里,不依赖 agent_loop 是否在执行。单个 job 异常不会杀掉整个线程:\n\n```python\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n if not job.recurring:\n 
scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\"[cron error] {job.id}: {e}\")\n```\n\n关键设计:\n- **独立于 agent_loop**:即使 agent_loop 没在跑,调度器也在后台检查时间\n- **date-aware minute_marker**:用 `\"YYYY-MM-DD HH:MM\"` 防止同一分钟重复触发,同时不会在第二天跳过\n- **单 job try/except**:一个坏 job 不会拖垮整个调度线程\n- **一次性任务**:触发后自动从 scheduled_jobs 里删除\n\n### Queue Processor + agent_loop: 交付端\n\nqueue processor 不检查时间,只负责在队列有任务且 Agent 空闲时拉起一轮执行:\n\n```python\ndef queue_processor_loop():\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if has_cron_queue():\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n```\n\nagent_loop 也不负责检查时间,它只从 `cron_queue` 里拿已触发的任务,注入到 messages 里:\n\n```python\nfired = consume_cron_queue()\nfor job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n```\n\n生产者(调度线程)、交付者(queue processor)和消费者(agent_loop)通过 `cron_queue`、`cron_lock`、`agent_lock` 解耦。\n\n### 校验:防止坏 cron 杀掉调度器\n\n`schedule_job` 在注册前校验 cron 表达式,非法的直接返回错误:\n\n```python\ndef schedule_job(cron, prompt, recurring=True, durable=True):\n err = validate_cron(cron)\n if err:\n return err\n # ... register job\n```\n\n从磁盘加载 durable job 时也会跳过非法表达式,避免单个坏任务拖垮启动。\n\n### Durable vs Session-only\n\n- **Durable**:任务定义写进 `.scheduled_tasks.json`。Agent 重启后加载文件,恢复任务。\n- **Session-only**:只在内存里。Agent 关闭就没了。\n\n> **重要前提**:cron 调度器必须在 Agent 进程内跑。进程关闭,调度也停。Durable 只意味着任务定义跨重启保留,下次 Agent 启动时调度器才会发现\"该触发了\"并触发。如果需要\"即使应用关闭也能定时跑\",请用系统 crontab 或 systemd timer。\n\n### 合起来跑\n\n```\n1. 启动时:\n load_durable_jobs() → 从 .scheduled_tasks.json 恢复持久化任务\n Thread(cron_scheduler_loop, daemon=True).start() → 调度线程开始轮询\n Thread(queue_processor_loop, daemon=True).start() → 队列处理器等待交付\n\n2. 注册任务:\n schedule_cron(cron=\"*/2 * * * *\", prompt=\"run date\", durable=True)\n → CronJob 写入 scheduled_jobs + .scheduled_tasks.json\n\n3. 
每 2 分钟:\n 调度线程检查 → cron_matches 返回 True → cron_queue.append(job)\n → queue processor 发现 Agent 空闲 → agent_loop consume_cron_queue\n → 注入 \"[Scheduled] run date\"\n → LLM 收到消息,执行 date 命令\n\n4. 关闭进程:\n 调度线程跟着停(daemon=True)\n .scheduled_tasks.json 还在磁盘上\n 下次启动 → load_durable_jobs → 任务恢复\n```\n\n---\n\n## 相对 s13 的变更\n\n| 组件 | 之前 (s13) | 之后 (s14) |\n|------|-----------|-----------|\n| 触发方式 | 用户手动触发 | 调度线程自动入队 |\n| 新类型 | — | CronJob dataclass (id, cron, prompt, recurring, durable) |\n| 新函数 | — | cron_matches, validate_cron, schedule_job, cancel_job, cron_scheduler_loop, queue_processor_loop |\n| 新存储 | — | .scheduled_tasks.json (durable) + 内存 (session-only) |\n| 线程 | 后台执行线程 | + 调度线程 (daemon, 1s 轮询) + queue processor 线程 |\n| 队列 | background_results | + cron_queue (调度线程写, queue processor 交付, agent_loop 消费) |\n| 工具 | 8 (s12/s13) | + schedule_cron, list_crons, cancel_cron (11) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s14_cron_scheduler/code.py\n```\n\n试试这些 prompt:\n\n1. `Schedule a task to print the current date every 2 minutes`\n2. `List all cron jobs`\n3. `Create a one-shot reminder in 1 minute to check the build status`\n4. `Cancel the recurring job and verify with list_crons`\n\n观察重点:调度线程是否在独立运行?cron 任务是否在正确的时间点触发?不输入新 prompt 时,是否也出现 `[queue processor]` 并自动执行?durable job 是否写入了 `.scheduled_tasks.json`?\n\n---\n\n## 接下来\n\n一个 Agent 能做很多事了,能计划、能压缩、能后台、能定时。但有些任务太大了,不是一个 Agent 能搞定的。\n\n\"重构整个后端\",把认证模块、数据库层、API 路由、测试全部翻新。一个 Agent 的注意力是有限的,这需要一个团队。\n\ns15 Agent Teams → 一个 Agent 不够,组队吧。持久队友 + 异步收件箱。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `CronCreateTool.ts`、`cronScheduler.ts`、`cron.ts`、`cronTasks.ts`、`cronTasksLock.ts`、`useScheduledTasks.ts`(139 行)的完整分析。\n\n### 一、三个 Cron 工具\n\nCC 暴露了三个 cron 工具给模型:`CronCreate`、`CronDelete`、`CronList`。全部由编译时门控 `feature('AGENT_TRIGGERS')` 和运行时 GrowthBook 标志 `tengu_kairos_cron` 控制。还有一个 `CLAUDE_CODE_DISABLE_CRON` 环境变量做本地覆盖。\n\n### 二、存储:`.claude/scheduled_tasks.json`\n\n```json\n{ \"tasks\": [{ \"id\": \"abc12345\", \"cron\": \"0 9 * * *\", \"prompt\": \"...\", \"recurring\": true, \"durable\": true, \"createdAt\": 1714567890000 }] }\n```\n\nDurable 任务写磁盘;session-only 任务存于 `STATE.sessionCronTasks` 内存数组(进程重启丢失)。还有一个 `.scheduled_tasks.lock` 文件防止同项目的多个 session 重复触发。\n\n### 三、调度器:1 秒轮询\n\n`cronScheduler.ts` 每秒检查一次(`CHECK_INTERVAL_MS = 1000`)。谁持有锁谁触发文件任务;所有 session 都触发仅 session 任务。还有一个 `chokidar` 文件观察者监视 `scheduled_tasks.json` 变更。\n\n### 四、Cron 表达式:标准 5 字段\n\n分钟 小时 日 月 星期。支持 `*`、`*/N`、`N`、`N-M`、`N-M/S`、`N,M,...`。不支持 `L`、`W`、`?`。所有时间以本地时区解释。Day-of-month 和 day-of-week 同时约束时用 OR 语义。\n\n### 五、抖动(防惊群效应)\n\n- 重复性任务:触发延迟最多可达期间的 10%(上限 15 分钟),基于任务 ID 的确定性哈希\n- 一次性任务:当触发时间落在 `:00` 或 `:30` 时,最多提前 90 秒触发\n- 抖动配置可通过 GrowthBook 实时调整,60 秒刷新一次\n\n### 六、自动过期\n\n重复性任务 7 天后自动过期(可配置,上限 30 天)。过期前最后一次触发,触发后自动删除。\n\n### 七、作业数上限\n\n`MAX_JOBS = 50`(`CronCreateTool.ts:25`)。超限时返回错误:\"Too many scheduled jobs (max 50). Cancel one first.\"\n\n### 八、触发注入\n\n触发后通过 `enqueuePendingNotification()` 以 `priority: 'later'` 入队命令队列。标记 `workload: WORKLOAD_CRON`,API 在容量紧张时以更低的 QoS 为 cron 发起的请求服务。\n\n### 九、Queue Processor:自动交付\n\n真实 CC 通过 `useQueueProcessor.ts:48-60` 在无 query、无阻塞 UI、队列非空时自动触发处理。`queueProcessor.ts:52-87` 按队列优先级把命令交给 `handlePromptSubmit()`。教学版用 `queue_processor_loop` 保留核心行为:队列有任务且 Agent 空闲时,自动启动一轮 agent_loop。\n\n
\n\n\n" + }, + { + "version": "s14", + "locale": "ja", + "title": "s14: Cron Scheduler — スケジュールに従って作業を生産", + "content": "# s14: Cron Scheduler — スケジュールに従って作業を生産\n\ns01 → ... → s12 → s13 → `s14` → [s15](/ja/s15) → s16 → ... → s20\n> *\"スケジュールに従って作業を生産、スケジューリングと実行を分離\"* — cron スケジューリング、永続またはセッションレベル。\n>\n> **Harness 層**: スケジューリング — 独立スレッドが時刻を判定、キューがトリガーを配信。\n\n---\n\n## 課題\n\n目覚まし時計はあなたが見ていないと鳴らないわけではない。7:00 にセットすれば、7:00 に鳴る。寝ていても、シャワーを浴びていても、料理をしていても、鳴る。\n\ns13 で Agent は遅い操作をバックグラウンドで実行できるようになった。しかし、すべての操作は手動でトリガーされる。一言言えば、Agent が動く。「毎朝 9 時にテストを実行」「30 分ごとに CI ステータスを確認」、これらの定期的なタスクに人が毎回押す必要はないはずだ。\n\n---\n\n## ソリューション\n\n![Cron Scheduler Overview](/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg)\n\n教学版は S13 の簡易タスクシステム、バックグラウンド実行、プロンプト組み立てを踏襲。スケジューラに集中するため、完全なエラーリカバリ、メモリ、スキルシステムは省略。追加:独立した cron スケジューラスレッド、1 秒ごとにポーリング、時間が来たらタスクを `cron_queue` に投入し、queue processor が Agent のアイドル時に自動配信。\n\n手動 vs スケジュール:\n\n| | 手動 (s13) | スケジュール (s14) |\n|---|---|---|\n| トリガー | ユーザー入力 | スケジューラスレッド |\n| トリガー時刻 | いつでも | cron 式で指定 |\n| 人の関与 | あり | なし(スケジューラが自動キュー投入、アイドル時に自動配信) |\n| 永続性 | — | durable は再起動後も保持 |\n\n---\n\n## 仕組み\n\n### 4 層モデル\n\ncron スケジューリングは 4 層に分かれる:\n\n1. **Scheduler**:daemon スレッド、1 秒ごとにポーリング、時刻が来たか判定\n2. **Queue**:`cron_queue`、スケジューラが発火済みタスクを書き込み\n3. **Queue Processor**:キューが空でなく Agent がアイドルなら、一回の agent_loop を開始\n4. 
**Consumer**:agent_loop がキューから消費、messages に注入\n\n教学版は最小の queue processor を実装する。`agent_lock` で Agent がアイドルかを判定し、キューに入った cron 作業を自動配信する。実際の CC の `useQueueProcessor.ts` はさらに UI ブロック、キュープライオリティ、メッセージモードを扱う。\n\n### CronJob: データ構造\n\n各 cron タスクは `CronJob` オブジェクト:\n\n```python\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"(5 フィールド cron 式)\n prompt: str # 発火時に Agent に注入するメッセージ\n recurring: bool # True=定期的、False=一回限り\n durable: bool # True=ディスク書き込み、セッション横断\n```\n\ncron 式、5 フィールド、Unix で 50 年使われている:\n\n```\n分 時 日 月 曜日\n * * * * * 毎分\n 0 9 * * * 毎日 9:00\n*/5 * * * * 5 分ごと\n 0 9 * * 1-5 平日 9:00\n```\n\n`*`、`*/N`、`N`、`N-M`、`N,M,...` をサポート。\n\n### cron_matches: 5 フィールドマッチング\n\n標準 cron セマンティクス:分、時、月はすべてマッチ必須。日(DOM)と曜日(DOW)が両方制約されている場合は、いずれかのマッチで十分(OR):\n\n```python\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n if not (m and h and month_ok):\n return False\n # DOM and DOW: both constrained → either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n```\n\n### 独立スケジューラスレッド:1 秒ポーリング\n\nスケジューラは独立した daemon スレッドで動作、agent_loop が実行中かどうかに依存しない。個々のジョブエラーはスレッド全体を殺さない:\n\n```python\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != 
minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\"[cron error] {job.id}: {e}\")\n```\n\n重要な設計:\n- **agent_loop から独立**:agent_loop が動いていなくても、スケジューラはバックグラウンドで時刻をチェック\n- **日付認識 minute_marker**:`\"YYYY-MM-DD HH:MM\"` を使用、同じ分の重複発火を防ぎつつ翌日のスキップも防止\n- **ジョブ単位の try/except**:一つの悪いジョブがスケジューラスレッド全体をクラッシュさせない\n- **一回限りジョブ**:発火後、scheduled_jobs から自動削除\n\n### Queue Processor + agent_loop: 配信側\n\nqueue processor は時刻をチェックしない。キューに作業があり、Agent がアイドルの時だけ一回の実行を開始する:\n\n```python\ndef queue_processor_loop():\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if has_cron_queue():\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n```\n\nagent_loop も時刻をチェックしない。`cron_queue` から発火済みタスクを取り出し、messages に注入するだけ:\n\n```python\nfired = consume_cron_queue()\nfor job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n```\n\n生産者(スケジューラスレッド)、配信者(queue processor)、消費者(agent_loop)は `cron_queue`、`cron_lock`、`agent_lock` で分離されている。\n\n### バリデーション:不正 cron がスケジューラを殺すのを防止\n\n`schedule_job` は登録前に cron 式をバリデーションし、不正な場合はエラーを返す:\n\n```python\ndef schedule_job(cron, prompt, recurring=True, durable=True):\n err = validate_cron(cron)\n if err:\n return err\n # ... ジョブ登録\n```\n\nディスクから durable ジョブを読み込む際も不正な式をスキップし、一つの悪いタスクが起動を妨げない。\n\n### Durable vs Session-only\n\n- **Durable**:タスク定義を `.scheduled_tasks.json` に書き込み。Agent 再起動後にファイルから復元。\n- **Session-only**:メモリ内のみ。Agent 終了で消失。\n\n> **重要な前提**:cron スケジューラは Agent プロセス内で実行される必要がある。プロセスが終了するとスケジューラも停止。Durable はタスク定義が再起動後も保持されることを意味するだけで、次回 Agent 起動時にスケジューラが「発火すべき」と判定して初めて発火する。「アプリケーションが閉じていても定期的に実行」が必要な場合は、システム crontab または systemd timer を使用。\n\n### 組み合わせて実行\n\n```\n1. 
起動時:\n load_durable_jobs() → .scheduled_tasks.json から永続タスクを復元\n Thread(cron_scheduler_loop, daemon=True).start() → スケジューラスレッドがポーリング開始\n Thread(queue_processor_loop, daemon=True).start() → processor が配信待機\n\n2. タスク登録:\n schedule_cron(cron=\"*/2 * * * *\", prompt=\"run date\", durable=True)\n → CronJob を scheduled_jobs + .scheduled_tasks.json に書き込み\n\n3. 2 分ごと:\n スケジューラチェック → cron_matches が True → cron_queue.append(job)\n → queue processor がアイドル状態を検知 → agent_loop consume_cron_queue\n → \"[Scheduled] run date\" を注入\n → LLM がメッセージを受信、date コマンドを実行\n\n4. プロセス終了:\n スケジューラスレッドも停止(daemon=True)\n .scheduled_tasks.json はディスクに残存\n 次回起動 → load_durable_jobs → タスク復元\n```\n\n---\n\n## s13 からの変更\n\n| コンポーネント | 変更前 (s13) | 変更後 (s14) |\n|--------------|------------|------------|\n| トリガー方式 | ユーザー手動トリガー | スケジューラスレッドが自動キュー投入 |\n| 新規型 | — | CronJob データクラス (id, cron, prompt, recurring, durable) |\n| 新規関数 | — | cron_matches, validate_cron, schedule_job, cancel_job, cron_scheduler_loop, queue_processor_loop |\n| 新規ストレージ | — | .scheduled_tasks.json (durable) + メモリ (session-only) |\n| スレッド | バックグラウンド実行スレッド | + スケジューラスレッド (daemon, 1s ポーリング) + queue processor スレッド |\n| キュー | background_results | + cron_queue(スケジューラ書き込み、queue processor 配信、agent_loop 消費) |\n| ツール | 8 (s12/s13) | + schedule_cron, list_crons, cancel_cron (11) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s14_cron_scheduler/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Schedule a task to print the current date every 2 minutes`\n2. `List all cron jobs`\n3. `Create a one-shot reminder in 1 minute to check the build status`\n4. 
`Cancel the recurring job and verify with list_crons`\n\n観察ポイント:スケジューラスレッドが独立して動いているか?cron タスクが正しい時刻に発火しているか?新しい prompt を入力しなくても `[queue processor]` が出て自動実行されるか?durable ジョブが `.scheduled_tasks.json` に書き込まれているか?\n\n---\n\n## 次の章\n\n一つの Agent でできることは増えた。計画、圧縮、バックグラウンド、スケジューリング。しかし、一部のタスクは一つの Agent では大きすぎる。\n\n「バックエンド全体をリファクタリング」、認証モジュール、データベース層、API ルート、テストを全面的に刷新。一つの Agent の注意力には限界がある。これにはチームが必要だ。\n\ns15 Agent Teams → 一人の Agent では足りない、チームを組もう。永続的なチームメイト + 非同期受信箱。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `CronCreateTool.ts`、`cronScheduler.ts`、`cron.ts`、`cronTasks.ts`、`cronTasksLock.ts`、`useScheduledTasks.ts`(139 行)の完全分析に基づく。\n\n### 一、3 つの Cron ツール\n\nCC はモデルに 3 つの cron ツールを公開:`CronCreate`、`CronDelete`、`CronList`。すべてコンパイル時ゲート `feature('AGENT_TRIGGERS')` とランタイム GrowthBook フラグ `tengu_kairos_cron` で制御。`CLAUDE_CODE_DISABLE_CRON` 環境変数でローカル上書きも可能。\n\n### 二、ストレージ:`.claude/scheduled_tasks.json`\n\n```json\n{ \"tasks\": [{ \"id\": \"abc12345\", \"cron\": \"0 9 * * *\", \"prompt\": \"...\", \"recurring\": true, \"durable\": true, \"createdAt\": 1714567890000 }] }\n```\n\ndurable タスクはディスクに書き込み。session-only タスクは `STATE.sessionCronTasks` メモリ配列に格納(プロセス再起動で消失)。`.scheduled_tasks.lock` ファイルで同じプロジェクトの複数セッション間の重複発火を防止。\n\n### 三、スケジューラ:1 秒ポーリング\n\n`cronScheduler.ts` は毎秒チェック(`CHECK_INTERVAL_MS = 1000`)。ロックを保持しているセッションがファイルタスクをトリガー。すべてのセッションが session-only タスクをトリガー。`chokidar` ファイルウォッチャーが `scheduled_tasks.json` の変更を監視。\n\n### 四、cron 式:標準 5 フィールド\n\n分 時 日 月 曜日。`*`、`*/N`、`N`、`N-M`、`N-M/S`、`N,M,...` をサポート。`L`、`W`、`?` は非サポート。すべての時間はローカルタイムゾーンで解釈。day-of-month と day-of-week が両方制約されている場合は OR セマンティクス。\n\n### 五、ジッター(サンダリングハード防止)\n\n- 定期タスク:トリガー遅延は期間の最大 10%(上限 15 分)、タスク ID ベースの決定的ハッシュ\n- 一回限りタスク:発火時刻が `:00` または `:30` の場合、最大 90 秒早く発火\n- ジッター設定は GrowthBook でリアルタイム調整可能、60 秒ごとにリフレッシュ\n\n### 六、自動期限切れ\n\n定期タスクは 7 日後に自動期限切れ(設定可能、上限 30 日)。期限切れ前に最後の一回を発火、その後自動削除。\n\n### 七、ジョブ数上限\n\n`MAX_JOBS = 50`(`CronCreateTool.ts:25`)。超過時はエラーを返す:\"Too many scheduled jobs (max 50). Cancel one first.\"\n\n### 八、トリガー注入\n\n発火後、`enqueuePendingNotification()` で `priority: 'later'` としてコマンドキューにエンキュー。`workload: WORKLOAD_CRON` タグ付き、API は容量が逼迫している時に cron 発信リクエストを低い QoS で処理。\n\n### 九、Queue Processor:自動配信\n\n実際の CC は `useQueueProcessor.ts:48-60` により、アクティブな query がなく、UI がブロックされておらず、キューが空でない場合に自動的に処理をトリガーする。`queueProcessor.ts:52-87` がキュープライオリティに従ってコマンドを `handlePromptSubmit()` にディスパッチ。教学版は `queue_processor_loop` で核心動作を保つ:キューに作業があり Agent がアイドルなら、自動的に一回の agent_loop を開始する。\n\n
\n\n\n" + }, + { + "version": "s15", + "locale": "en", + "title": "s15: Agent Teams — One Agent Isn't Enough, Form a Team", + "content": "# s15: Agent Teams — One Agent Isn't Enough, Form a Team\n\ns01 → ... → s13 → s14 → `s15` → [s16](/en/s16) → s17 → s18 → s19 → s20\n> *\"One agent isn't enough, form a team\"* — File-based inboxes + teammate threads.\n>\n> **Harness Layer**: Teams — Multi-agent collaboration, message bus.\n\n---\n\n## The Problem\n\n\"Refactor the entire backend\" touches auth, database layer, API routes, and tests. One agent working on API routes no longer has auth module details in context. The context window is limited, a single agent can't cover every module.\n\ns06's sub-agents are temps, called in for one job, then gone. Some tasks need teammates that can communicate and collaborate.\n\n---\n\n## The Solution\n\n![Agent Teams Overview](/course-assets/s15_agent_teams/agent-teams-overview.en.svg)\n\nTeaching code carries forward S14's capabilities (prompt assembly, task system, background execution, cron scheduling). To stay focused on the team mechanism, it omits full error recovery, memory, and skill systems. Added: **MessageBus** (file-based inboxes), **spawn_teammate_thread** (launch teammate threads), **inbox injection** (Lead receives teammate messages and injects into history).\n\nSub-agent vs Teammate:\n\n| | s06 Sub-agent | s15 Teammate |\n|---|---|---|\n| Lifetime | One-shot, destroyed after use | Multi-turn (teaching: 10 rounds; real CC: idle loop) |\n| Communication | Only returns conclusion | Async inbox, communicate anytime |\n| Context | Fully isolated | Shared via messages |\n| Count | One lead + occasional sub-agent | One Lead + multiple teammates |\n\n---\n\n## How It Works\n\n![Team Topology](/course-assets/s15_agent_teams/team-topology.en.svg)\n\n### MessageBus: File-Based Inboxes\n\nEach agent (including Lead and teammates) has a `.jsonl` inbox. Send = append a JSON line to the target's file. 
Read = read file + delete (consumption):\n\n```python\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str,\n content: str, msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()]\n inbox.unlink() # consume: read + delete\n return msgs\n```\n\nWhy files instead of in-memory queues? Teaching code uses files because they're intuitive and observable across threads. Real CC also uses file inboxes (`~/.claude/teams/{team}/inboxes/`) but adds `proper-lockfile` for concurrent write safety. The teaching version's `read_inbox` has a read + unlink race: under concurrent access messages can be lost (for example, a message appended between the read and the unlink is deleted unread). This is acceptable for teaching purposes.\n\n### spawn_teammate_thread: Launching a Teammate\n\nLead calls the `spawn_teammate` tool to start a teammate. The teammate runs in its own daemon thread with its own system prompt, messages, and simplified tool set:\n\n```python\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n system = f\"You are '{name}', a {role}. Use tools to complete tasks.\"\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [bash, read_file, write_file, send_message]\n for _ in range(10): # max 10 rounds\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n # ... 
execute tools, process results\n # Send final summary to Lead\n BUS.send(name, \"lead\", summary, \"result\")\n\n threading.Thread(target=run, daemon=True).start()\n```\n\nKey design:\n- **Simplified tool set**: bash, read, write, send_message. Teaching code omits tasks and cron to focus on communication. Real CC teammates also have TaskCreate, TaskUpdate, etc., the task system is shared across the team\n- **Teaching: 10 rounds max**: prevents infinite loops. Real CC uses idle loop: after each round, send `idle_notification`, wait for inbox messages, resume on arrival, exit only on `shutdown_request`\n- **Auto-report on completion**: `BUS.send(name, \"lead\", summary)` sends the final result to Lead's inbox\n\n### Lead's Inbox Injection\n\nLead checks inbox after each main loop iteration. Teammate messages are injected into history so the LLM can see and react to them:\n\n```python\n# After main loop iteration\ninbox = BUS.read_inbox(\"lead\")\nif inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n```\n\nTeaching code injects in the user input loop. Real CC is more refined, Lead's `useInboxPoller` checks every 1 second, submitting messages as new turns without waiting for user input.\n\n### Permission Bubbling\n\nTeaching code omits permission bubbling. Real CC's flow (`permissionSync.ts`, `useSwarmPermissionPoller.ts`):\n\n1. Teammate encounters an operation needing approval → sends `permission_request` to Lead's inbox\n2. Lead's `useInboxPoller` detects the request → routes to approval queue\n3. User approves → Lead sends `permission_response` back to teammate\n4. Teammate's `useSwarmPermissionPoller` (polls every 500ms) receives reply → continue or reject\n\n### Putting It Together\n\n```\n1. Lead: \"Build the backend: one agent isn't enough, form a team\"\n2. 
Lead → spawn_teammate(\"alice\", \"backend dev\", \"Create database schema\")\n3. Lead → spawn_teammate(\"bob\", \"frontend dev\", \"Write API client\")\n4. Alice thread starts → her own LLM call → bash \"python manage.py migrate\"\n5. Bob thread starts → his own LLM call → write_file(\"client.ts\", ...)\n6. Alice done → BUS.send(\"alice\", \"lead\", \"Schema done: users, orders tables\")\n7. Bob done → BUS.send(\"bob\", \"lead\", \"Client written with types\")\n8. Lead next iteration → inbox injected into history → LLM sees both results\n```\n\nTwo teammates work in parallel.\n\n---\n\n## Changes from s14\n\n| Component | Before (s14) | After (s15) |\n|-----------|-------------|-------------|\n| Agent count | 1 | 1 Lead + N teammate threads |\n| Communication | None | MessageBus + .mailboxes/*.jsonl |\n| New classes | — | MessageBus, active_teammates dict |\n| New functions | — | spawn_teammate_thread, run_send_message, run_check_inbox |\n| Lead tools | 11 (s14) | + spawn_teammate, send_message, check_inbox (14) |\n| Teammate tools | — | bash, read_file, write_file, send_message (4) |\n| Permissions | Local decisions | Teaching code omits (real CC has bubbling) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s15_agent_teams/code.py\n```\n\nTry these prompts:\n\n1. `Spawn alice as a backend developer. Ask her to create a file called schema.sql with a users table.`\n2. `Check your inbox for alice's result.`\n3. `Spawn bob as a tester. Ask him to check if schema.sql exists and list its contents.`\n\nWhat to observe: How does Lead spawn teammates? What do the `.mailboxes/` JSONL files look like? After teammates finish, is Lead's inbox injected into history?\n\n---\n\n## What's Next\n\nTeammates can work and communicate. But if Lead wants Alice to shut down, killing the thread outright could leave half-written files. 
A graceful shutdown protocol is needed: Lead sends shutdown_request, teammate wraps up and exits.\n\ns16 Team Protocols → Shutdown handshake and message conventions.\n\n
\nDeep Dive into CC Source\n\n> The following is a complete analysis based on CC source code `spawnMultiAgent.ts`, `useInboxPoller.ts` (969 lines), `useSwarmPermissionPoller.ts` (330 lines), `teammateMailbox.ts`, `teamHelpers.ts`.\n\n### 1. No Central Message Bus, It's the Filesystem\n\nTeaching code uses a `MessageBus` class to send and receive messages. Real CC is more direct, each agent writes directly to other agents' inbox files.\n\nInbox path: `~/.claude/teams/{teamName}/inboxes/{agentName}.json`\n\nWrites use `proper-lockfile` for concurrent write safety (up to 10 retries). Each file is a JSON array; appending reads → appends → writes back.\n\n### 2. 15 Message Types\n\nCC team communication has 15 structured message types (`teammateMailbox.ts`):\n\n| Type | Direction | Purpose |\n|------|-----------|---------|\n| `plain text` | Both ways | Normal inter-teammate communication |\n| `idle_notification` | Teammate→Lead | Teammate finished a turn, now idle |\n| `permission_request` | Teammate→Lead | Teammate needs operation approval |\n| `permission_response` | Lead→Teammate | Lead's approval result |\n| `plan_approval_request` | Teammate→Lead | Teammate submits plan for review |\n| `plan_approval_response` | Lead→Teammate | Lead's plan review |\n| `shutdown_request` | Lead→Teammate | Request graceful shutdown |\n| `shutdown_approved` | Teammate→Lead | Confirm shutdown |\n| `shutdown_rejected` | Teammate→Lead | Reject shutdown (with reason) |\n| `task_assignment` | Lead→Teammate | Assign a task |\n| `team_permission_update` | Lead→Teammate | Broadcast permission changes |\n| `mode_set_request` | Lead→Teammate | Change teammate's permission mode |\n| `sandbox_permission_*` | Both ways | Network permission request/reply |\n| `teammate_terminated` | System | Teammate removed notification |\n\nText messages are wrapped in `` XML tags for delivery to the model.\n\n### 3. Permission Bubbling: Bidirectional Polling\n\nTeaching code omits permission bubbling. 
Real CC's flow (`permissionSync.ts`):\n\n1. **Teammate** encounters operation needing approval → sends `permission_request` to Lead's inbox\n2. **Lead's** `useInboxPoller` (polls every 1s) detects request → routes to `ToolUseConfirmQueue`\n3. Lead's UI shows approval dialog with teammate name and color\n4. User approves → Lead sends `permission_response` back to teammate's inbox\n5. **Teammate's** `useSwarmPermissionPoller` (polls every 500ms) receives reply → continue or reject\n\n### 4. Teammate Lifecycle\n\nCC teammates are created by `spawnTeammate()` (`spawnMultiAgent.ts`):\n\n1. **Spawn**: Create tmux pane (or in-process), assign color, write team config\n2. **Work**: `useInboxPoller` checks inbox every 1s → submit as new turn when messages arrive\n3. **Idle**: Stop hook fires → send `idle_notification` to Lead\n4. **Shutdown**: Lead sends `shutdown_request` → teammate replies `shutdown_approved` → Lead cleans up\n\n### 5. Team Config\n\nTeam registry at `~/.claude/teams/{teamName}/config.json` (`teamHelpers.ts`):\n\n```json\n{\n \"name\": \"my-team\",\n \"leadAgentId\": \"lead@my-team\",\n \"members\": [{\n \"agentId\": \"researcher@my-team\",\n \"name\": \"researcher\",\n \"agentType\": \"general-purpose\",\n \"color\": \"blue\",\n \"isActive\": true\n }]\n}\n```\n\nTeammates cannot be nested (`AgentTool.tsx:273` explicitly forbids \"teammates spawning other teammates\").\n\n
\n\n\n" + }, + { + "version": "s15", + "locale": "zh", + "title": "s15: Agent Teams — 一个搞不定,组队来", + "content": "# s15: Agent Teams — 一个搞不定,组队来\n\ns01 → ... → s13 → s14 → `s15` → [s16](/zh/s16) → s17 → s18 → s19 → s20\n> *\"一个搞不定, 组队来\"* — 文件收件箱 + 队友线程。\n>\n> **Harness 层**: 团队 — 多 Agent 协作, 消息总线。\n\n---\n\n## 问题\n\n\"重构整个后端\"涉及认证模块、数据库层、API 路由、测试。一个 Agent 在修 API 路由时,认证模块的细节已经不在上下文里了。上下文窗口就那么大,单个 Agent 的注意力覆盖不了所有模块。\n\ns06 的子 Agent 是临时工,叫来干一件事就走了。但有些任务需要能通信、能协作的队友。\n\n---\n\n## 解决方案\n\n![Agent Teams Overview](/course-assets/s15_agent_teams/agent-teams-overview.svg)\n\n教学代码沿用 S14 的能力(prompt 组装、任务系统、后台执行、cron 调度)。为了聚焦团队机制,省略了完整错误恢复、记忆和技能系统。新增三样:**MessageBus**(文件收件箱)、**spawn_teammate_thread**(启动队友线程)、**inbox 注入**(Lead 接收队友消息并注入 history)。\n\n子 Agent vs 队友:\n\n| | s06 子 Agent | s15 队友 |\n|---|---|---|\n| 生命周期 | 一次性,用完销毁 | 多轮(教学版限 10 轮,真实 CC 用 idle loop) |\n| 通信 | 只回传结论 | 异步收件箱,随时通信 |\n| 上下文 | 完全隔离 | 通过消息共享信息 |\n| 数量 | 一个主 Agent + 偶尔子 Agent | 一个 Lead + 多个队友 |\n\n---\n\n## 工作原理\n\n![Team Topology](/course-assets/s15_agent_teams/team-topology.svg)\n\n### MessageBus: 文件收件箱\n\n每个 Agent(包括 Lead 和队友)有一个 `.jsonl` 邮箱。发消息 = 往对方的文件里 append 一行 JSON。读消息 = 读文件 + 删除(消费式):\n\n```python\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str,\n content: str, msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()]\n inbox.unlink() # 消费式:读完删除\n return msgs\n```\n\n为什么用文件而不是内存队列?教学版选文件是因为直观、跨线程可观察。真实 CC 也用文件收件箱(`~/.claude/teams/{team}/inboxes/`),但加了 `proper-lockfile` 防并发写冲突。教学版的 `read_inbox` 有 read + unlink 竞态,多线程同时读可能丢消息,对教学场景可以接受。\n\n### spawn_teammate_thread: 启动队友\n\nLead 调用 
`spawn_teammate` 工具启动一个队友。队友跑在自己的 daemon 线程里,有自己的 system prompt、自己的 messages、自己的简化工具集:\n\n```python\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n system = f\"You are '{name}', a {role}. Use tools to complete tasks.\"\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [bash, read_file, write_file, send_message]\n for _ in range(10): # 最多 10 轮\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n # ... 执行工具、处理结果\n # 完成后发 summary 给 Lead\n BUS.send(name, \"lead\", summary, \"result\")\n\n threading.Thread(target=run, daemon=True).start()\n```\n\n关键设计:\n- **队友有简化工具集**:bash、read、write、send_message。教学版省略了任务和 cron,聚焦通信机制。真实 CC 的队友也有 TaskCreate、TaskUpdate 等工具,任务系统是团队共享的\n- **教学版限 10 轮**:防止队友无限循环。真实 CC 用 idle loop:跑完一轮后发 `idle_notification`,等 inbox 消息,收到后继续,直到 `shutdown_request` 才退出\n- **完成后自动汇报**:`BUS.send(name, \"lead\", summary)` 把最终结果发到 Lead 的收件箱\n\n### Lead 的 inbox 注入\n\nLead 在每轮主循环结束后检查收件箱。队友发来的消息注入到 history 里,让 LLM 能看到并做出反应:\n\n```python\n# 主循环结束后\ninbox = BUS.read_inbox(\"lead\")\nif inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n```\n\n教学版在用户输入循环外注入。CC 更精细,Lead 的 `useInboxPoller` 每 1 秒检查一次,有消息就提交为新的 turn,不需要等用户输入。\n\n### 权限冒泡\n\n教学版省略了权限冒泡。真实 CC 的流程(`permissionSync.ts`、`useSwarmPermissionPoller.ts`):\n\n1. 队友遇到需要审批的操作 → 发 `permission_request` 到 Lead 收件箱\n2. Lead 的 `useInboxPoller` 检测到请求 → 路由到审批队列\n3. 用户审批后 → Lead 发 `permission_response` 回队友\n4. 队友的 `useSwarmPermissionPoller`(每 500ms 轮询)收到回复 → 继续或拒绝\n\n### 合起来跑\n\n```\n1. Lead: \"搭建后端:一个人搞不定,组队吧\"\n2. Lead → spawn_teammate(\"alice\", \"backend dev\", \"创建数据库 schema\")\n3. 
Lead → spawn_teammate(\"bob\", \"frontend dev\", \"写 API 客户端\")\n4. alice 线程启动 → 自己的 LLM 调用 → bash \"python manage.py migrate\"\n5. bob 线程启动 → 自己的 LLM 调用 → write_file(\"client.ts\", ...)\n6. alice 完成 → BUS.send(\"alice\", \"lead\", \"Schema done: users, orders tables\")\n7. bob 完成 → BUS.send(\"bob\", \"lead\", \"Client written with types\")\n8. Lead 下次循环 → inbox 注入 history → LLM 看到 alice 和 bob 的结果\n```\n\n两个队友并行工作。\n\n---\n\n## 相对 s14 的变更\n\n| 组件 | 之前 (s14) | 之后 (s15) |\n|------|-----------|-----------|\n| Agent 数量 | 1 | 1 Lead + N 队友线程 |\n| 通信 | 无 | MessageBus + .mailboxes/*.jsonl |\n| 新类 | — | MessageBus, active_teammates dict |\n| 新函数 | — | spawn_teammate_thread, run_send_message, run_check_inbox |\n| Lead 工具 | 11 (s14) | + spawn_teammate, send_message, check_inbox (14) |\n| 队友工具 | — | bash, read_file, write_file, send_message (4) |\n| 权限 | 本地决策 | 教学版省略(真实 CC 有冒泡机制) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s15_agent_teams/code.py\n```\n\n试试这些 prompt:\n\n1. `Spawn alice as a backend developer. Ask her to create a file called schema.sql with a users table.`\n2. `Check your inbox for alice's result.`\n3. `Spawn bob as a tester. Ask him to check if schema.sql exists and list its contents.`\n\n观察重点:Lead 如何启动队友?`.mailboxes/` 目录下的 JSONL 文件长什么样?队友完成后 Lead 的 inbox 有没有注入到 history?\n\n---\n\n## 接下来\n\n队友能干活、能通信。但如果 Lead 想让 Alice 关机,直接杀线程会留下写到一半的文件。需要一个体面的关机协议:Lead 发 shutdown_request,队友收尾后退出。\n\ns16 Team Protocols → 关机握手与消息约定。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `spawnMultiAgent.ts`、`useInboxPoller.ts`(969 行)、`useSwarmPermissionPoller.ts`(330 行)、`teammateMailbox.ts`、`teamHelpers.ts` 的完整分析。\n\n### 一、没有中央消息总线,是文件系统\n\n教学版用 `MessageBus` 类收发消息。CC 的做法更直接,每个 Agent 直接写其他 Agent 的收件箱文件。\n\n收件箱路径:`~/.claude/teams/{teamName}/inboxes/{agentName}.json`\n\n写入时用 `proper-lockfile` 文件锁保证并发安全(最多重试 10 次)。每个文件是一个 JSON 数组,append 新消息时读→追加→写回。\n\n### 二、15 种消息类型\n\nCC 的团队通信有 15 种结构化消息(`teammateMailbox.ts`):\n\n| 类型 | 方向 | 用途 |\n|------|------|------|\n| `plain text` | 双向 | 普通队友间通信 |\n| `idle_notification` | 队友→Lead | 队友完成一轮工作,进入空闲 |\n| `permission_request` | 队友→Lead | 队友需要操作审批 |\n| `permission_response` | Lead→队友 | Lead 审批结果 |\n| `plan_approval_request` | 队友→Lead | 队友提交计划待审 |\n| `plan_approval_response` | Lead→队友 | Lead 审批计划 |\n| `shutdown_request` | Lead→队友 | 请求体面关机 |\n| `shutdown_approved` | 队友→Lead | 确认关机 |\n| `shutdown_rejected` | 队友→Lead | 拒绝关机(附原因) |\n| `task_assignment` | Lead→队友 | 分配任务 |\n| `team_permission_update` | Lead→队友 | 广播权限变更 |\n| `mode_set_request` | Lead→队友 | 修改队友的权限模式 |\n| `sandbox_permission_*` | 双向 | 网络权限请求/回复 |\n| `teammate_terminated` | 系统 | 队友被移除通知 |\n\n文本消息被包装在 `` XML 标签中交付给模型。\n\n### 三、权限冒泡:双向轮询\n\n教学版省略了权限冒泡。CC 的实际流程(`permissionSync.ts`):\n\n1. **队友**遇到需要审批的操作 → 发 `permission_request` 到 Lead 的收件箱\n2. **Lead** 的 `useInboxPoller`(每 1 秒轮询)检测到请求 → 路由到 `ToolUseConfirmQueue`\n3. Lead 的 UI 显示审批对话框,带队友名字和颜色\n4. 用户审批后 → Lead 发 `permission_response` 回队友的收件箱\n5. **队友**的 `useSwarmPermissionPoller`(每 500ms 轮询)收到回复 → 继续或拒绝执行\n\n### 四、队友生命周期\n\nCC 的队友由 `spawnTeammate()`(`spawnMultiAgent.ts`)创建:\n\n1. **Spawn**:创建 tmux 窗格(或进程内),分配颜色,写入 team config\n2. **Work**:`useInboxPoller` 每 1 秒检查收件箱 → 有消息就提交为新的 turn\n3. **Idle**:Stop hook 触发 → 发 `idle_notification` 给 Lead\n4. 
**Shutdown**:Lead 发 `shutdown_request` → 队友回复 `shutdown_approved` → Lead 清理\n\n### 五、Team Config\n\n团队注册表在 `~/.claude/teams/{teamName}/config.json`(`teamHelpers.ts`):\n\n```json\n{\n \"name\": \"my-team\",\n \"leadAgentId\": \"lead@my-team\",\n \"members\": [{\n \"agentId\": \"researcher@my-team\",\n \"name\": \"researcher\",\n \"agentType\": \"general-purpose\",\n \"color\": \"blue\",\n \"isActive\": true\n }]\n}\n```\n\n队友之间不能嵌套(`AgentTool.tsx:273` 明确禁止 \"teammates spawning other teammates\")。\n\n
\n\n\n" + }, + { + "version": "s15", + "locale": "ja", + "title": "s15: Agent Teams — 一人では無理、チームを組もう", + "content": "# s15: Agent Teams — 一人では無理、チームを組もう\n\ns01 → ... → s13 → s14 → `s15` → [s16](/ja/s16) → s17 → s18 → s19 → s20\n> *\"一人では無理、チームを組もう\"* — ファイル受信箱 + チームメイトスレッド。\n>\n> **Harness 層**: チーム — マルチ Agent 協調、メッセージバス。\n\n---\n\n## 課題\n\n「バックエンド全体をリファクタリング」は認証モジュール、データベース層、API ルート、テストに及ぶ。一つの Agent が API ルートを修正中、認証モジュールの詳細はコンテキストから外れている。コンテキストウィンドウには限界があり、単一 Agent の注意は全モジュールをカバーできない。\n\ns06 のサブ Agent は臨時スタッフ、一つの仕事を終えたら去る。だが、通信でき、協力できるチームメイトが必要なタスクもある。\n\n---\n\n## ソリューション\n\n![Agent Teams Overview](/course-assets/s15_agent_teams/agent-teams-overview.ja.svg)\n\n教学版は S14 の能力(プロンプト組み立て、タスクシステム、バックグラウンド実行、cron スケジューリング)を踏襲。チーム機構に集中するため、完全なエラーリカバリ、メモリ、スキルシステムは省略。追加:**MessageBus**(ファイル受信箱)、**spawn_teammate_thread**(チームメイトスレッド起動)、**inbox 注入**(Lead がチームメイトメッセージを受信し history に注入)。\n\nサブ Agent vs チームメイト:\n\n| | s06 サブ Agent | s15 チームメイト |\n|---|---|---|\n| ライフサイクル | 一回きり、終了後に破棄 | マルチターン(教学版は 10 ラウンド制限、実際の CC は idle loop) |\n| 通信 | 結果のみ返却 | 非同期受信箱、いつでも通信可能 |\n| コンテキスト | 完全に隔離 | メッセージで情報共有 |\n| 数 | メイン Agent + たまにサブ Agent | 1 Lead + 複数チームメイト |\n\n---\n\n## 仕組み\n\n![Team Topology](/course-assets/s15_agent_teams/team-topology.ja.svg)\n\n### MessageBus: ファイル受信箱\n\n各 Agent(Lead とチームメイトを含む)には `.jsonl` 受信箱がある。メッセージ送信 = 相手のファイルに 1 行 JSON を append。メッセージ読み取り = ファイル読み込み + 削除(消費式):\n\n```python\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str,\n content: str, msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()]\n inbox.unlink() # 消費式:読んだら削除\n return 
msgs\n```\n\nなぜメモリキューではなくファイルなのか?教学版がファイルを選ぶ理由は、直感的でスレッドをまたいで観察可能だから。実際の CC もファイル受信箱(`~/.claude/teams/{team}/inboxes/`)を使うが、`proper-lockfile` で並行書き込みの安全性を確保。教学版の `read_inbox` には read + unlink の競合状態があり、マルチスレッド同時読みでメッセージが失われる可能性があるが、教学目的には許容範囲。\n\n### spawn_teammate_thread: チームメイト起動\n\nLead が `spawn_teammate` ツールを呼び出してチームメイトを起動。チームメイトは独自の daemon スレッドで動作、独自の system prompt、messages、簡易ツールセットを持つ:\n\n```python\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n system = f\"You are '{name}', a {role}. Use tools to complete tasks.\"\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [bash, read_file, write_file, send_message]\n for _ in range(10): # 最大 10 ラウンド\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n # ... ツール実行、結果処理\n # 完了後 summary を Lead に送信\n BUS.send(name, \"lead\", summary, \"result\")\n\n threading.Thread(target=run, daemon=True).start()\n```\n\n重要な設計:\n- **チームメイトの簡易ツールセット**:bash、read、write、send_message。教学版は通信機構に集中するためタスクと cron を省略。実際の CC のチームメイトには TaskCreate、TaskUpdate 等のツールもあり、タスクシステムはチーム全体で共有\n- **教学版は 10 ラウンド制限**:無限ループを防止。実際の CC は idle loop:1 ラウンド終了後に `idle_notification` を送信、inbox メッセージを待機、到着後に再開、`shutdown_request` でのみ終了\n- **完了時自動報告**:`BUS.send(name, \"lead\", summary)` で最終結果を Lead の受信箱に送信\n\n### Lead の inbox 注入\n\nLead はメインループの各反復後に受信箱を確認。チームメイトからのメッセージを history に注入し、LLM が確認して反応できるようにする:\n\n```python\n# メインループ反復後\ninbox = BUS.read_inbox(\"lead\")\nif inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n```\n\n教学版はユーザー入力ループ内で注入。実際の CC はより精密、Lead の `useInboxPoller` が毎秒チェックし、ユーザー入力を待たずにメッセージを新しい turn として送信。\n\n### 権限バブリング\n\n教学版は権限バブリングを省略。実際の CC 
のフロー(`permissionSync.ts`、`useSwarmPermissionPoller.ts`):\n\n1. チームメイトが承認が必要な操作に遭遇 → `permission_request` を Lead の受信箱に送信\n2. Lead の `useInboxPoller` がリクエストを検出 → 承認キューにルーティング\n3. ユーザーが承認 → Lead が `permission_response` をチームメイトに返信\n4. チームメイトの `useSwarmPermissionPoller`(500ms ごとにポーリング)が返信を受信 → 続行または拒否\n\n### 組み合わせて実行\n\n```\n1. Lead: \"バックエンド構築:一人では無理、チームを組もう\"\n2. Lead → spawn_teammate(\"alice\", \"backend dev\", \"データベーススキーマを作成\")\n3. Lead → spawn_teammate(\"bob\", \"frontend dev\", \"API クライアントを作成\")\n4. alice スレッド起動 → 独自の LLM 呼び出し → bash \"python manage.py migrate\"\n5. bob スレッド起動 → 独自の LLM 呼び出し → write_file(\"client.ts\", ...)\n6. alice 完了 → BUS.send(\"alice\", \"lead\", \"Schema done: users, orders tables\")\n7. bob 完了 → BUS.send(\"bob\", \"lead\", \"Client written with types\")\n8. Lead 次回反復 → inbox を history に注入 → LLM が alice と bob の結果を確認\n```\n\n2 人のチームメイトが並行作業。\n\n---\n\n## s14 からの変更\n\n| コンポーネント | 変更前 (s14) | 変更後 (s15) |\n|--------------|------------|------------|\n| Agent 数 | 1 | 1 Lead + N チームメイトスレッド |\n| 通信 | なし | MessageBus + .mailboxes/*.jsonl |\n| 新規クラス | — | MessageBus, active_teammates dict |\n| 新規関数 | — | spawn_teammate_thread, run_send_message, run_check_inbox |\n| Lead ツール | 11 (s14) | + spawn_teammate, send_message, check_inbox (14) |\n| チームメイトツール | — | bash, read_file, write_file, send_message (4) |\n| 権限 | ローカル判断 | 教学版は省略(真实 CC はバブリング機構あり) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s15_agent_teams/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Spawn alice as a backend developer. Ask her to create a file called schema.sql with a users table.`\n2. `Check your inbox for alice's result.`\n3. `Spawn bob as a tester. 
Ask him to check if schema.sql exists and list its contents.`\n\n観察ポイント:Lead はチームメイトをどう起動するか?`.mailboxes/` ディレクトリの JSONL ファイルの中身は?チームメイト完了後、Lead の inbox は history に注入されているか?\n\n---\n\n## 次の章\n\nチームメイトは仕事をし、通信できる。しかし、Lead が Alice にシャットダウンを頼む場合、スレッドを強制終了すると書きかけのファイルが残る。丁寧なシャットダウンプロトコルが必要:Lead が shutdown_request を送信、チームメイトは後片付けを終えてから終了。\n\ns16 Team Protocols → シャットダウンハンドシェイクとメッセージの取り決め。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `spawnMultiAgent.ts`、`useInboxPoller.ts`(969 行)、`useSwarmPermissionPoller.ts`(330 行)、`teammateMailbox.ts`、`teamHelpers.ts` の完全分析に基づく。\n\n### 一、中央メッセージバスはない、ファイルシステム\n\n教学版は `MessageBus` クラスでメッセージを送受信。実際の CC はもっと直接的、各 Agent が他の Agent の受信箱ファイルに直接書き込む。\n\n受信箱パス:`~/.claude/teams/{teamName}/inboxes/{agentName}.json`\n\n書き込み時は `proper-lockfile` で並行安全性を確保(最大 10 回リトライ)。各ファイルは JSON 配列、append 時に読み取り→追加→書き戻し。\n\n### 二、15 種のメッセージ型\n\nCC のチーム通信には 15 種の構造化メッセージ(`teammateMailbox.ts`)がある:\n\n| 型 | 方向 | 用途 |\n|------|------|------|\n| `plain text` | 双方向 | 通常のチームメイト間通信 |\n| `idle_notification` | チームメイト→Lead | チームメイトが 1 ターン完了、アイドル状態に |\n| `permission_request` | チームメイト→Lead | 操作承認が必要 |\n| `permission_response` | Lead→チームメイト | Lead の承認結果 |\n| `plan_approval_request` | チームメイト→Lead | 計画提出、審査待ち |\n| `plan_approval_response` | Lead→チームメイト | Lead の計画審査 |\n| `shutdown_request` | Lead→チームメイト | 丁寧なシャットダウン要求 |\n| `shutdown_approved` | チームメイト→Lead | シャットダウン確認 |\n| `shutdown_rejected` | チームメイト→Lead | シャットダウン拒否(理由付き) |\n| `task_assignment` | Lead→チームメイト | タスク割り当て |\n| `team_permission_update` | Lead→チームメイト | 権限変更のブロードキャスト |\n| `mode_set_request` | Lead→チームメイト | チームメイトの権限モード変更 |\n| `sandbox_permission_*` | 双方向 | ネットワーク権限リクエスト/返信 |\n| `teammate_terminated` | システム | チームメイト削除通知 |\n\nテキストメッセージは `<teammate-message>` XML タグでラップされモデルに配信。\n\n### 三、権限バブリング:双方向ポーリング\n\n教学版は権限バブリングを省略。実際の CC のフロー(`permissionSync.ts`):\n\n1. **チームメイト**が承認が必要な操作に遭遇 → `permission_request` を Lead の受信箱に送信\n2. **Lead** の `useInboxPoller`(1 秒ごとにポーリング)がリクエストを検出 → `ToolUseConfirmQueue` にルーティング\n3. Lead の UI にチームメイト名と色付きの承認ダイアログを表示\n4. ユーザー承認後 → Lead が `permission_response` をチームメイトの受信箱に返信\n5. **チームメイト**の `useSwarmPermissionPoller`(500ms ごとにポーリング)が返信を受信 → 続行または拒否\n\n### 四、チームメイトライフサイクル\n\nCC のチームメイトは `spawnTeammate()`(`spawnMultiAgent.ts`)で作成:\n\n1. **Spawn**:tmux ペイン(またはプロセス内)を作成、色を割り当て、team config に書き込み\n2. **Work**:`useInboxPoller` が毎秒受信箱をチェック → メッセージ到着時に新しい turn として送信\n3. 
**Idle**:Stop hook 発火 → `idle_notification` を Lead に送信\n4. **Shutdown**:Lead が `shutdown_request` を送信 → チームメイトが `shutdown_approved` で返信 → Lead がクリーンアップ\n\n### 五、Team Config\n\nチーム登録は `~/.claude/teams/{teamName}/config.json`(`teamHelpers.ts`):\n\n```json\n{\n \"name\": \"my-team\",\n \"leadAgentId\": \"lead@my-team\",\n \"members\": [{\n \"agentId\": \"researcher@my-team\",\n \"name\": \"researcher\",\n \"agentType\": \"general-purpose\",\n \"color\": \"blue\",\n \"isActive\": true\n }]\n}\n```\n\nチームメイトのネストは禁止(`AgentTool.tsx:273` で \"teammates spawning other teammates\" を明示的に禁止)。\n\n
\n\n\n" + }, + { + "version": "s16", + "locale": "en", + "title": "s16: Team Protocols — Teammates Need Agreements", + "content": "# s16: Team Protocols — Teammates Need Agreements\n\ns01 → ... → s14 → s15 → `s16` → [s17](/en/s17) → s18 → s19 → s20\n> *\"Teammates need agreements\"* — request-response pattern drives all negotiation.\n>\n> **Harness Layer**: Protocols — Structured handshakes between agents.\n\n---\n\n## The Problem\n\ns15's teammates can work, but coordination is loose: Lead sends a message, teammate replies, no structured protocol. Two scenarios expose the gap:\n\n**Shutdown**: Lead wants Alice to shut down. Killing the thread outright leaves half-written files on disk. A handshake is needed: Lead sends a request, Alice confirms after wrapping up.\n\n**Plan approval**: Bob wants to refactor the auth module, a high-risk operation. Lead should review Bob's plan first, approve before Bob proceeds.\n\nBoth scenarios share the same structure: one side sends a request, the other replies, both linked by the same ID. A state machine tracks: pending → approved / rejected.\n\n---\n\n## The Solution\n\n![Team Protocols Overview](/course-assets/s16_team_protocols/team-protocols-overview.en.svg)\n\nTeaching code continues the agent capability arc from earlier chapters and adds structured protocols on top of S15's team communication. To stay focused on the protocol mechanism, it omits full error recovery, memory, and skill systems. 
Added: **ProtocolState** (request state tracking), **dispatch_message** (routes incoming messages by type to handlers), **match_response** (correlates response to request via request_id, with type validation).\n\nTwo protocols, one mechanism:\n\n| Protocol | Direction | Purpose |\n|----------|-----------|---------|\n| shutdown_request / response | Lead → Teammate | Graceful shutdown handshake |\n| plan_approval_request / response | Teammate → Lead | Plan approval protocol example |\n\n> Teaching version demonstrates the request-response message flow for plan approval, but does not implement execution gating (intercepting bash/write_file when not approved). Real CC has a permission gating mechanism for teammates.\n\n---\n\n## How It Works\n\n### ProtocolState: Request State\n\nEach protocol request creates a state record tracking who sent it, to whom, current status, and payload:\n\n```python\n@dataclass\nclass ProtocolState:\n request_id: str # Unique ID, e.g. \"req_004281\"\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str # Sender\n target: str # Recipient\n status: str # pending | approved | rejected\n payload: str # Plan text or shutdown reason\n created_at: float # Timestamp\n\npending_requests: dict[str, ProtocolState] = {}\n```\n\nA record is created when sending a request, found via `request_id` when receiving a response, and its status updated.\n\n### Four-Step Protocol Flow\n\nUsing shutdown as an example, the full chain:\n\n```\n1. Lead sends request\n req_id = new_request_id() # \"req_004281\"\n pending_requests[req_id] = ProtocolState(type=\"shutdown\", status=\"pending\", ...)\n BUS.send(\"lead\", \"alice\", \"shutdown_request\", metadata={\"request_id\": req_id})\n\n2. Teammate receives → dispatch\n inbox = BUS.read_inbox(\"alice\")\n msg_type = msg[\"type\"] # \"shutdown_request\"\n → routed to handle_shutdown_request()\n\n3. 
Teammate replies\n BUS.send(\"alice\", \"lead\", \"shutdown_response\",\n metadata={\"request_id\": req_id, \"approve\": True})\n\n4. Lead receives response → match\n match_response(\"shutdown_response\", req_id, approve=True)\n pending_requests[req_id].status = \"approved\"\n```\n\n`request_id` is the correlation key across the entire chain: the request carries it out, the response carries it back.\n\n### dispatch_message: Route by Type\n\nA teammate's inbox receives both plain messages and protocol messages. `handle_inbox_message` dispatches by message type:\n\n```python\ndef handle_inbox_message(name, msg, messages):\n msg_type = msg.get(\"type\", \"message\")\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\", \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True # Stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = msg[\"metadata\"].get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve else \"[Plan rejected]\"})\n return False # Continue\n```\n\nAdding a new protocol type means adding a new `if` branch.\n\n### match_response: Type Validation\n\n`match_response` doesn't just find state by `request_id`, it also validates that the response type matches the request type:\n\n```python\ndef match_response(response_type, request_id, approve):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return # type mismatch, skip\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n if state.status != \"pending\":\n return # already resolved, skip duplicate\n state.status = \"approved\" if approve else \"rejected\"\n```\n\nA shutdown_response cannot accidentally approve a plan_approval request.\n\n### Unified Inbox Consumer: 
consume_lead_inbox\n\nBoth the `check_inbox` tool and the main loop call the same `consume_lead_inbox()` function, routing protocol messages before returning remaining content. This prevents messages from being consumed without protocol state updates:\n\n```python\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n```\n\nThe main loop also injects inbox messages into `history` so the LLM can see and react to them.\n\n### Teammate Idle Loop: Wait Instead of Exit\n\ns15's teammates exit after 10 rounds. s16's teammates enter idle waiting after the LLM returns a non-tool_use response: poll inbox, respond to shutdown_request and exit, or continue working on new messages.\n\n```\nLLM returns non-tool_use\n → idle: poll inbox every second\n → receives shutdown_request → reply shutdown_response → exit\n → receives new message → inject into messages → continue LLM turn\n```\n\nTeaching version omits idle_notification to Lead. Real CC sends `idle_notification` when idle, so Lead knows the teammate is free for new tasks.\n\n### Putting It Together\n\n```\n1. Lead: \"Have Alice create a file, then shut her down\"\n2. Lead → spawn_teammate(\"alice\", \"backend\", \"Create config.py\")\n3. alice thread starts → write_file(\"config.py\", \"...\") → done → idle\n4. Lead → request_shutdown(\"alice\")\n → BUS.send(\"shutdown_request\", {request_id: \"req_000142\"})\n5. alice idle poll receives → handle_shutdown_request\n → BUS.send(\"shutdown_response\", {request_id: \"req_000142\", approve: True})\n6. 
Lead consume_lead_inbox → match_response(\"req_000142\", approve=True)\n → pending_requests[\"req_000142\"].status = \"approved\"\n → inbox message injected into history, LLM sees shutdown result\n```\n\nShutdown handshake complete: request → confirm → shutdown. Every step tracked by `request_id`.\n\n---\n\n## Changes from s15\n\n| Component | Before (s15) | After (s16) |\n|-----------|-------------|-------------|\n| Coordination | Loose text messages | Structured request-response protocol |\n| Request tracking | None | ProtocolState + pending_requests dict |\n| Message routing | All treated as text | dispatch_message routes by type |\n| Shutdown | Natural exit or kill thread | request_id handshake mechanism |\n| Plan approval | None | Message flow example (no execution gating) |\n| New message types | message, result | + shutdown_request/response, plan_approval_request/response |\n| Teammate lifecycle | Max 10 rounds | Idle loop (waits for inbox messages) |\n| Lead inbox | check_inbox and main loop read separately | Unified consume_lead_inbox |\n| Lead tools | 14 (s15) | 14 (core tool set plus request_shutdown, request_plan, review_plan) |\n| Teammate tools | 4 (s15) | + submit_plan (5) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s16_team_protocols/code.py\n```\n\nTry these prompts:\n\n1. `Spawn alice as a backend dev. Ask her to create a file. Then request her shutdown.`\n2. `Spawn bob with a refactoring task. Have him submit a plan first. Then review and approve it.`\n\nWhat to observe: Is the shutdown handshake complete (request → confirm → shutdown)? Does `pending_requests` state transition correctly? Is `request_id` consistent between request and response? Can the idle teammate receive shutdown_request?\n\n---\n\n## What's Next\n\nIn s15-s16, Lead must assign tasks to each teammate. 
\"Alice does this, Bob does that.\" With 10 unclaimed tasks on the board, Lead has to manually assign each one.\n\nWhat if teammates could check the board and claim tasks themselves? Lead only needs to create tasks; teammates discover, claim, and complete them on their own.\n\ns17 Autonomous Agents → Self-organizing teammates, no leader assignment needed.\n\n
\nDeep Dive into CC Source\n\nCC's team protocol implementation (`teammateMailbox.ts`, 1184 lines) shares the same core structure as the teaching version: request_id + approve/reject request-response pattern. Differences:\n\n**Shutdown protocol**: CC's shutdown is three-way communication (`teammateMailbox.ts:720-763`, `SendMessageTool.ts:268-430`). Lead sends `shutdown_request`, teammate replies `shutdown_approved` (or `shutdown_rejected` with reason), system sends `teammate_terminated` to notify all parties. After confirmation, system cleans up pane (tmux/iTerm2), unassigns tasks, removes member from team config (`useInboxPoller.ts:677-800`). Teaching version uses `shutdown_response` as a unified name; real source splits into `shutdown_approved` and `shutdown_rejected` as two separate message types.\n\n**Plan approval**: In the real source, plan approval request is generated by `ExitPlanModeV2Tool.ts:263-312` when a plan-mode-required teammate exits plan mode. `useInboxPoller.ts:599-661` currently auto-writes approval and passes the request to Lead as context (regular message). `SendMessageTool.ts:434-518` retains explicit approve/reject response capability — approval can simultaneously set `permissionMode` (e.g. \"approved but run in plan mode\"), response can include `feedback` string for teammate to revise and resubmit. Not a simple \"Lead manually uses review_plan tool\" flow.\n\n**Message format**: CC's protocol messages are structured JSON (with Zod schema validation), teaching version uses simple type + metadata dict. Field names are also inconsistent: permission uses `request_id` (`teammateMailbox.ts:453-462`), shutdown and plan approval use `requestId` (`teammateMailbox.ts:684-763`).\n\n**Execution gating**: CC's teammates have full permission gating. Unapproved high-risk operations are intercepted, not optional. 
Teaching version only demonstrates the message flow without execution interception.\n\n**Generality**: Teaching version's single FSM (pending → approved | rejected) maps to two protocols. This simplification is correct. CC's protocol messages all share the same request id correlation mechanism.\n\n
\n\n\n" + }, + { + "version": "s16", + "locale": "zh", + "title": "s16: Team Protocols — 队友之间要有约定", + "content": "# s16: Team Protocols — 队友之间要有约定\n\ns01 → ... → s14 → s15 → `s16` → [s17](/zh/s17) → s18 → s19 → s20\n> *\"队友之间要有约定\"* — request-response 模式驱动协商。\n>\n> **Harness 层**: 协议 — Agent 之间的结构化握手。\n\n---\n\n## 问题\n\ns15 的队友能干活了,但协调是松散的:Lead 发消息,队友回复,没有结构化的协议。两个场景暴露了问题:\n\n**关机**:Lead 想让 Alice 关机。直接杀线程,Alice 写了一半的文件留在磁盘上。需要握手:Lead 发请求,Alice 确认收尾后关机。\n\n**计划审批**:Bob 想重构认证模块,属于高风险操作。应该先让 Lead 看 Bob 的计划,审批通过后再动手。\n\n这两个场景结构完全一样:一方发请求,另一方给回复,请求和回复通过同一个 ID 关联。有状态机追踪:pending → approved / rejected。\n\n---\n\n## 解决方案\n\n![Team Protocols Overview](/course-assets/s16_team_protocols/team-protocols-overview.svg)\n\n教学代码承接前面章节的 Agent 能力脉络,在 S15 团队通信基础上加入结构化协议。为了聚焦协议机制,省略了完整错误恢复、记忆和技能系统。新增三样:**ProtocolState**(请求状态追踪)、**dispatch_message**(按消息类型路由到处理器)、**match_response**(通过 request_id 关联回复与请求,含类型校验)。\n\n两种协议,一套机制:\n\n| 协议 | 方向 | 用途 |\n|------|------|------|\n| shutdown_request / response | Lead → 队友 | 体面关机握手 |\n| plan_approval_request / response | 队友 → Lead | 计划审批协议示例 |\n\n> 教学版演示了计划审批的请求-响应消息流程,没有实现执行门控(未 approved 时拦截 bash/write_file)。真实 CC 的队友有 permission gating 机制。\n\n---\n\n## 工作原理\n\n### ProtocolState: 请求状态\n\n每个协议请求创建一条状态记录,记录谁发的、发给谁、当前状态、附带内容:\n\n```python\n@dataclass\nclass ProtocolState:\n request_id: str # 唯一 ID,如 \"req_004281\"\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str # 发起方\n target: str # 接收方\n status: str # pending | approved | rejected\n payload: str # 计划文本或关机原因\n created_at: float # 时间戳\n\npending_requests: dict[str, ProtocolState] = {}\n```\n\n发请求时创建记录,收回复时通过 `request_id` 找到对应记录,更新状态。\n\n### 四步协议流程\n\n以关机为例,完整链路:\n\n```\n① Lead 发请求\n req_id = new_request_id() # \"req_004281\"\n pending_requests[req_id] = ProtocolState(type=\"shutdown\", status=\"pending\", ...)\n BUS.send(\"lead\", \"alice\", \"shutdown_request\", metadata={\"request_id\": req_id})\n\n② 队友收到 → dispatch\n inbox = BUS.read_inbox(\"alice\")\n msg_type = msg[\"type\"] # 
\"shutdown_request\"\n → 路由到 handle_shutdown_request()\n\n③ 队友回复\n BUS.send(\"alice\", \"lead\", \"shutdown_response\",\n metadata={\"request_id\": req_id, \"approve\": True})\n\n④ Lead 收响应 → match\n match_response(\"shutdown_response\", req_id, approve=True)\n pending_requests[req_id].status = \"approved\"\n```\n\n`request_id` 是贯穿全链路的关联键,请求带着它出去,回复带着它回来。\n\n> 教学版用 `shutdown_response` 统一命名(approve 字段区分同意/拒绝)。真实源码拆成 `shutdown_approved` 和 `shutdown_rejected` 两种独立消息类型(`teammateMailbox.ts:720-763`)。\n\n### dispatch_message: 按类型路由\n\n队友的 inbox 不只收普通消息,还收协议消息。`handle_inbox_message` 按消息类型分发:\n\n```python\ndef handle_inbox_message(name, msg, messages):\n msg_type = msg.get(\"type\", \"message\")\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\", \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True # 停止循环\n\n if msg_type == \"plan_approval_response\":\n approve = msg[\"metadata\"].get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve else \"[Plan rejected]\"})\n return False # 继续循环\n```\n\n新增协议类型只需加新的 `if` 分支。\n\n### match_response: 类型校验\n\n`match_response` 不只按 `request_id` 找状态,还会校验响应类型是否匹配请求类型:\n\n```python\ndef match_response(response_type, request_id, approve):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return # type mismatch, skip\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n if state.status != \"pending\":\n return # already resolved, skip duplicate\n state.status = \"approved\" if approve else \"rejected\"\n```\n\n一个 shutdown_response 不会意外 approve 一个 plan_approval 请求。\n\n### 统一 inbox 消费:consume_lead_inbox\n\n`check_inbox` 工具和主循环末尾都调用同一个 `consume_lead_inbox()` 函数,先路由协议消息再返回剩余内容,避免消息被读走但协议状态没更新:\n\n```python\ndef 
consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n```\n\n主循环末尾还会把 inbox 消息注入到 `history`,让 LLM 能看到并做出反应。\n\n### 队友 idle loop:等待而不是退出\n\ns15 的队友跑完 10 轮就退出。s16 的队友在 LLM 返回非 tool_use 后进入 idle 等待:轮询 inbox,收到 shutdown_request 就响应退出,收到新消息就继续工作。\n\n```\nLLM 返回非 tool_use\n → idle: 每秒轮询 inbox\n → 收到 shutdown_request → 回复 shutdown_response → 退出\n → 收到新消息 → 注入 messages → 继续 LLM turn\n```\n\n教学版省略了 idle_notification 给 Lead 的通知。真实 CC 在 idle 时发 `idle_notification`,Lead 收到后知道队友空闲,可以分配新任务。\n\n### 合起来跑\n\n```\n1. Lead: \"让 Alice 创建一个文件,然后关机\"\n2. Lead → spawn_teammate(\"alice\", \"backend\", \"创建 config.py\")\n3. alice 线程启动 → write_file(\"config.py\", \"...\") → 完成 → idle\n4. Lead → request_shutdown(\"alice\")\n → BUS.send(\"shutdown_request\", {request_id: \"req_000142\"})\n5. alice idle 轮询收到 → handle_shutdown_request\n → BUS.send(\"shutdown_response\", {request_id: \"req_000142\", approve: True})\n6. 
Lead consume_lead_inbox → match_response(\"req_000142\", approve=True)\n → pending_requests[\"req_000142\"].status = \"approved\"\n → inbox 消息注入 history,LLM 看到关机结果\n```\n\n关机握手完整:请求 → 确认 → 关机。每一步有 `request_id` 追溯。\n\n---\n\n## 相对 s15 的变更\n\n| 组件 | 之前 (s15) | 之后 (s16) |\n|------|-----------|-----------|\n| 协调方式 | 松散文本消息 | 结构化请求-响应协议 |\n| 请求追踪 | 无 | ProtocolState + pending_requests dict |\n| 消息路由 | 全部当文本处理 | dispatch_message 按类型分发 |\n| 关机 | 自然退出或杀线程 | request_id 握手机制 |\n| 计划审批 | 无 | 消息流程示例(未实现执行门控) |\n| 新消息类型 | message, result | + shutdown_request/response, plan_approval_request/response |\n| 队友生命周期 | 最多 10 轮 | idle loop(等待 inbox 消息) |\n| Lead inbox | check_inbox 和主循环分别读 | 统一 consume_lead_inbox |\n| Lead 工具 | 14 (s15) | 14(核心工具集加入 request_shutdown, request_plan, review_plan) |\n| 队友工具 | 4 (s15) | + submit_plan (5) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s16_team_protocols/code.py\n```\n\n试试这些 prompt:\n\n1. `Spawn alice as a backend dev. Ask her to create a file. Then request her shutdown.`\n2. `Spawn bob with a refactoring task. Have him submit a plan first. Then review and approve it.`\n\n观察重点:关机握手是否完整(请求 → 确认 → 关机)?`pending_requests` 的状态是否正确转换?`request_id` 是否在请求和响应之间保持一致?队友 idle 后是否能收到 shutdown_request?\n\n---\n\n## 接下来\n\ns15-s16 中,Lead 必须给每个队友分配任务。\"Alice 做这个,Bob 做那个\"。任务看板上有 10 个未认领的任务,Lead 得手动 assign。\n\n能不能让队友自己看板、自己认领?Lead 只需要创建任务,队友自己发现、自己认领、自己完成。\n\ns17 Autonomous Agents → 队友自组织,不需要领导分配。\n\n
\n深入 CC 源码\n\nCC 的团队协议实现(`teammateMailbox.ts`,1184 行)和教学版在核心结构上一致:request_id + approve/reject 的请求-响应模式。差异在于:\n\n**关机协议**:CC 的 shutdown 是三向通信(`teammateMailbox.ts:720-763`、`SendMessageTool.ts:268-430`)。Lead 发 `shutdown_request`,队友回复 `shutdown_approved`(或 `shutdown_rejected` 附原因),系统发送 `teammate_terminated` 通知所有相关方。关机确认后系统自动清理 pane(tmux/iTerm2)、unassign 任务、从 team config 移除成员(`useInboxPoller.ts:677-800`)。教学版用 `shutdown_response` 统一命名,真实源码拆成 approved/rejected 两种独立消息。\n\n**计划审批**:真实源码里 plan approval request 由 `ExitPlanModeV2Tool.ts:263-312` 在 plan-mode-required 队友退出 plan mode 时产生。`useInboxPoller.ts:599-661` 当前会自动回写 approval,并把请求交给 Lead 作为上下文(regular message)。`SendMessageTool.ts:434-518` 仍保留显式 approve/reject response 能力,审批时可同时设置 `permissionMode`(如\"批准但以 plan mode 运行\"),响应中可包含 `feedback` 字符串供队友修正后重新提交。不是简单的\"Lead 手动 review_plan 工具\"流程。\n\n**消息格式**:CC 的协议消息是结构化的 JSON(有 Zod schema 验证),教学版用简单的 type + metadata 字典。字段名也不统一:permission 用 `request_id`(`teammateMailbox.ts:453-462`),shutdown 和 plan approval 用 `requestId`(`teammateMailbox.ts:684-763`)。\n\n**执行门控**:CC 的队友有完整的 permission gating。未获批准的高风险操作会被拦截,不是可选的。教学版只演示了消息流程,没有实现执行拦截。\n\n**通用性**:教学版的一个 FSM(pending → approved | rejected)对应两种协议,这个简化完全正确。CC 的所有协议消息共用同一个 request id 关联机制。\n\n
\n\n\n" + }, + { + "version": "s16", + "locale": "ja", + "title": "s16: Team Protocols — チームメイト間には取り決めが必要", + "content": "# s16: Team Protocols — チームメイト間には取り決めが必要\n\ns01 → ... → s14 → s15 → `s16` → [s17](/ja/s17) → s18 → s19 → s20\n> *\"チームメイト間には取り決めが必要\"* — request-response パターンが全てのネゴシエーションを駆動。\n>\n> **Harness 層**: プロトコル — Agent 間の構造化ハンドシェイク。\n\n---\n\n## 課題\n\ns15 のチームメイトは仕事ができるが、連携は緩い:Lead がメッセージを送り、チームメイトが返信するだけで、構造化されたプロトコルがない。2 つのシナリオで問題が露呈する:\n\n**シャットダウン**:Lead が Alice にシャットダウンを頼む。スレッドを強制終了すると、書きかけのファイルがディスクに残る。ハンドシェイクが必要:Lead がリクエストを送信、Alice が後片付けを終えてから確認。\n\n**計画承認**:Bob が認証モジュールのリファクタリングを提案、高リスク操作。Lead が Bob の計画を確認し、承認後に実行すべき。\n\nこれら 2 つのシナリオは同じ構造:一方がリクエストを送信、もう一方が返信、両者は同じ ID で関連付けられる。状態機械が追跡:pending → approved / rejected。\n\n---\n\n## ソリューション\n\n![Team Protocols Overview](/course-assets/s16_team_protocols/team-protocols-overview.ja.svg)\n\n教学版は前章までの Agent 能力の流れを受け継ぎ、S15 のチーム通信の上に構造化プロトコルを追加する。プロトコル機構に集中するため、完全なエラーリカバリ、メモリ、スキルシステムは省略。追加:**ProtocolState**(リクエスト状態追跡)、**dispatch_message**(メッセージタイプ別ルーティング)、**match_response**(request_id でリクエストとレスポンスを関連付け、型検証付き)。\n\n2 つのプロトコル、1 つの仕組み:\n\n| プロトコル | 方向 | 用途 |\n|-----------|------|------|\n| shutdown_request / response | Lead → チームメイト | 丁寧なシャットダウンハンドシェイク |\n| plan_approval_request / response | チームメイト → Lead | 計画承認プロトコルの例 |\n\n> 教学版は計画承認の request-response メッセージフローをデモするが、実行ゲーティング(未承認時の bash/write_file のインターセプト)は未実装。実際の CC にはチームメイト向けの permission gating 機構がある。\n\n---\n\n## 仕組み\n\n### ProtocolState: リクエスト状態\n\n各プロトコルリクエストは、送信者、受信者、現在の状態、ペイロードを記録する状態レコードを作成:\n\n```python\n@dataclass\nclass ProtocolState:\n request_id: str # 一意 ID、例 \"req_004281\"\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str # 送信者\n target: str # 受信者\n status: str # pending | approved | rejected\n payload: str # 計画テキストまたはシャットダウン理由\n created_at: float # タイムスタンプ\n\npending_requests: dict[str, ProtocolState] = {}\n```\n\nリクエスト送信時にレコードを作成、レスポンス受信時に `request_id` で該当レコードを見つけて状態を更新。\n\n### 4 ステッププロトコルフロー\n\nシャットダウンを例にした完全な流れ:\n\n```\n1. 
Lead がリクエスト送信\n req_id = new_request_id() # \"req_004281\"\n pending_requests[req_id] = ProtocolState(type=\"shutdown\", status=\"pending\", ...)\n BUS.send(\"lead\", \"alice\", \"shutdown_request\", metadata={\"request_id\": req_id})\n\n2. チームメイト受信 → dispatch\n inbox = BUS.read_inbox(\"alice\")\n msg_type = msg[\"type\"] # \"shutdown_request\"\n → handle_shutdown_request() にルーティング\n\n3. チームメイト返信\n BUS.send(\"alice\", \"lead\", \"shutdown_response\",\n metadata={\"request_id\": req_id, \"approve\": True})\n\n4. Lead がレスポンス受信 → match\n match_response(\"shutdown_response\", req_id, approve=True)\n pending_requests[req_id].status = \"approved\"\n```\n\n`request_id` はチェーン全体を貫く関連キー、リクエストが持ち出し、レスポンスが持ち帰る。\n\n### dispatch_message: タイプ別ルーティング\n\nチームメイトの inbox は通常メッセージとプロトコルメッセージの両方を受信。`handle_inbox_message` がメッセージタイプで振り分け:\n\n```python\ndef handle_inbox_message(name, msg, messages):\n msg_type = msg.get(\"type\", \"message\")\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\", \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True # ループ停止\n\n if msg_type == \"plan_approval_response\":\n approve = msg[\"metadata\"].get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve else \"[Plan rejected]\"})\n return False # 継続\n```\n\n新しいプロトコルタイプの追加は新しい `if` 分岐を追加するだけ。\n\n### match_response: 型検証\n\n`match_response` は `request_id` で状態を見つけるだけでなく、レスポンスタイプがリクエストタイプと一致するか検証:\n\n```python\ndef match_response(response_type, request_id, approve):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return # タイプ不一致、スキップ\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n if state.status != \"pending\":\n return # 既に解決済み、重複をスキップ\n state.status = \"approved\" if approve else 
\"rejected\"\n```\n\nshutdown_response が誤って plan_approval リクエストを承認することはない。\n\n### 統一 inbox コンシューマ:consume_lead_inbox\n\n`check_inbox` ツールとメインループ末尾の両方が同じ `consume_lead_inbox()` 関数を呼び出す。プロトコルメッセージを先にルーティングしてから残りの内容を返す。メッセージが消費されてもプロトコル状態が更新されない問題を防ぐ:\n\n```python\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n```\n\nメインループは inbox メッセージを `history` に注入し、LLM が確認して反応できるようにする。\n\n### チームメイト idle loop:終了ではなく待機\n\ns15 のチームメイトは 10 ラウンドで終了。s16 のチームメイトは LLM が非 tool_use を返した後 idle 待機に入る:inbox をポーリング、shutdown_request に応答して終了、または新メッセージで作業継続。\n\n```\nLLM が非 tool_use を返す\n → idle: 毎秒 inbox をポーリング\n → shutdown_request 受信 → shutdown_response 返信 → 終了\n → 新メッセージ受信 → messages に注入 → LLM ターン継続\n```\n\n教学版は Lead への idle_notification を省略。真实 CC は idle 時に `idle_notification` を送信、Lead はチームメイトが空いていることを知り、新しいタスクを割り当て可能。\n\n### 組み合わせて実行\n\n```\n1. Lead: \"Alice にファイルを作成させ、その後シャットダウン\"\n2. Lead → spawn_teammate(\"alice\", \"backend\", \"config.py を作成\")\n3. alice スレッド起動 → write_file(\"config.py\", \"...\") → 完了 → idle\n4. Lead → request_shutdown(\"alice\")\n → BUS.send(\"shutdown_request\", {request_id: \"req_000142\"})\n5. alice idle ポーリング受信 → handle_shutdown_request\n → BUS.send(\"shutdown_response\", {request_id: \"req_000142\", approve: True})\n6. 
Lead consume_lead_inbox → match_response(\"req_000142\", approve=True)\n → pending_requests[\"req_000142\"].status = \"approved\"\n → inbox メッセージが history に注入、LLM がシャットダウン結果を確認\n```\n\nシャットダウンハンドシェイク完了:リクエスト → 確認 → シャットダウン。各ステップは `request_id` で追跡。\n\n---\n\n## s15 からの変更\n\n| コンポーネント | 変更前 (s15) | 変更後 (s16) |\n|--------------|------------|------------|\n| 連携方法 | 緩いテキストメッセージ | 構造化 request-response プロトコル |\n| リクエスト追跡 | なし | ProtocolState + pending_requests dict |\n| メッセージルーティング | 全てテキストとして処理 | dispatch_message がタイプ別にルーティング |\n| シャットダウン | 自然終了またはスレッド強制終了 | request_id ハンドシェイク機構 |\n| 計画承認 | なし | メッセージフローの例(実行ゲーティングなし) |\n| 新規メッセージ型 | message, result | + shutdown_request/response, plan_approval_request/response |\n| チームメイトライフサイクル | 最大 10 ラウンド | idle loop(inbox メッセージを待機) |\n| Lead inbox | check_inbox とメインループが別々に読み取り | 統一 consume_lead_inbox |\n| Lead ツール | 14 (s15) | 14(コアツールセットに request_shutdown、request_plan、review_plan を追加) |\n| チームメイトツール | 4 (s15) | + submit_plan (5) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s16_team_protocols/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Spawn alice as a backend dev. Ask her to create a file. Then request her shutdown.`\n2. `Spawn bob with a refactoring task. Have him submit a plan first. Then review and approve it.`\n\n観察ポイント:シャットダウンハンドシェイクは完了しているか(リクエスト → 確認 → シャットダウン)?`pending_requests` の状態は正しく遷移しているか?`request_id` はリクエストとレスポンス間で一貫しているか?idle チームメイトは shutdown_request を受信できるか?\n\n---\n\n## 次の章\n\ns15-s16 では、Lead が各チームメイトにタスクを割り当てる必要がある。\"Alice はこれ、Bob はあれ\"。ボードに 10 個の未割り当てタスクがあれば、Lead が手動で assign しなければならない。\n\nチームメイトが自分でボードを見て引き受けられたらどうか?Lead はタスクを作成するだけで、チームメイトが自分で発見、引き受け、完了する。\n\ns17 Autonomous Agents → チームメイトの自己組織化、リーダーの割り当て不要。\n\n
\nCC ソースコード深掘り\n\nCC のチームプロトコル実装(`teammateMailbox.ts`、1184 行)は教学版と同じコア構造:request_id + approve/reject の request-response パターン。違いは以下の通り:\n\n**シャットダウンプロトコル**:CC のシャットダウンは三方向通信(`teammateMailbox.ts:720-763`、`SendMessageTool.ts:268-430`)。Lead が `shutdown_request` を送信、チームメイトが `shutdown_approved`(または理由付き `shutdown_rejected`)で返信、システムが `teammate_terminated` で全関係者に通知。確認後、システムが自動的に pane(tmux/iTerm2)をクリーンアップ、タスクを unassign、team config からメンバーを削除(`useInboxPoller.ts:677-800`)。教学版は `shutdown_response` で統一命名、実際のソースコードは `shutdown_approved` と `shutdown_rejected` の 2 つの独立したメッセージ型に分割。\n\n**計画承認**:実際のソースコードでは plan approval request は `ExitPlanModeV2Tool.ts:263-312` で plan-mode-required チームメイトが plan mode を終了する際に生成される。`useInboxPoller.ts:599-661` は現在自動的に approval を書き戻し、リクエストを Lead にコンテキスト(regular message)として渡す。`SendMessageTool.ts:434-518` は明示的な approve/reject response 能力を保持、承認時に同時に `permissionMode` を設定可能(例:\"承認するが plan mode で実行\")、レスポンスにはチームメイトが修正して再提出するための `feedback` 文字列を含めることができる。単純な「Lead が手動で review_plan ツールを使う」フローではない。\n\n**メッセージ形式**:CC のプロトコルメッセージは構造化 JSON(Zod schema 検証付き)、教学版はシンプルな type + metadata dict。フィールド名も統一されていない:permission は `request_id`(`teammateMailbox.ts:453-462`)、shutdown と plan approval は `requestId`(`teammateMailbox.ts:684-763`)。\n\n**実行ゲーティング**:CC のチームメイトには完全な permission gating がある。未承認の高リスク操作はインターセプトされ、オプションではない。教学版はメッセージフローのみをデモし、実行時のインターセプトは実装していない。\n\n**汎用性**:教学版の 1 つの FSM(pending → approved | rejected)が 2 つのプロトコルに対応する簡略化は正しい。CC の全プロトコルメッセージは同じ request id 関連機構を共有。\n\n
\n\n\n" + }, + { + "version": "s17", + "locale": "en", + "title": "s17: Autonomous Agents — Check the Board, Claim the Task", + "content": "# s17: Autonomous Agents — Check the Board, Claim the Task\n\ns01 → ... → s15 → s16 → `s17` → [s18](/en/s18) → s19 → s20\n\n> *\"Check the board, claim the task\"* — poll when idle, work when found.\n>\n> **Harness Layer**: Autonomy — Self-organizing teammates, no leader assignment needed.\n\n---\n\n## The Problem\n\ns16's teammates can communicate and handshake shutdown. But each teammate waits for Lead to assign tasks — with 10 unclaimed tasks on the board, Lead has to manually assign 10 times. This doesn't scale. Teammates should check the task board themselves, claim unowned tasks, and look for the next one when done.\n\n---\n\n## The Solution\n\n![Autonomous Agents Overview](/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg)\n\nCarries forward S16's teaching-version MessageBus and protocol tools. This chapter adds: **idle_poll** (poll every 5 seconds when idle), **scan_unclaimed_tasks** (scan the board for claimable tasks), **auto-claim** (claim on sight, no Lead needed).\n\nTeammate lifecycle expands from two phases to three:\n\n| Phase | Behavior | Exit condition |\n|-------|----------|----------------|\n| WORK | inbox → LLM → tool loop | `stop_reason != tool_use` |\n| IDLE | 5s poll inbox + task board | 60s timeout |\n| SHUTDOWN | Send summary, exit | — |\n\n---\n\n## How It Works\n\n### idle_poll: Idle Polling\n\nAfter completing a task, the teammate doesn't exit. 
It enters the IDLE phase — checking every 5 seconds for new work:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① Check inbox (priority)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request handled immediately\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... reply shutdown_response\n return \"shutdown\"\n # Regular messages: inject into context, return to WORK\n messages.append(...)\n return \"work\"\n\n # ② Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\nInbox takes priority (may contain protocol messages like shutdown_request), task board second. A shutdown_request received during IDLE is dispatched immediately — no need to wait for the next WORK phase.\n\n### scan_unclaimed_tasks: Scan the Task Board\n\nFind tasks that are pending, unowned, with all dependencies completed (`can_start`):\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\nThree conditions: must be pending, no owner, all blockedBy dependencies completed. `can_start` checks dependency task status — having dependencies doesn't mean the task can't start, only unresolved dependencies block it. 
Teaching version picks the first by filename; CC uses file locks to prevent multiple teammates from claiming the same task.\n\n### claim_task: Owner Check\n\nAuto-claim checks the claim result, not treating failure as success:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\nTeaching version has no file locks, so concurrent claims may still race. But the `task.owner` check avoids the most obvious \"last writer wins\" problem. CC uses `proper-lockfile` to protect task files, with `claimTask` doing read-modify-write inside a file lock (`utils/tasks.ts:541-612`).\n\n### Teammate Lifecycle: WORK → IDLE → SHUTDOWN\n\ns16's teammates exit after finishing. s17 adds the IDLE phase — teammates cycle through WORK → IDLE in an outer loop:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: inner loop (max 10 LLM rounds)\n for _ in range(10):\n # Check inbox, dispatch protocol, call LLM, execute tools\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK phase ends\n\n # IDLE phase\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s timeout → SHUTDOWN\n\n# SHUTDOWN: send summary to Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\nKey design:\n- **Outer while True**: WORK and IDLE alternate until timeout or shutdown request\n- **Inner for 10**: WORK phase caps at 10 LLM rounds (prevents infinite loops)\n- **IDLE timeout 60s**: 12 polls × 5s = 60s. 
Timeout sends summary and exits\n- **shutdown_request works in both phases**: WORK phase dispatches via `handle_inbox_message`; IDLE phase's `idle_poll` checks and replies directly\n\n### Identity Re-injection\n\nAfter autoCompact (s08), a teammate's messages list may be compressed into a summary. On each new WORK phase entry, check:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nShort messages suggest compression happened — re-inject identity. In real CC, context compaction preserves the system prompt; the teaching version's simplified implementation needs manual handling.\n\n### consume_lead_inbox: Unified Inbox Consumer\n\nBoth the `check_inbox` tool and the main loop call the same `consume_lead_inbox()` function: route protocol responses to update state first, then inject all messages into Lead's conversation history. Teammates' summaries and results don't just print to terminal — Lead's LLM can see them and coordinate next steps.\n\n### Putting It Together\n\n```\n1. Lead: \"Build the backend — too many tasks, let teammates self-claim\"\n2. Lead → create_task(\"Create database schema\")\n3. Lead → create_task(\"Write API routes\")\n4. Lead → create_task(\"Write unit tests\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"You are a backend developer\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"You are a backend developer\")\n\n7. alice thread starts → WORK: no initial inbox → spins → IDLE\n8. bob thread starts → WORK: no initial inbox → spins → IDLE\n\n9. alice IDLE poll 1 → scan_unclaimed → finds \"Create database schema\"\n10. alice → claim_task → \"Create database schema\" → back to WORK\n11. bob IDLE poll 1 → scan_unclaimed → finds \"Write API routes\"\n12. bob → claim_task → \"Write API routes\" → back to WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK ends\n14. 
alice IDLE → scan → \"Write unit tests\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK ends\n16. alice IDLE → 60s no new tasks → SHUTDOWN\n\n17. bob similar flow → done → SHUTDOWN\n18. Lead consume_lead_inbox → sees alice and bob's summaries\n```\n\nTwo teammates claim and work in parallel. Lead only creates tasks and spawns teammates — no manual assignment needed.\n\n---\n\n## Changes from s16\n\n| Component | Before (s16) | After (s17) |\n|-----------|-------------|-------------|\n| Task assignment | Lead manually assigns | Teammates auto-claim (can_start checks deps) |\n| Teammate state | WORK or exit | WORK → IDLE (60s poll) → SHUTDOWN |\n| claim_task | No owner check | Rejects tasks that already have an owner |\n| IDLE phase shutdown | Doesn't handle shutdown_request | Dispatches shutdown immediately and exits |\n| Lead inbox | Prints only, not in context | consume_lead_inbox injects into history |\n| New functions | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| Identity persistence | System prompt only | Auto re-inject after compression |\n| Lead tools | 14 (s16) | 14 (unchanged) |\n| Teammate tools | 5 | 8 (+ list_tasks, claim_task, complete_task) |\n| Teammate exit | Exit after task done | Exit only after 60s idle timeout |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\nTry this prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\nWhat to observe: Do teammates auto-claim unassigned tasks? Are tasks with blockedBy dependencies claimed only after their dependencies complete? Does idle timeout trigger shutdown? Does a shutdown_request in IDLE phase get an immediate response? How do task states change in `.tasks/`?\n\n---\n\n## What's Next\n\nTeammates self-organize now. 
But Alice and Bob both work in the same directory — Alice edits `config.py`, Bob also edits `config.py`, overwriting each other.\n\ns18 Worktree Isolation → Each task gets its own working directory, no conflicts.\n\n
\nDeep Dive into CC Source\n\n> Teaching note: This chapter's idle_poll + auto-claim mechanism is a teaching design, using a unified polling function to demonstrate \"find work when idle.\" CC's actual implementation combines multiple mechanisms, but shares the same goal — reducing Lead's manual assignment burden.\n\n### 1. CC's Idle Mechanism: Combined Approach, Not Single Polling\n\nTeaching version uses a single `idle_poll()` to handle both inbox checking and task claiming during idle. CC's actual implementation combines four mechanisms:\n\n**idle_notification**: After completing a round of work, `sendIdleNotification()` (`inProcessRunner.ts:569-589`) sends an idle notification to Lead. Lead knows the teammate is available and can assign new tasks or request shutdown.\n\n**mailbox polling**: `waitForNextPromptOrShutdown()` (`inProcessRunner.ts:689-868`) is a **500ms polling loop** that continuously checks three sources: pending user messages, mailbox file messages, and task list. Shutdown requests are prioritized (`inProcessRunner.ts:768-804`), preventing starvation by regular messages.\n\n**task watcher**: `useTaskListWatcher` (`hooks/useTaskListWatcher.ts:34-189`) uses `fs.watch()` to monitor the `.claude/tasks/` directory with 1-second debounce, triggering checks when new tasks are created or dependencies unblock. The dependency check (`L197-207`) verifies \"no incomplete tasks in blockedBy\", not \"blockedBy is empty\".\n\n**active claiming**: The polling loop also calls `tryClaimNextTask()` (`inProcessRunner.ts:853-860`) — actively claiming tasks from the task list while waiting. So \"teammates don't actively poll for tasks\" is inaccurate; CC has both passive notification and active claiming.\n\n### 2. Task Claiming: File Locks + Atomic Operations\n\n`claimTask()` (`utils/tasks.ts:541-612`) uses `proper-lockfile` task-level locks, performing read-check-modify-write within the lock. 
Checks: owner already exists (`L575-576`), already completed (`L580-581`), unresolved blockers in blockedBy (`L585-594`). `claimTaskWithBusyCheck()` (`utils/tasks.ts:614-692`) uses task-list level locks, making busy check and claim atomic to avoid TOCTOU.\n\n`findAvailableTask()` (`inProcessRunner.ts:595-604`) checks \"all blockedBy completed\" using `task.blockedBy.every(id => !unresolvedTaskIds.has(id))`. `tryClaimNextTask()` (`inProcessRunner.ts:624-657`) updates status to `in_progress` after claiming, so the UI immediately reflects the change.\n\n### 3. Teaching Version vs CC Comparison\n\n| Dimension | Teaching (s17) | CC |\n|-----------|----------------|-----|\n| Idle mechanism | idle_poll unified polling (5s) | idle_notification + 500ms mailbox polling + task watcher |\n| Task discovery | scan_unclaimed_tasks (polling) | useTaskListWatcher (file watching) + tryClaimNextTask (active polling) |\n| Dependency check | can_start (all blockedBy completed) | findAvailableTask (same semantics) |\n| Concurrency safety | Owner check (no file lock) | proper-lockfile task lock + task-list lock |\n| Shutdown handling | IDLE dispatches directly, WORK via handle_inbox_message | 500ms polling loop prioritizes shutdown_request |\n| Timeout exit | 60s with no new tasks | No fixed timeout, Lead manual shutdown |\n| Identity persistence | Messages length detection | Context compaction preserves system prompt |\n| Claim failure handling | Check return value, skip on failure | File locks guarantee atomicity |\n\nTeaching version's `idle_poll()` merges CC's four mechanisms into one polling function — a reasonable simplification since the core semantics (find work when idle, claim after deps resolve, prioritize shutdown) are consistent.\n\n
\n\n\n" + }, + { + "version": "s17", + "locale": "zh", + "title": "s17: Autonomous Agents — 自己看板,自己认领", + "content": "# s17: Autonomous Agents — 自己看板,自己认领\n\ns01 → ... → s15 → s16 → `s17` → [s18](/zh/s18) → s19 → s20\n\n> *\"自己看板,自己认领\"* — 空闲时轮询,有活就干。\n>\n> **Harness 层**: 自治 — 队友自组织,不依赖 Lead 分配。\n\n---\n\n## 问题\n\ns16 的队友能通信、能握手关机。但每个队友等 Lead 分配任务——如果任务看板上有 10 个未认领任务,Lead 得手动 assign 10 次。这不能扩展。队友应该自己看任务看板,发现没人做的任务就认领,做完再找下一个。\n\n---\n\n## 解决方案\n\n![Autonomous Agents Overview](/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg)\n\n沿用 S16 的教学版 MessageBus 和协议工具。本章新增:**idle_poll**(空闲时每 5 秒轮询一次)、**scan_unclaimed_tasks**(扫描看板上可认领的任务)、**自动认领**(找到任务就 claim,不用 Lead 操心)。\n\n队友生命周期从两阶段变成三阶段:\n\n| 阶段 | 行为 | 退出条件 |\n|------|------|---------|\n| WORK | inbox → LLM → 工具循环 | `stop_reason != tool_use` |\n| IDLE | 每 5s 轮询 inbox + 任务板 | 60s 超时 |\n| SHUTDOWN | 发 summary,退出 | — |\n\n---\n\n## 工作原理\n\n### idle_poll: 空闲轮询\n\n队友完成当前任务后不退出,进入 IDLE 阶段——每 5 秒检查一次有没有新工作:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 检查收件箱(优先)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request 立即处理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
回复 shutdown_response\n return \"shutdown\"\n # 普通消息注入上下文,回到 WORK\n messages.append(...)\n return \"work\"\n\n # ② 扫描任务看板\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox 优先(可能包含 shutdown_request 等协议消息),任务板其次。IDLE 阶段收到 shutdown_request 会直接回复并退出,不等到下一轮 WORK。\n\n### scan_unclaimed_tasks: 扫描任务看板\n\n找 pending 状态、无 owner、所有依赖已完成(`can_start`)的任务:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n三个条件:必须是 pending、没有 owner、所有 blockedBy 依赖已完成。`can_start` 检查依赖任务的状态——有依赖不代表不能做,只有被未完成的任务阻塞才不能做。教学版按文件名排序取第一个;CC 用文件锁防止多个队友同时认领同一个任务。\n\n### claim_task: owner 检查\n\n自动认领时检查 claim 结果,不把失败当成功:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版没有文件锁,并发认领可能出现竞争。但至少 `task.owner` 检查避免了最明显的\"后写覆盖\"问题。CC 用 `proper-lockfile` 保护任务文件,`claimTask` 在文件锁内完成读-改-写(`utils/tasks.ts:541-612`)。\n\n### 队友生命周期: WORK → IDLE → SHUTDOWN\n\ns16 的队友做完任务就退出。s17 加了 IDLE 阶段,队友在外层循环中反复 WORK → IDLE:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: 内层循环(最多 10 轮 LLM 调用)\n for _ in range(10):\n # 检查 inbox、处理协议消息、调 LLM、执行工具\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK 阶段结束\n\n # IDLE phase\n idle_result = idle_poll(name, messages, 
name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s 超时 → SHUTDOWN\n\n# SHUTDOWN: 发 summary 给 Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n关键设计:\n- **外层 while True**:WORK 和 IDLE 交替进行,直到超时或收到关机请求\n- **内层 for 10**:WORK 阶段最多 10 轮 LLM 调用(防止无限循环)\n- **IDLE 超时 60 秒**:12 次轮询 × 5 秒 = 60 秒。超时后发送 summary 并退出\n- **shutdown_request 两阶段都能响应**:WORK 阶段通过 `handle_inbox_message` 分发;IDLE 阶段 `idle_poll` 直接检查并回复\n\n### 身份重注入\n\nautoCompact(s08)之后,队友的 messages 列表可能被压缩成一段摘要。每次进入新的 WORK 阶段时检查:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\n消息过短说明发生了压缩,此时重新注入身份信息。真实 CC 中 context compaction 会保留 system prompt,教学版的简化实现需要手动处理。\n\n### consume_lead_inbox: 统一 inbox 消费\n\n`check_inbox` 工具和主循环末尾都调用同一个 `consume_lead_inbox()` 函数:先路由协议 response 更新状态,再把所有消息注入 Lead 的对话历史。队友发来的 summary/result 不会只打印在终端,Lead 的 LLM 能看到并协调下一步。\n\n### 合起来跑\n\n```\n1. Lead: \"搭建后端——任务太多,让队友自己认领\"\n2. Lead → create_task(\"创建数据库 schema\")\n3. Lead → create_task(\"写 API 路由\")\n4. Lead → create_task(\"写单元测试\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"你是后端开发者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"你是后端开发者\")\n\n7. alice 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n8. bob 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n\n9. alice IDLE 第 1 次轮询 → scan_unclaimed → 发现\"创建数据库 schema\"\n10. alice → claim_task → \"创建数据库 schema\" → 回到 WORK\n11. bob IDLE 第 1 次轮询 → scan_unclaimed → 发现\"写 API 路由\"\n12. bob → claim_task → \"写 API 路由\" → 回到 WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK 结束\n14. alice IDLE → scan → \"写单元测试\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 结束\n16. alice IDLE → 60s 无新任务 → SHUTDOWN\n\n17. bob 类似流程 → 做完 → SHUTDOWN\n18. 
Lead consume_lead_inbox → 看到 alice 和 bob 的 summary\n```\n\n两个队友并行认领、并行工作。Lead 只需要创建任务和启动队友,不需要手动分配。\n\n---\n\n## 相对 s16 的变更\n\n| 组件 | 之前 (s16) | 之后 (s17) |\n|------|-----------|-----------|\n| 任务分配 | Lead 手动 assign | 队友自动认领(can_start 检查依赖) |\n| 队友状态 | WORK 或退出 | WORK → IDLE(轮询 60s) → SHUTDOWN |\n| claim_task | 无 owner 检查 | 拒绝已有 owner 的任务 |\n| IDLE 阶段关机 | 不处理 shutdown_request | 直接 dispatch shutdown 并退出 |\n| Lead inbox | 只打印,不进上下文 | consume_lead_inbox 统一注入 history |\n| 新函数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| 身份保持 | 仅 system prompt | 压缩后自动重注入 |\n| Lead 工具 | 14 (s16) | 14(不变) |\n| 队友工具 | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| 队友退出条件 | 完成任务即退出 | 60s 无新任务才退出 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n试试这个 prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n观察重点:队友是否自动认领了未分配的任务?有 blockedBy 依赖的任务是否在前置完成后被正确认领?空闲超时后是否自动关机?IDLE 阶段收到 shutdown_request 是否立即响应?`.tasks/` 目录下的任务状态如何变化?\n\n---\n\n## 接下来\n\n队友自组织了。但 Alice 和 Bob 都在同一个目录下工作——Alice 改 `config.py`,Bob 也改 `config.py`,互相覆盖。\n\ns18 Worktree Isolation → 每个任务有自己的工作目录,互不干扰。\n\n
\n深入 CC 源码\n\n> 教学说明:本章的 idle_poll + auto-claim 机制是教学设计,用统一的轮询函数演示\"空闲后找活干\"。CC 的实际实现是多个机制的组合,但目标一致——减少 Lead 的手动分配负担。\n\n### 一、CC 的空闲机制:组合路径,不是单一轮询\n\n教学版用一个 `idle_poll()` 统一处理空闲时的 inbox 检查和任务认领。CC 的实际实现是四个机制的组合:\n\n**idle_notification**:队友完成一轮工作后,`sendIdleNotification()`(`inProcessRunner.ts:569-589`)向 Lead 发送空闲通知。Lead 知道队友可用了,可以分配新任务或请求关机。\n\n**mailbox 轮询**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)是一个 **500ms 轮询循环**,持续检查三类来源:pending user messages、mailbox 文件消息、task list。shutdown_request 被优先处理(`inProcessRunner.ts:768-804`),不会被普通消息饿死。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)用 `fs.watch()` 监听 `.claude/tasks/` 目录变化,1 秒 debounce,当新任务创建或依赖解锁时触发检查。依赖判断(`L197-207`)是\"blockedBy 中没有未完成的任务\",不是\"blockedBy 为空\"。\n\n**主动 claim**:轮询循环内部也会调用 `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)——在等待期间主动从 task list 领取任务。所以\"队友不主动轮询任务\"不准确,CC 同时有被动通知和主动认领。\n\n### 二、任务认领:文件锁 + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)用 `proper-lockfile` 的任务文件锁,在锁内完成读-检查-改-写。检查项:owner 是否已存在(`L575-576`)、是否已完成(`L580-581`)、blockedBy 中是否有未完成任务(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)用 task-list 级别锁,把 busy check 和 claim 做成原子操作,避免 TOCTOU。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)的依赖判断也是\"所有 blockedBy 已完成\",用 `task.blockedBy.every(id => !unresolvedTaskIds.has(id))` 实现。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)在认领后把状态更新为 `in_progress`,让 UI 立即反映变化。\n\n### 三、教学版 vs CC 对比\n\n| 维度 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空闲机制 | idle_poll 统一轮询(5s) | idle_notification + 500ms mailbox 轮询 + task watcher |\n| 任务发现 | scan_unclaimed_tasks(轮询) | useTaskListWatcher(文件监听)+ tryClaimNextTask(主动轮询) |\n| 依赖判断 | can_start(所有 blockedBy 已完成) | findAvailableTask(同样语义) |\n| 并发安全 | owner 检查(无文件锁) | proper-lockfile 任务锁 + task-list 锁 |\n| shutdown 处理 | IDLE 直接分发,WORK 通过 handle_inbox_message | 500ms 轮询中优先处理 shutdown_request |\n| 超时退出 | 60s 无新任务 | 无固定超时,Lead 手动 shutdown |\n| 身份保持 | messages 长度检测 | context compaction 保留 
system prompt |\n| claim 失败处理 | 检查返回值,失败不注入 | 文件锁保证原子性 |\n\n教学版的 `idle_poll()` 把 CC 的四个机制合并成一个轮询函数——简化合理,因为核心语义(空闲时找活干、依赖解锁后可认领、shutdown 优先)是一致的。\n\n
\n\n\n" + }, + { + "version": "s17", + "locale": "ja", + "title": "s17: Autonomous Agents — ボードを見て、自分で認領", + "content": "# s17: Autonomous Agents — ボードを見て、自分で認領\n\ns01 → ... → s15 → s16 → `s17` → [s18](/ja/s18) → s19 → s20\n\n> *\"ボードを見て、自分で認領\"* — 空き時にポーリング、仕事があれば開始。\n>\n> **Harness 層**: 自治 — チームメイトが自己組織化、リーダーの割り当て不要。\n\n---\n\n## 課題\n\ns16 のチームメイトは通信でき、シャットダウンハンドシェイクもできる。しかし各チームメイトは Lead がタスクを割り当てるのを待つ——ボードに 10 個の未認領タスクがあれば、Lead は 10 回手動で assign しなければならない。これはスケールしない。チームメイトは自分でタスクボードを見て、未認領のタスクを見つけて認領し、終わったら次を探すべき。\n\n---\n\n## ソリューション\n\n![Autonomous Agents Overview](/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg)\n\nS16 の教学版 MessageBus とプロトコルツールを踏襲。本章の追加:**idle_poll**(空き時に 5 秒ごとにポーリング)、**scan_unclaimed_tasks**(ボード上の認領可能なタスクをスキャン)、**自動認領**(見つけたら即座に claim、Lead 不要)。\n\nチームメイトのライフサイクルは 2 フェーズから 3 フェーズに:\n\n| フェーズ | 動作 | 終了条件 |\n|----------|------|---------|\n| WORK | inbox → LLM → ツールループ | `stop_reason != tool_use` |\n| IDLE | 5s ポーリング inbox + タスクボード | 60s タイムアウト |\n| SHUTDOWN | summary を送信、終了 | — |\n\n---\n\n## 仕組み\n\n### idle_poll: 空き時ポーリング\n\nチームメイトはタスク完了後も終了せず、IDLE フェーズに入る——5 秒ごとに新しい仕事がないか確認:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 受信箱確認(優先)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request は即座に処理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
shutdown_response 返信\n return \"shutdown\"\n # 通常メッセージ:コンテキストに注入、WORK に戻る\n messages.append(...)\n return \"work\"\n\n # ② タスクボードスキャン\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox を優先(shutdown_request 等のプロトコルメッセージの可能性)、タスクボードが次。IDLE フェーズで shutdown_request を受信すると即座に返信して終了し、次の WORK を待つ必要がない。\n\n### scan_unclaimed_tasks: タスクボードスキャン\n\npending 状態、owner なし、全依存関係完了(`can_start`)のタスクを検索:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n3 つの条件:pending であること、owner がないこと、全 blockedBy 依存が完了していること。`can_start` は依存タスクの状態を確認——依存があるからといってタスクを開始できないわけではなく、未解決の依存のみがブロックする。教学版はファイル名順で最初のものを選択、CC はファイルロックで複数チームメイトの同時認領を防止。\n\n### claim_task: owner チェック\n\n自動認領時に claim 結果を確認し、失敗を成功として扱わない:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版にはファイルロックがないため、並行認領で競合する可能性がある。しかし `task.owner` チェックで最も明白な「後書き上書き」問題を回避。CC は `proper-lockfile` でタスクファイルを保護、`claimTask` はファイルロック内で read-modify-write を実行(`utils/tasks.ts:541-612`)。\n\n### チームメイトライフサイクル: WORK → IDLE → SHUTDOWN\n\ns16 のチームメイトはタスク完了後に終了。s17 は IDLE フェーズを追加——外側ループで WORK → IDLE を繰り返す:\n\n```python\n# 外側ループ: WORK → IDLE サイクル\nwhile True:\n # WORK フェーズ: 内側ループ(最大 10 ラウンド LLM 呼び出し)\n for _ in 
range(10):\n # inbox 確認、プロトコルメッセージ処理、LLM 呼び出し、ツール実行\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK フェーズ終了\n\n # IDLE フェーズ\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s タイムアウト → SHUTDOWN\n\n# SHUTDOWN: summary を Lead に送信\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n主要設計:\n- **外側 while True**:WORK と IDLE がタイムアウトまたはシャットダウン要求まで交互に続く\n- **内側 for 10**:WORK フェーズは最大 10 ラウンドの LLM 呼び出し(無限ループ防止)\n- **IDLE タイムアウト 60 秒**:12 回ポーリング × 5 秒 = 60 秒。タイムアウト後 summary を送信して終了\n- **shutdown_request は両フェーズで応答**:WORK フェーズは `handle_inbox_message` でディスパッチ、IDLE フェーズは `idle_poll` が直接確認して返信\n\n### 身份再注入\n\nautoCompact(s08)後、チームメイトの messages リストが要約に圧縮される可能性がある。新しい WORK フェーズに入るたびに確認:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nメッセージが短い場合、圧縮が発生したことを示す——身份情報を再注入。真实 CC では context compaction が system prompt を保持、教学版の簡略実装は手動処理が必要。\n\n### consume_lead_inbox: 統一 inbox コンシューマ\n\n`check_inbox` ツールとメインループ末尾の両方が同じ `consume_lead_inbox()` 関数を呼び出す:プロトコル response を先にルーティングして状態を更新し、全メッセージを Lead の会話履歴に注入。チームメイトからの summary/result は端末に表示されるだけでなく、Lead の LLM も確認して次のステップを調整可能。\n\n### 組み合わせて実行\n\n```\n1. Lead: \"バックエンド構築——タスクが多すぎる、チームメイトに自己認領させる\"\n2. Lead → create_task(\"データベーススキーマを作成\")\n3. Lead → create_task(\"API ルートを書く\")\n4. Lead → create_task(\"ユニットテストを書く\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"あなたはバックエンド開発者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"あなたはバックエンド開発者\")\n\n7. alice スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n8. bob スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n\n9. alice IDLE ポーリング 1 回目 → scan_unclaimed → \"データベーススキーマを作成\" を発見\n10. alice → claim_task → \"データベーススキーマを作成\" → WORK に戻る\n11. bob IDLE ポーリング 1 回目 → scan_unclaimed → \"API ルートを書く\" を発見\n12. bob → claim_task → \"API ルートを書く\" → WORK に戻る\n\n13. 
alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK 終了\n14. alice IDLE → scan → \"ユニットテストを書く\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 終了\n16. alice IDLE → 60s 新しいタスクなし → SHUTDOWN\n\n17. bob も同様のフロー → 完了 → SHUTDOWN\n18. Lead consume_lead_inbox → alice と bob の summary を確認\n```\n\n2 人のチームメイトが並行して認領・作業。Lead はタスクを作成してチームメイトを起動するだけで、手動割り当て不要。\n\n---\n\n## s16 からの変更\n\n| コンポーネント | 変更前 (s16) | 変更後 (s17) |\n|--------------|------------|------------|\n| タスク割り当て | Lead が手動 assign | チームメイトが自動認領(can_start で依存確認) |\n| チームメイト状態 | WORK または終了 | WORK → IDLE(60s ポーリング) → SHUTDOWN |\n| claim_task | owner チェックなし | 既に owner があるタスクを拒否 |\n| IDLE フェーズシャットダウン | shutdown_request を処理しない | 即座にシャットダウンをディスパッチして終了 |\n| Lead inbox | 表示のみ、コンテキストに入らない | consume_lead_inbox で history に注入 |\n| 新規関数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| アイデンティティ保持 | system prompt のみ | 圧縮後に自動再注入 |\n| Lead ツール | 14 (s16) | 14(変更なし) |\n| チームメイトツール | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| チームメイト終了条件 | タスク完了後即終了 | 60s アイドルタイムアウト後のみ終了 |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n以下のプロンプトを試してください:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n観察ポイント:チームメイトは未割り当てのタスクを自動認領したか?blockedBy 依存のあるタスクは依存完了後に正しく認領されたか?アイドルタイムアウトでシャットダウンしたか?IDLE フェーズで shutdown_request に即座に応答したか?`.tasks/` ディレクトリのタスク状態はどう変化したか?\n\n---\n\n## 次の章\n\nチームメイトが自己組織化した。しかし Alice も Bob も同じディレクトリで作業——Alice が `config.py` を編集し、Bob も `config.py` を編集して互いに上書きしてしまう。\n\ns18 Worktree Isolation → 各タスクに専用の作業ディレクトリ、競合なし。\n\n
\nCC ソースコード深掘り\n\n> 教学注記:本章の idle_poll + auto-claim 機構は教学設計であり、統一ポーリング関数で「空き時に仕事を探す」をデモ。CC の実際の実装は複数機構の組み合わせだが、目標は同じ——Lead の手動割り当て負担を軽減。\n\n### 一、CC の空き機構:組み合わせ経路、単一ポーリングではない\n\n教学版は 1 つの `idle_poll()` で空き時の inbox 確認とタスク認領を統一処理。CC の実際の実装は 4 つの機構の組み合わせ:\n\n**idle_notification**:チームメイトが 1 ラウンドの作業を完了後、`sendIdleNotification()`(`inProcessRunner.ts:569-589`)が Lead に空き通知を送信。Lead はチームメイトが利用可能であることを知り、新しいタスクを割り当てたりシャットダウンを要求可能。\n\n**mailbox ポーリング**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)は **500ms ポーリングループ**で、3 つのソースを継続チェック:pending user messages、mailbox ファイルメッセージ、task list。shutdown_request は優先処理(`inProcessRunner.ts:768-804`)、通常メッセージによる飢餓を防止。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)が `fs.watch()` で `.claude/tasks/` ディレクトリの変化を監視、1 秒 debounce で新タスク作成や依存アンロック時にチェックをトリガー。依存判断(`L197-207`)は「blockedBy に未完了タスクがない」で、「blockedBy が空」ではない。\n\n**能動 claim**:ポーリングループ内でも `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)を呼び出し——待機中に task list から能動的にタスクを認領。したがって「チームメイトは能動的にタスクをポーリングしない」は不正確、CC は受動通知と能動認領の両方を持つ。\n\n### 二、タスク認領:ファイルロック + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)は `proper-lockfile` のタスクファイルロックを使用、ロック内で read-check-modify-write を実行。チェック項目:owner が既に存在(`L575-576`)、完了済み(`L580-581`)、blockedBy に未完了タスクがあるか(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)はタスクリストレベルロックを使用、busy check と claim を原子操作にして TOCTOU を回避。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)の依存判断も「全 blockedBy 完了」で、`task.blockedBy.every(id => !unresolvedTaskIds.has(id))` で実装。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)は認領後 status を `in_progress` に更新、UI に即座に反映。\n\n### 三、教学版 vs CC 対比\n\n| 観点 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空き機構 | idle_poll 統一ポーリング(5s) | idle_notification + 500ms mailbox ポーリング + task watcher |\n| タスク発見 | scan_unclaimed_tasks(ポーリング) | useTaskListWatcher(ファイル監視)+ tryClaimNextTask(能動ポーリング) |\n| 依存チェック | can_start(全 blockedBy 完了) | findAvailableTask(同じセマンティクス) |\n| 並行安全性 | owner チェック(ファイルロックなし) | 
proper-lockfile タスクロック + タスクリストロック |\n| shutdown 処理 | IDLE 直接ディスパッチ、WORK は handle_inbox_message | 500ms ポーリングループで shutdown_request を優先 |\n| タイムアウト終了 | 60s 新しいタスクなし | 固定タイムアウトなし、Lead 手動 shutdown |\n| アイデンティティ保持 | messages 長さ検出 | context compaction が system prompt を保持 |\n| claim 失敗処理 | 戻り値を確認、失敗時はスキップ | ファイルロックで原子性を保証 |\n\n教学版の `idle_poll()` は CC の 4 つの機構を 1 つのポーリング関数に統合——核心セマンティクス(空き時に仕事を探す、依存アンロック後に認領、shutdown 優先)が一致するため、合理的な簡略化。\n\n
\n\n\n" + }, + { + "version": "s18", + "locale": "en", + "title": "s18: Worktree Isolation — Separate Directories, No Conflicts", + "content": "# s18: Worktree Isolation — Separate Directories, No Conflicts\n\ns01 → ... → s16 → s17 → `s18` → [s19](/en/s19) → s20\n\n> *\"Separate directories, no conflicts\"* — Tasks own the goal, worktrees own the directory, bound by ID.\n>\n> **Harness Layer**: Isolation — Parallel execution in separate directories.\n\n---\n\n## The Problem\n\nIn s17, Alice and Bob both work in the same directory. Alice's task is \"refactor auth module\", Bob's task is \"refactor UI login page\".\n\nAlice calls `write_file(\"config.py\", ...)`. Bob also calls `write_file(\"config.py\", ...)`. Both edit the same file, overwriting each other. And there's no clean rollback — you can't tell whose changes are whose.\n\ns15-s17 solved \"who does what\" (task system) and \"how to communicate\" (message bus), but not \"where to work\".\n\n---\n\n## The Solution\n\n![Worktree Overview](/course-assets/s18_worktree_isolation/worktree-overview.en.svg)\n\nGit worktree lets you create multiple independent working directories in the same repo, each with its own branch. Alice works in `.worktrees/auth-refactor/`, Bob in `.worktrees/ui-login/` — no conflicts.\n\nCarries forward S17's teaching-version MessageBus, protocols, and autonomous claiming. 
This chapter adds:\n\n| Capability | Purpose |\n|------------|---------|\n| create_worktree | Create isolated directory + branch for a task |\n| bind_task_to_worktree | Bind task and directory (no status change) |\n| remove_worktree / keep_worktree | Cleanup or preserve after completion |\n| validate_worktree_name | Reject path traversal and illegal characters |\n\n---\n\n## How It Works\n\n### Creation: Task-Worktree Binding\n\n```python\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n validate_worktree_name(name) # Only [A-Za-z0-9._-]{1,64}\n path = WORKTREES_DIR / name\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n return f\"Worktree '{name}' created at {path}\"\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name # Write worktree field only\n save_task(task) # Status stays pending, waits for teammate claim\n```\n\nBinding rule: one task binds to one worktree. Binding does NOT change task status — the task stays `pending`, and advances to `in_progress` only when a teammate claims it. This way Lead can pre-create tasks and worktrees, and teammates naturally claim worktree-bound tasks during idle.\n\n### Teammate Tool Cwd Switching\n\nTeaching version maintains a `wt_ctx` dict per teammate, tracking the current worktree path. 
When a teammate claims a task with a worktree, `wt_ctx` is automatically set to the worktree path; the teammate's `bash`, `read_file`, `write_file` execute in the worktree directory:\n\n```python\n# Inside teammate thread\nwt_ctx = {\"path\": None}\n\ndef _run_claim_task(task_id):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n return result\n\ndef _run_bash(command):\n return run_bash(command, cwd=wt_ctx[\"path\"]) # Execute in worktree\n```\n\nThis is a teaching simplification. Real CC's EnterWorktree uses `process.chdir()` to switch the entire process directory, and AgentTool isolation uses `cwdOverride` to wrap sub-agent execution.\n\n### Cleanup: Keep or Remove\n\nAfter task completion, two choices:\n\n```python\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n # Safety check: refuse by default if changes exist\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files > 0 or commits > 0:\n return \"Has uncommitted changes. Use discard_changes=true to force, or keep_worktree\"\n ok, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok:\n return \"Remove failed\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n\ndef keep_worktree(name: str) -> str:\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n```\n\nKeep = preserve branch for manual review and merge. Remove = refuse by default if uncommitted changes; requires `discard_changes=true` to confirm. 
Does NOT auto-complete task — task completion is triggered explicitly by the teammate's `complete_task`.\n\n### Event Log: Auditable\n\nEach lifecycle operation writes to a log for auditing:\n\n```python\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n # append to .worktrees/events.jsonl\n```\n\nEvent types: `create`, `remove`, `keep`. Teaching version logs events for manual auditing; full recovery would need an index or `git worktree list` scanning.\n\n### run_git: Returns Success/Failure\n\n```python\ndef run_git(args: list[str]) -> tuple[bool, str]:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR, ...)\n return r.returncode == 0, output\n```\n\n`create_worktree` and `remove_worktree` only write event logs after successful git commands, ensuring logs reflect actual state.\n\n---\n\n## Changes from s17\n\n| Component | Before (s17) | After (s18) |\n|-----------|-------------|-------------|\n| Working directory | All agents share WORKDIR | Each task can bind to a git worktree |\n| Task data | id/subject/status/owner/blockedBy | + worktree field |\n| Teammate tool cwd | Always WORKDIR | Auto-switches when claiming worktree-bound task |\n| New functions | — | create_worktree, bind_task_to_worktree, remove_worktree, keep_worktree, validate_worktree_name |\n| Worktree safety | None | Name validation + refuse removal with changes |\n| Event log | None | events.jsonl lifecycle auditing |\n| Lead tools | 14 (s17) | + create_worktree, remove_worktree, keep_worktree (17) |\n| Teammate tools | 8 (s17) | 8 (bash/read/write execute in worktree cwd) |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s18_worktree_isolation/code.py\n```\n\nTry this prompt:\n\n`Create two tasks, then create worktrees for each (bind with task_id). Spawn alice and bob. 
Watch them auto-claim and work in isolated directories.`\n\nWhat to observe: Do both worktrees show different branches in `git status`? After claiming a worktree-bound task, does the teammate's bash run in the worktree directory? Does `remove_worktree` refuse when there are changes? Is task status still `pending` after binding?\n\n---\n\n## What's Next\n\nAgent teams can now self-organize in isolated workspaces. But Agent capabilities are limited to the tools we wrote — bash, read, write, task...\n\nWhat if users already have their own tools? Like an internal Jira API, or a custom deployment system?\n\ns19 MCP Plugin → Give Agent a plugin system. External tools connect via standard protocol; Agent doesn't need to know who wrote them.\n\n
\nDeep Dive into CC Source\n\nCC's worktree system has two paths: **EnterWorktree** (current session switches in) and **AgentTool isolation** (sub-agent isolation).\n\n### EnterWorktree: Current Session Switch\n\n`EnterWorktreeTool.ts:92-97` after creating the worktree, immediately calls `process.chdir(worktreePath)`, `setCwd()`, `setOriginalCwd()`, `saveWorktreeState()`. The current session's working directory switches directly to the worktree — not a prompt hint, but a process-level directory change.\n\n`ExitWorktreeTool.ts:261-320` both keep and remove call `restoreSessionToOriginalCwd()` to restore the original directory. Remove checks for uncommitted changes (`ExitWorktreeTool.ts:190-220`), refusing without `discard_changes: true`.\n\n### AgentTool Isolation: Sub-Agent Isolation\n\n`AgentTool.tsx:590-641` when `isolation: \"worktree\"`, calls `createAgentWorktree()` to create a worktree, uses `cwdOverridePath` to wrap sub-agent execution. All sub-agent operations automatically run in the worktree directory. `AgentTool/prompt.ts:272` tells the model: this is a temporary worktree, auto-cleanup if no changes, return path and branch if changes exist.\n\n`worktree.ts:902-951` `createAgentWorktree()` does NOT modify global session cwd, only for sub-agent use. `worktree.ts:961-1020` `removeAgentWorktree()` deletes from the main repo root.\n\n### Name Validation\n\n`worktree.ts:76-84` validates slug: rejects `.`/`..`, allows `[a-zA-Z0-9._-]`. `worktree.ts:48` defines `VALID_WORKTREE_SLUG_SEGMENT`. Teaching version's `validate_worktree_name` uses the same rule.\n\n### Path and Branch Naming\n\nReal path is `.claude/worktrees/`, branch name `worktree-{slug}` (`worktree.ts:204-227`, slashes replaced with `+`). Teaching version uses `.worktrees/` and `wt/{name}` for simplicity.\n\nCreation uses `git worktree add -B` (`worktree.ts:326-328`), preferring `origin/` over current HEAD.\n\n### State Management\n\nCC has no task-worktree binding. 
Worktree state is managed through `PersistedWorktreeSession` (`worktree.ts:756-768`), with fields including `originalCwd`, `worktreePath`, `worktreeName`, `worktreeBranch`, `originalBranch`, `originalHeadCommit`, `sessionId`, etc. — no taskId field. `saveWorktreeState()` (`sessionStorage.ts:2883-2920`) writes to session transcript with `type: 'worktree-state'`.\n\nTeaching version uses the task's `worktree` field for binding, a teaching simplification. CC treats worktree and task as two independent systems, connected through the Agent's context understanding.\n\n
\n\n\n" + }, + { + "version": "s18", + "locale": "zh", + "title": "s18: Worktree Isolation — 各干各的,互不干扰", + "content": "# s18: Worktree Isolation — 各干各的,互不干扰\n\ns01 → ... → s16 → s17 → `s18` → [s19](/zh/s19) → s20\n\n> *\"各干各的目录, 互不干扰\"* — 任务管目标, worktree 管目录, 按 ID 绑定。\n>\n> **Harness 层**: 隔离 — 并行执行的目录隔离。\n\n---\n\n## 问题\n\ns17 中,Alice 和 Bob 都在同一个目录下工作。Alice 的任务是\"重构认证模块\",Bob 的任务是\"重构 UI 登录页\"。\n\nAlice `write_file(\"config.py\", ...)`。Bob 也 `write_file(\"config.py\", ...)`。两个人改同一个文件,互相覆盖。而且无法干净地回滚——分不清哪些改动是谁的。\n\ns15-s17 解决了\"谁干什么\"(任务系统)和\"怎么通信\"(消息总线),但没解决\"在哪干\"。\n\n---\n\n## 解决方案\n\n![Worktree Overview](/course-assets/s18_worktree_isolation/worktree-overview.svg)\n\nGit worktree 让你在同一仓库中创建多个独立的工作目录,每个有自己的分支。Alice 在 `.worktrees/auth-refactor/` 下工作,Bob 在 `.worktrees/ui-login/` 下工作——互不干扰。\n\n沿用 S17 的教学版 MessageBus、协议和自治认领机制。本章新增:\n\n| 能力 | 作用 |\n|------|------|\n| create_worktree | 为任务创建独立目录 + 独立分支 |\n| bind_task_to_worktree | 把任务和工作目录绑定(不改状态) |\n| remove_worktree / keep_worktree | 完成后清理或保留 |\n| validate_worktree_name | 拒绝路径穿越和非法字符 |\n\n---\n\n## 工作原理\n\n### 创建:任务-Worktree 绑定\n\n```python\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n validate_worktree_name(name) # 只允许 [A-Za-z0-9._-]{1,64}\n path = WORKTREES_DIR / name\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n return f\"Worktree '{name}' created at {path}\"\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name # 只写 worktree 字段\n save_task(task) # 状态保持 pending,等队友 claim\n```\n\n绑定规则:一个任务绑定一个 worktree。绑定不改任务状态——任务仍是 `pending`,队友自动认领时才推进到 `in_progress`。这样 Lead 可以提前创建任务和 worktree,队友 idle 时自然认领带 worktree 的任务。\n\n### 队友工具的 cwd 切换\n\n教学版给每个队友维护一个 `wt_ctx` 字典,记录当前 worktree 路径。队友认领带 worktree 的任务时,`wt_ctx` 自动设置为 worktree 路径;队友的 
`bash`、`read_file`、`write_file` 在 worktree 目录下执行:\n\n```python\n# 队友线程内部\nwt_ctx = {\"path\": None}\n\ndef _run_claim_task(task_id):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n return result\n\ndef _run_bash(command):\n return run_bash(command, cwd=wt_ctx[\"path\"]) # 在 worktree 下执行\n```\n\n这是教学简化。真实 CC 的 EnterWorktree 用 `process.chdir()` 切换整个进程目录,AgentTool isolation 用 `cwdOverride` 包住子 agent 执行。\n\n### 收尾:Keep 还是 Remove\n\n任务完成后,两个选择:\n\n```python\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n # 安全检查:有改动时默认拒绝\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files > 0 or commits > 0:\n return \"有未提交改动,使用 discard_changes=true 强制删除,或 keep_worktree 保留\"\n ok, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok:\n return \"删除失败\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n\ndef keep_worktree(name: str) -> str:\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n```\n\nKeep = 留着分支,等人工 review 后合并到主分支。Remove = 有改动时默认拒绝,需要 `discard_changes=true` 确认。不自动 complete task——任务完成由队友的 `complete_task` 显式触发。\n\n### 事件流:可审计\n\n每次生命周期操作写入日志,方便排查:\n\n```python\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n # append to .worktrees/events.jsonl\n```\n\n事件类型:`create`(创建)、`remove`(删除)、`keep`(保留)。教学版只记录事件用于人工排查;完整恢复还需要 index 或 `git worktree list` 扫描。\n\n### run_git:返回成功/失败\n\n```python\ndef run_git(args: list[str]) -> tuple[bool, str]:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR, ...)\n return r.returncode == 0, output\n```\n\n`create_worktree` 和 `remove_worktree` 只在 git 命令成功后才写事件日志,保证日志反映真实状态。\n\n---\n\n## 相对 s17 的变更\n\n| 组件 | 之前 (s17) | 之后 (s18) 
|\n|------|-----------|-----------|\n| 工作目录 | 所有 Agent 共享 WORKDIR | 每个任务可绑定独立 git worktree |\n| Task 数据 | id/subject/status/owner/blockedBy | + worktree 字段 |\n| 队友工具 cwd | 始终 WORKDIR | 认领带 worktree 的任务时自动切换 |\n| 新函数 | — | create_worktree, bind_task_to_worktree, remove_worktree, keep_worktree, validate_worktree_name |\n| worktree 安全 | 无 | name 校验 + 有改动时拒绝删除 |\n| 事件日志 | 无 | events.jsonl 生命周期审计 |\n| Lead 工具 | 14 (s17) | + create_worktree, remove_worktree, keep_worktree (17) |\n| 队友工具 | 8 (s17) | 8(bash/read/write 在 worktree cwd 执行) |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s18_worktree_isolation/code.py\n```\n\n试试这个 prompt:\n\n`Create two tasks, then create worktrees for each (bind with task_id). Spawn alice and bob. Watch them auto-claim and work in isolated directories.`\n\n观察重点:两个 worktree 的 `git status` 输出是否显示不同的分支?队友认领带 worktree 的任务后,bash 命令是否在 worktree 目录下执行?`remove_worktree` 对有改动的 worktree 是否拒绝?`.tasks/` 中的任务在绑定后状态是否仍为 `pending`?\n\n---\n\n## 接下来\n\nAgent 团队能在隔离的工作空间中自组织了。但 Agent 的能力受限于我们给它写的工具——bash、read、write、task...\n\n如果用户已经有了自己的工具怎么办?比如一个公司内部的 Jira API、一个自建的部署系统?\n\ns19 MCP Plugin → 给 Agent 装一个插件系统。外部工具通过标准协议接入,Agent 不需要知道它们是谁写的。\n\n
\n深入 CC 源码\n\nCC 的 worktree 系统有两条路径:**EnterWorktree**(当前会话切入)和 **AgentTool isolation**(子 agent 隔离)。\n\n### EnterWorktree:当前会话切换\n\n`EnterWorktreeTool.ts:92-97` 创建 worktree 后立即 `process.chdir(worktreePath)`、`setCwd()`、`setOriginalCwd()`、`saveWorktreeState()`。当前会话的工作目录直接切换到 worktree——不是 prompt 提醒,而是进程级目录变更。\n\n`ExitWorktreeTool.ts:261-320` 的 keep/remove 都会 `restoreSessionToOriginalCwd()` 恢复原目录。Remove 时检查未提交改动(`ExitWorktreeTool.ts:190-220`),没有 `discard_changes: true` 就拒绝删除。\n\n### AgentTool isolation:子 agent 隔离\n\n`AgentTool.tsx:590-641` 在 `isolation: \"worktree\"` 时调用 `createAgentWorktree()` 创建 worktree,用 `cwdOverridePath` 包住子 agent 执行。子 agent 的所有操作自动在 worktree 目录下进行。`AgentTool/prompt.ts:272` 告诉模型:这是临时 worktree,无改动自动清理,有改动返回路径和分支。\n\n`worktree.ts:902-951` 的 `createAgentWorktree()` 不修改全局 session cwd,只给子 agent 用。`worktree.ts:961-1020` 的 `removeAgentWorktree()` 从主 repo root 删除。\n\n### name 校验\n\n`worktree.ts:76-84` 校验 slug:拒绝 `.`/`..`,允许 `[a-zA-Z0-9._-]`。`worktree.ts:48` 定义 `VALID_WORKTREE_SLUG_SEGMENT`。教学版的 `validate_worktree_name` 用同样的规则。\n\n### 路径和分支命名\n\n真实路径是 `.claude/worktrees/`,分支名 `worktree-{slug}`(`worktree.ts:204-227`,斜杠用 `+` 替代)。教学版用 `.worktrees/` 和 `wt/{name}` 简化。\n\n创建时用 `git worktree add -B`(`worktree.ts:326-328`),优先基于 `origin/` 而非当前 HEAD。\n\n### 状态管理\n\nCC 没有 task-worktree 绑定。Worktree 状态通过 `PersistedWorktreeSession`(`worktree.ts:756-768`)管理,字段包括 `originalCwd`、`worktreePath`、`worktreeName`、`worktreeBranch`、`originalBranch`、`originalHeadCommit`、`sessionId` 等——没有 taskId。`saveWorktreeState()`(`sessionStorage.ts:2883-2920`)以 `type: 'worktree-state'` 写入 session transcript。\n\n教学版用 task 的 `worktree` 字段做绑定,是教学简化。CC 把 worktree 和 task 作为两个独立系统,通过 Agent 理解上下文来关联。\n\n
\n\n\n" + }, + { + "version": "s18", + "locale": "ja", + "title": "s18: Worktree Isolation — それぞれのディレクトリ、互いに干渉しない", + "content": "# s18: Worktree Isolation — それぞれのディレクトリ、互いに干渉しない\n\ns01 → ... → s16 → s17 → `s18` → [s19](/ja/s19) → s20\n\n> *\"それぞれのディレクトリ、互いに干渉しない\"* — タスクは目標を管理、worktree はディレクトリを管理、ID で紐付け。\n>\n> **Harness 層**: 隔離 — 並列実行のディレクトリ分離。\n\n---\n\n## 課題\n\ns17 では、Alice も Bob も同じディレクトリで作業。Alice のタスクは「認証モジュールのリファクタリング」、Bob のタスクは「UI ログインページのリファクタリング」。\n\nAlice が `write_file(\"config.py\", ...)` を呼び出し、Bob も `write_file(\"config.py\", ...)` を呼び出す。両者が同じファイルを編集し、互いに上書き。クリーンなロールバックもできない——どの変更が誰のものか区別できない。\n\ns15-s17 は「誰が何をするか」(タスクシステム)と「どう通信するか」(メッセージバス)を解決したが、「どこで作業するか」は未解決。\n\n---\n\n## ソリューション\n\n![Worktree Overview](/course-assets/s18_worktree_isolation/worktree-overview.ja.svg)\n\nGit worktree を使うと、同じリポジトリ内に複数の独立した作業ディレクトリを作成でき、それぞれが独自のブランチを持つ。Alice は `.worktrees/auth-refactor/` で作業、Bob は `.worktrees/ui-login/` で作業——互いに干渉しない。\n\nS17 の教学版 MessageBus、プロトコル、自治認領機構を踏襲。本章の追加:\n\n| 機能 | 目的 |\n|------|------|\n| create_worktree | タスク用の独立ディレクトリ + 独立ブランチを作成 |\n| bind_task_to_worktree | タスクとディレクトリを紐付け(状態は変更しない) |\n| remove_worktree / keep_worktree | 完了後のクリーンアップまたは保持 |\n| validate_worktree_name | パストラバーサルと不正文字を拒否 |\n\n---\n\n## 仕組み\n\n### 作成:タスク-Worktree 紐付け\n\n```python\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n validate_worktree_name(name) # [A-Za-z0-9._-]{1,64} のみ許可\n path = WORKTREES_DIR / name\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n return f\"Worktree '{name}' created at {path}\"\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name # worktree フィールドのみ書き込み\n save_task(task) # 状態は pending のまま、チームメイトの claim を待つ\n```\n\n紐付けルール:1 つのタスクに 1 つの worktree を紐付け。紐付けはタスクの状態を変更しない——タスクは `pending` 
のままで、チームメイトが認領した時に `in_progress` に進む。これにより Lead は事前にタスクと worktree を作成でき、チームメイトは idle 時に自然に worktree 紐付け済みタスクを認領する。\n\n### チームメイトツールの cwd 切り替え\n\n教学版は各チームメイトに `wt_ctx` 辞書を維持し、現在の worktree パスを追跡。チームメイトが worktree 紐付けタスクを認領すると、`wt_ctx` が自動的に worktree パスに設定され、チームメイトの `bash`、`read_file`、`write_file` は worktree ディレクトリで実行される:\n\n```python\n# チームメイトスレッド内部\nwt_ctx = {\"path\": None}\n\ndef _run_claim_task(task_id):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n return result\n\ndef _run_bash(command):\n return run_bash(command, cwd=wt_ctx[\"path\"]) # worktree で実行\n```\n\nこれは教学簡略化。実際の CC の EnterWorktree は `process.chdir()` でプロセス全体のディレクトリを切り替え、AgentTool isolation は `cwdOverride` でサブエージェント実行をラップする。\n\n### クリーンアップ:Keep または Remove\n\nタスク完了後、2 つの選択肢:\n\n```python\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n # 安全チェック:変更がある場合デフォルトで拒否\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files > 0 or commits > 0:\n return \"未コミットの変更あり。discard_changes=true で強制削除、または keep_worktree で保持\"\n ok, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok:\n return \"削除失敗\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n\ndef keep_worktree(name: str) -> str:\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n```\n\nKeep = ブランチを保持し、手動 review 後にマージ。Remove = 未コミット変更がある場合デフォルトで拒否、`discard_changes=true` で確認が必要。タスクの自動 complete はしない——タスク完了はチームメイトの `complete_task` で明示的にトリガー。\n\n### イベントログ:監査可能\n\n各ライフサイクル操作はログに記録され、監査に利用:\n\n```python\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n # .worktrees/events.jsonl に append\n```\n\nイベントタイプ:`create`、`remove`、`keep`。教学版はイベントを記録するだけで手動監査用。完全な復元には index 
または `git worktree list` スキャンが必要。\n\n### run_git:成功/失敗を返す\n\n```python\ndef run_git(args: list[str]) -> tuple[bool, str]:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR, ...)\n return r.returncode == 0, output\n```\n\n`create_worktree` と `remove_worktree` は git コマンド成功後のみイベントログに書き込み、ログが実際の状態を反映することを保証。\n\n---\n\n## s17 からの変更\n\n| コンポーネント | 変更前 (s17) | 変更後 (s18) |\n|--------------|------------|------------|\n| 作業ディレクトリ | 全 Agent が WORKDIR を共有 | 各タスクが git worktree に紐付け可能 |\n| タスクデータ | id/subject/status/owner/blockedBy | + worktree フィールド |\n| チームメイトツール cwd | 常に WORKDIR | worktree 紐付けタスク認領時に自動切り替え |\n| 新規関数 | — | create_worktree, bind_task_to_worktree, remove_worktree, keep_worktree, validate_worktree_name |\n| worktree 安全性 | なし | name 検証 + 変更ありの場合削除拒否 |\n| イベントログ | なし | events.jsonl ライフサイクル監査 |\n| Lead ツール | 14 (s17) | + create_worktree, remove_worktree, keep_worktree (17) |\n| チームメイトツール | 8 (s17) | 8(bash/read/write が worktree cwd で実行) |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s18_worktree_isolation/code.py\n```\n\n以下のプロンプトを試してください:\n\n`Create two tasks, then create worktrees for each (bind with task_id). Spawn alice and bob. Watch them auto-claim and work in isolated directories.`\n\n観察ポイント:2 つの worktree の `git status` 出力は異なるブランチを表示しているか?チームメイトが worktree 紐付けタスクを認領後、bash コマンドは worktree ディレクトリで実行されているか?`remove_worktree` は変更がある場合に拒否するか?紐付け後のタスク状態は `pending` のままか?\n\n---\n\n## 次の章\n\nAgent チームが隔離されたワークスペースで自己組織化できるようになった。しかし Agent の能力はツールに制限される——bash、read、write、task...\n\nもしユーザーが独自のツールを持っていたら?例えば社内 Jira API や独自デプロイシステム?\n\ns19 MCP Plugin → Agent にプラグインシステムを追加。外部ツールが標準プロトコルで接続、Agent は誰が書いたか知る必要がない。\n\n
\nCC ソースコード深掘り\n\nCC の worktree システムには 2 つのパスがある:**EnterWorktree**(現在のセッションが切り替え)と **AgentTool isolation**(サブエージェント隔離)。\n\n### EnterWorktree:現在のセッション切り替え\n\n`EnterWorktreeTool.ts:92-97` worktree 作成後、直ちに `process.chdir(worktreePath)`、`setCwd()`、`setOriginalCwd()`、`saveWorktreeState()` を呼び出し。現在のセッションの作業ディレクトリが直接 worktree に切り替わる——プロンプトのヒントではなく、プロセスレベルのディレクトリ変更。\n\n`ExitWorktreeTool.ts:261-320` keep/remove どちらも `restoreSessionToOriginalCwd()` で元のディレクトリに復元。Remove は未コミット変更をチェック(`ExitWorktreeTool.ts:190-220`)、`discard_changes: true` なしでは拒否。\n\n### AgentTool Isolation:サブエージェント隔離\n\n`AgentTool.tsx:590-641` `isolation: \"worktree\"` の場合、`createAgentWorktree()` を呼び出して worktree を作成し、`cwdOverridePath` でサブエージェント実行をラップ。サブエージェントの全操作が自動的に worktree ディレクトリで実行される。`AgentTool/prompt.ts:272` はモデルに伝える:これは一時的な worktree、変更なしで自動クリーンアップ、変更ありの場合はパスとブランチを返す。\n\n`worktree.ts:902-951` `createAgentWorktree()` はグローバル session cwd を変更せず、サブエージェント専用。`worktree.ts:961-1020` `removeAgentWorktree()` はメインリポジトリルートから削除。\n\n### name 検証\n\n`worktree.ts:76-84` slug を検証:`.`/`..` を拒否、`[a-zA-Z0-9._-]` を許可。`worktree.ts:48` で `VALID_WORKTREE_SLUG_SEGMENT` を定義。教学版の `validate_worktree_name` も同じルールを使用。\n\n### パスとブランチ命名\n\n実際のパスは `.claude/worktrees/`、ブランチ名は `worktree-{slug}`(`worktree.ts:204-227`、スラッシュは `+` に置換)。教学版は `.worktrees/` と `wt/{name}` で簡略化。\n\n作成時は `git worktree add -B`(`worktree.ts:326-328`)を使用し、現在の HEAD より `origin/` を優先。\n\n### 状態管理\n\nCC にはタスク-worktree 紐付けがない。Worktree 状態は `PersistedWorktreeSession`(`worktree.ts:756-768`)で管理、フィールドは `originalCwd`、`worktreePath`、`worktreeName`、`worktreeBranch`、`originalBranch`、`originalHeadCommit`、`sessionId` 等を含む——taskId フィールドはない。`saveWorktreeState()`(`sessionStorage.ts:2883-2920`)は `type: 'worktree-state'` で session transcript に書き込み。\n\n教学版はタスクの `worktree` フィールドで紐付けを行う教学簡略化。CC は worktree とタスクを 2 つの独立システムとして扱い、Agent のコンテキスト理解で関連付ける。\n\n
\n\n\n" + }, + { + "version": "s19", + "locale": "en", + "title": "s19: MCP Tools — External Tools, Standard Protocol", + "content": "# s19: MCP Tools — External Tools, Standard Protocol\n\ns01 → ... → s17 → s18 → `s19` → [s20](/en/s20)\n\n> *\"External tools, standard protocol\"* — Discover, assemble, invoke. Agent doesn't need to know who wrote them.\n>\n> **Harness layer**: Plugins — External capabilities via a standard protocol.\n\n---\n\n## The Problem\n\nFrom s01 through s18, every tool the agent uses was hand-written — bash, read, write, task, worktree. Input validation, execution logic, error handling — all written line by line.\n\nNow you have 3 external services to integrate: the company's Jira API (query issues, create tickets), an in-house deployment system (trigger deploys, view logs), and the team's Notion knowledge base (search docs, create pages). You don't want to rewrite tool code for every service.\n\nYou need a standard protocol — as long as an external service implements it, the agent can call its tools directly, regardless of what language the service is written in.\n\n---\n\n## The Solution\n\n![MCP Architecture](/course-assets/s19_mcp_plugin/mcp-architecture.en.svg)\n\nMCP (Model Context Protocol) defines how agents discover and invoke external tools. Core concepts:\n\n| Concept | Purpose |\n|------|------|\n| MCPClient | The agent-side client — connects to servers, discovers tools, invokes tools |\n| MCP Server | The external service — implements `tools/list` + `tools/call` |\n| assemble_tool_pool | Assembles built-in tools and MCP tools into one tool pool |\n| mcp\\_\\_server\\_\\_tool naming | Prevents tool name collisions across different servers |\n\nCarries forward s18's teaching-version worktree isolation, autonomous claiming, idle polling, and protocol system. 
This chapter adds: the `connect_mcp` tool — connect to external services, discover tools, add them to the tool pool.\n\nThe tutorial uses mock handlers to simulate external servers. The real version would spawn subprocesses and communicate via stdin/stdout JSON-RPC. Mocks let you run the full flow without external dependencies; the tradeoff is you don't see real network communication or process management.\n\n---\n\n## How It Works\n\n### MCPClient: Discovery + Invocation\n\n```python\nclass MCPClient:\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs, handlers):\n \"\"\"Simulates tools/list discovery.\"\"\"\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n \"\"\"Simulates tools/call.\"\"\"\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n return handler(**args)\n```\n\nThe tutorial uses Python functions to simulate server tool implementations. The real version communicates with subprocesses via stdio JSON-RPC.\n\n### connect_mcp: Connect + Discover\n\n```python\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n return f\"Unknown server '{name}'. Available: ...\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n return f\"Connected to '{name}'. Discovered: ...\"\n```\n\nAfter connecting, the server's tools are immediately available.\n\n### normalize_mcp_name: Name Normalization\n\n```python\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\ndef normalize_mcp_name(name: str) -> str:\n return _DISALLOWED_CHARS.sub('_', name)\n```\n\nAll non-`[a-zA-Z0-9_-]` characters are replaced with `_`. 
Prevents special characters in server or tool names from causing naming conflicts or injection issues.\n\n### assemble_tool_pool: Assemble Tool Pool\n\n```python\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append(...)\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw:\n c.call_tool(t, kw))\n return tools, handlers\n```\n\nThe prefix `mcp__{server}__{tool}` prevents tool name collisions across different servers. Names are normalized through `normalize_mcp_name`.\n\nMCP tool descriptions include `(readOnly)` or `(destructive)` annotations — the tutorial uses text annotations, while real CC uses structured tool annotations for the permission system.\n\n### No Cache: Tool Pool Changes, Prompt Changes Too\n\ns10-s18's agent_loop used prompt caching to avoid re-serialization. s19 removes the cache:\n\n```python\ndef agent_loop(messages, context):\n tools, handlers = assemble_tool_pool() # Rebuild every time\n system = assemble_system_prompt(context) # Regenerate every time\n ...\n if any(b.name == \"connect_mcp\" ...):\n tools, handlers = assemble_tool_pool() # Rebuild after connection\n system = assemble_system_prompt(context)\n```\n\nReason: after `connect_mcp`, the tool pool changes — new tools like `mcp__docs__search` are added. The cached tool list is stale; continuing to use it means the model can't call the new tools. The tutorial simply removes caching, at the cost of slightly more serialization time.\n\n### MCP Tools: Lead Only\n\nIn the tutorial, `connect_mcp` is a Lead tool, and `assemble_tool_pool` only serves the Lead's agent_loop. 
Teammates still use a fixed 8-tool subset (bash, read_file, write_file, send_message, submit_plan, list_tasks, claim_task, complete_task).\n\nThis is a teaching simplification. In real CC, MCP tools are available to both the main agent and sub-agents — sub-agents inherit the parent's MCP configuration.\n\n---\n\n## Changes from s18\n\n| Component | Before (s18) | After (s19) |\n|------|-----------|-----------|\n| Tool source | All hand-written built-in | Hand-written + MCP external tools with dynamic discovery |\n| Tool pool | Fixed BUILTIN_TOOLS | assemble_tool_pool dynamically assembles mcp\\_\\_ prefixed tools |\n| Name safety | None | normalize_mcp_name normalization |\n| New type | — | MCPClient class (simulates tools/list + tools/call) |\n| Namespace | — | mcp\\_\\_server\\_\\_tool prevents collisions |\n| Tool descriptions | No annotations | (readOnly)/(destructive) annotations |\n| Prompt cache | Yes (since s10) | Removed — tool pool is dynamic, cache goes stale |\n| Lead tools | 17 (s18) | 18 (+connect_mcp) |\n| Teammate tools | 8 (s18) | 8 (unchanged, MCP tools are Lead-only) |\n| Extension method | Write code to add tools | Standard protocol, implement servers in any language |\n\n---\n\n## Try It Out\n\n```sh\ncd learn-claude-code\npython s19_mcp_plugin/code.py\n```\n\nTry these prompts:\n\n1. `Connect to the docs MCP server and search for something`\n2. `Connect to the deploy server and trigger a deployment`\n3. `Connect both servers — what tools are now available?`\n\nWhat to observe: After connecting to an MCP server, do tool names have `mcp__docs__` or `mcp__deploy__` prefixes? Are both servers' tools available simultaneously? Do MCP tool descriptions include (readOnly)/(destructive) annotations?\n\n---\n\n## What's Next\n\nThe Agent can now connect external tools through a standard protocol. 
But the first 19 chapters each add one mechanism in isolation; a real Agent does not run as 19 separate demos.\n\nTools, permissions, hooks, todo, task graph, memory, compact, background work, cron, teams, worktrees, and MCP should all attach to the same loop, not live in separate examples.\n\ns20 Comprehensive Agent → Combine the first 19 chapters into one complete harness. Many mechanisms, one loop.\n\n
\nDeep Dive into CC Source\n\n> The following is based on analysis of CC source: `services/mcp/client.ts`, `auth.ts`, `config.ts`, `channelNotification.ts`.\n\n### 1. Six Transport Types\n\nThe tutorial only shows a stdio mock. CC supports 6 transport types (`types.ts:23-25`):\n\n| Transport | Communication method |\n|-----------|---------|\n| `stdio` | Subprocess stdin/stdout (cross-platform default) |\n| `sse` | HTTP Server-Sent Events |\n| `http` | Streamable HTTP (POST/SSE bidirectional) |\n| `ws` | WebSocket |\n| `sse-ide` | IDE-embedded SSE transport |\n| `sdk` | In-process SDK transport |\n\nOn connection, local (stdio) and remote (http/sse/ws) servers are batched concurrently: local batch of 3, remote batch of 20.\n\n### 2. Tool Pool Merging Algorithm\n\n`assembleToolPool()` (`tools.ts:345-364`):\n\n```typescript\n// Dedup with priority: built-in tools win on name collision (sorted first)\nreturn uniqBy(\n [...builtInTools.sort(byName), ...filteredMcpTools.sort(byName)],\n 'name',\n)\n```\n\nBuilt-in and MCP tools are sorted separately, not together. The reason is CC's `claude_code_system_cache_policy` places a global cache breakpoint after the last built-in tool at a specific position — mixing the sort would break this design.\n\n### 3. Naming Convention: `mcp__server__tool`\n\n`buildMcpToolName()` (`mcpStringUtils.ts:50-52`):\n\n```\nmcp____\n```\n\nAll non-`[a-zA-Z0-9_-]` characters are replaced with `_` (`normalization.ts:17-23`). The tutorial's `normalize_mcp_name` uses the same rule.\n\n### 4. Permission Checks\n\nCC has a separate permission system for MCP tools. `checkPermissions()` applies different logic for MCP tools than for built-in tools — MCP tools can declare their own permission requirements (readOnly, destructive, etc.), and CC decides whether user confirmation is needed based on the declaration. The tutorial only uses text annotations `(readOnly)` / `(destructive)` in descriptions, without permission enforcement.\n\n### 5. 
Configuration Sources and Priority\n\nMCP server configuration comes from multiple sources. CC's priority from lowest to highest:\n\n```\nclaude.ai connectors < plugin < user settings.json < approved project .mcp.json < local settings.local.json\n```\n\n`claude.ai` connectors are fetched separately, deduplicated by content signature, and merged at the lowest precedence (`config.ts:1267-1289`). When enterprise `managed-mcp.json` exists, all other configurations are excluded.\n\nThe tutorial passes server names directly to the `MOCK_SERVERS` dict, without config merging.\n\n### 6. Channel Notifications: Servers Push Messages Back\n\nThe tutorial only covers agent → MCP Server unidirectional calls. CC also supports reverse notifications (`channelNotification.ts`):\n\n1. Server declares `capabilities.experimental['claude/channel']`\n2. Server sends messages to agent via MCP notification `notifications/claude/channel`\n3. Messages are wrapped in `...` XML tags\n4. Agent is woken up by SleepTool (within 1 second)\n\nServers can also request permissions: `notifications/claude/channel/permission_request` → Agent replies `notifications/claude/channel/permission`. Users confirm/deny via a 5-letter short ID.\n\n### 7. OAuth Authentication Flow\n\nCC's MCP authentication (`auth.ts`) supports a full OAuth 2.0 + PKCE flow:\n- OAuth metadata discovery via public client + PKCE (RFC 8414 / RFC 9728)\n- Local callback server receives authorization code\n- Tokens persisted via `getSecureStorage()` (macOS Keychain / Linux encrypted file / Windows Credential Manager)\n- Auto-refresh 5 minutes before expiry\n- Cross-application access (XAA): browser gets id_token → RFC 8693 + RFC 7523 exchange → no repeated browser popups\n\n### 8. 
Connection Lifecycle Error Handling\n\nCC has fine-grained error classification and retry for MCP connections (`client.ts:1266-1402`):\n- Terminal errors (ECONNRESET, ETIMEDOUT, EPIPE, etc.): 3 consecutive failures → close + reconnect\n- Tool call 401: Token expired → throw `McpAuthError` → trigger re-authentication\n- Tool call timeout: `Promise.race` timeout (configurable, default ~28 hours)\n- Stdio disconnect: Kill process in SIGINT → SIGTERM → SIGKILL order\n\n### The Tutorial's Simplifications\n\n- 6 transport types → 1 (mock stdio): Manageable concept count\n- Channel reverse notifications → omitted: Tutorial agent is always the initiator\n- OAuth flow → omitted: Tutorial assumes servers need no auth\n- Multi-layer config priority → omitted: Tutorial passes server name directly\n- Complex error classification → omitted: Tutorial uses try/except as fallback\n- MCP tools Lead-only → omitted sub-agent inheritance: Simplifies code structure\n\n
\n\n\n" + }, + { + "version": "s19", + "locale": "zh", + "title": "s19: MCP Tools — 外接工具,标准协议", + "content": "# s19: MCP Tools — 外接工具,标准协议\n\ns01 → ... → s17 → s18 → `s19` → [s20](/zh/s20)\n\n> *\"外接工具, 标准协议\"* — 发现、组装、调用,Agent 不需要知道工具是谁写的。\n>\n> **Harness 层**: 插件 — 外部能力通过标准协议接入。\n\n---\n\n## 问题\n\ns01 到 s18,Agent 的所有工具都是手写的——bash、read、write、task、worktree。每个工具的输入验证、执行逻辑、错误处理,都是你一行行写的。\n\n现在你有 3 个外部服务想接入:公司的 Jira API(查 issue、建 ticket)、自建的部署系统(触发 deploy、看日志)、团队的 Notion 知识库(搜文档、建页面)。你不想为每个服务重写一套工具代码。\n\n你需要一个标准协议——外部服务只要实现它,Agent 就能直接调用,不管服务用什么语言写的。\n\n---\n\n## 解决方案\n\n![MCP Architecture](/course-assets/s19_mcp_plugin/mcp-architecture.svg)\n\nMCP(Model Context Protocol)定义了 Agent 如何发现和调用外部工具。核心概念:\n\n| 概念 | 作用 |\n|------|------|\n| MCPClient | Agent 端的客户端,连接 server、发现工具、调用工具 |\n| MCP Server | 外部服务,实现 `tools/list` + `tools/call` |\n| assemble_tool_pool | 把内置工具和 MCP 工具组装成一个工具池 |\n| mcp\\_\\_server\\_\\_tool 命名 | 避免不同 server 的工具名冲突 |\n\n沿用 s18 的教学版 worktree 隔离、自主认领、空闲轮询、协议系统。本章新增:`connect_mcp` 工具——连接外部服务,发现工具,加入工具池。\n\n教学版用 mock handler 模拟外部 server。真实版会启动子进程,通过 stdin/stdout 发送 JSON-RPC 请求。mock 的好处是不依赖外部服务就能跑完整流程;代价是你看不到真正的网络通信和进程管理。\n\n---\n\n## 工作原理\n\n### MCPClient:发现 + 调用\n\n```python\nclass MCPClient:\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs, handlers):\n \"\"\"Simulates tools/list discovery.\"\"\"\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n \"\"\"Simulates tools/call.\"\"\"\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n return handler(**args)\n```\n\n教学版用 Python 函数模拟 server 的工具实现。真实版通过 stdio JSON-RPC 与子进程通信。\n\n### connect_mcp:连接 + 发现\n\n```python\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n 
return f\"Unknown server '{name}'. Available: ...\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n return f\"Connected to '{name}'. Discovered: ...\"\n```\n\n连接后,server 提供的工具立即可用。\n\n### normalize_mcp_name:名称规范化\n\n```python\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\ndef normalize_mcp_name(name: str) -> str:\n return _DISALLOWED_CHARS.sub('_', name)\n```\n\n所有非 `[a-zA-Z0-9_-]` 的字符替换为 `_`。防止 server 名或工具名中包含特殊字符导致命名冲突或注入问题。\n\n### assemble_tool_pool:组装工具池\n\n```python\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append(...)\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw:\n c.call_tool(t, kw))\n return tools, handlers\n```\n\n前缀 `mcp__{server}__{tool}` 避免不同 server 的工具名冲突。名称经过 `normalize_mcp_name` 规范化。\n\nMCP 工具的 description 带 `(readOnly)` 或 `(destructive)` 标注——教学版用文本标注,真实 CC 用 tool annotations 结构体让权限系统判断。\n\n### 无缓存:工具池变了,prompt 也变\n\ns10-s18 的 agent_loop 用 prompt cache 避免重复序列化。s19 去掉了缓存:\n\n```python\ndef agent_loop(messages, context):\n tools, handlers = assemble_tool_pool() # 每次重新构建\n system = assemble_system_prompt(context) # 每次重新生成\n ...\n if any(b.name == \"connect_mcp\" ...):\n tools, handlers = assemble_tool_pool() # 连接后重建\n system = assemble_system_prompt(context)\n```\n\n原因:`connect_mcp` 之后工具池变化了——新增了 `mcp__docs__search` 等工具。缓存中的工具列表是旧的,继续用会导致模型调用不到新工具。教学版直接去掉缓存,代价是多花一点序列化时间。\n\n### MCP 工具只有 Lead 可用\n\n教学版中,`connect_mcp` 是 Lead 工具,`assemble_tool_pool` 也只服务于 Lead 的 agent_loop。Teammate 仍使用固定的 8 个子集工具(bash、read_file、write_file、send_message、submit_plan、list_tasks、claim_task、complete_task)。\n\n这是教学简化。真实 CC 中,MCP 工具对主 agent 和子 agent 都可用——子 agent 继承父级的 MCP 配置。\n\n---\n\n## 相对 s18 的变更\n\n| 组件 | 
之前 (s18) | 之后 (s19) |\n|------|-----------|-----------|\n| 工具来源 | 全部手写 builtin | 手写 + MCP 外部工具动态发现 |\n| 工具池 | 固定 BUILTIN_TOOLS | assemble_tool_pool 动态组装 mcp\\_\\_ 前缀工具 |\n| 名称安全 | 无 | normalize_mcp_name 规范化 |\n| 新类型 | — | MCPClient 类(模拟 tools/list + tools/call) |\n| 命名空间 | — | mcp\\_\\_server\\_\\_tool 避免冲突 |\n| 工具描述 | 无标注 | (readOnly)/(destructive) 标注 |\n| prompt 缓存 | 有(s10 起) | 去掉——工具池动态变化后缓存失效 |\n| Lead 工具 | 17 (s18) | 18 (+connect_mcp) |\n| Teammate 工具 | 8 (s18) | 8(不变,MCP 工具仅 Lead 可用) |\n| 扩展方式 | 写代码加工具 | 标准协议,任意语言实现 server |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s19_mcp_plugin/code.py\n```\n\n试试这些 prompt:\n\n1. `Connect to the docs MCP server and search for something`\n2. `Connect to the deploy server and trigger a deployment`\n3. `Connect both servers — what tools are now available?`\n\n观察重点:连接 MCP server 后,工具名是否带 `mcp__docs__` 或 `mcp__deploy__` 前缀?两个 server 的工具是否同时可用?MCP 工具的 description 是否带 (readOnly)/(destructive) 标注?\n\n---\n\n## 接下来\n\n现在 Agent 可以通过标准协议接入外部工具了。但前面 19 章每章都只加一个机制,真实 Agent 不会这样拆开运行。\n\n工具、权限、hooks、todo、任务图、记忆、压缩、后台、cron、团队、worktree、MCP 这些机制应该挂在同一个循环上,而不是散在 19 个 demo 里。\n\ns20 Comprehensive Agent → 把前 19 章的机制合回一个完整 harness。机制很多,循环一个。\n\n
\n深入 CC 源码\n\n> 以下基于 CC 源码 `services/mcp/client.ts`、`auth.ts`、`config.ts`、`channelNotification.ts` 的分析。\n\n### 一、6 种 Transport 类型\n\n教学版只展示了 stdio mock。CC 支持 6 种传输(`types.ts:23-25`):\n\n| Transport | 通信方式 |\n|-----------|---------|\n| `stdio` | 子进程 stdin/stdout(跨平台默认) |\n| `sse` | HTTP Server-Sent Events |\n| `http` | Streamable HTTP(POST/SSE 双向) |\n| `ws` | WebSocket |\n| `sse-ide` | IDE 内嵌 SSE 传输 |\n| `sdk` | 进程内 SDK 传输 |\n\n连接时本地(stdio)和远程(http/sse/ws)服务器分批并发:本地批量 3 个,远程批量 20 个。\n\n### 二、工具池组装算法\n\n`assembleToolPool()`(`tools.ts:345-364`):\n\n```typescript\n// 去重时优先保留内置工具(name 相同时内置在前)\nreturn uniqBy(\n [...builtInTools.sort(byName), ...filteredMcpTools.sort(byName)],\n 'name',\n)\n```\n\n内置工具和 MCP 工具分开排序,不是合起来排。原因是 CC 的 `claude_code_system_cache_policy` 在最后一个内置工具之后的某个位置放全局缓存断点——混排会破坏这个设计。\n\n### 三、命名规则:`mcp__server__tool`\n\n`buildMcpToolName()`(`mcpStringUtils.ts:50-52`):\n\n```\nmcp__<server>__<tool>\n```\n\n所有非 `[a-zA-Z0-9_-]` 字符替换为 `_`(`normalization.ts:17-23`)。教学版的 `normalize_mcp_name` 用同样的规则。\n\n### 四、权限检查\n\nCC 对 MCP 工具有独立的权限系统。`checkPermissions()` 对 MCP 工具的检查逻辑不同于内置工具——MCP 工具可以声明自己的权限需求(readOnly、destructive 等),CC 根据声明决定是否需要用户确认。教学版只在 description 中用文本标注 `(readOnly)` / `(destructive)`,不做权限拦截。\n\n### 五、配置来源与优先级\n\nMCP 服务器配置来自多个来源。CC 的配置优先级从低到高:\n\n```\nclaude.ai 连接器 < plugin < user settings.json < approved project .mcp.json < local settings.local.json\n```\n\n`claude.ai` 连接器单独拉取、按内容签名去重,以最低优先级合并(`config.ts:1267-1289`)。企业 `managed-mcp.json` 存在时完全排除其他配置。\n\n教学版直接传 server name 给 `MOCK_SERVERS` 字典,不做配置合并。\n\n### 六、Channel 通知:服务器反向推消息\n\n教学版只讲了 Agent → MCP Server 的单向调用。CC 还支持反向通知(`channelNotification.ts`):\n\n1. Server 声明 `capabilities.experimental['claude/channel']`\n2. Server 通过 MCP 通知 `notifications/claude/channel` 给 Agent 发消息\n3. 消息包装在 `...` XML 标签中\n4. 
Agent 被 SleepTool 唤醒(1 秒内)\n\nServer 还可以请求权限:`notifications/claude/channel/permission_request` → Agent 回复 `notifications/claude/channel/permission`。用户通过 5 字母短 ID 确认/拒绝。\n\n### 七、OAuth 认证流程\n\nCC 的 MCP 认证(`auth.ts`)支持完整的 OAuth 2.0 + PKCE 流程:\n- 通过公共客户端(public client)+ PKCE 发现 OAuth 元数据(RFC 8414 / RFC 9728)\n- 本地回调服务器接收授权码\n- 令牌通过 `getSecureStorage()` 持久化(macOS Keychain / Linux 加密文件 / Windows 凭据管理器)\n- 过期前 5 分钟自动刷新\n- 支持跨应用访问(XAA):浏览器获取 id_token → RFC 8693 + RFC 7523 交换 → 无需反复弹浏览器\n\n### 八、连接生命周期的错误处理\n\nCC 对 MCP 连接有精细的错误分类和重试(`client.ts:1266-1402`):\n- 终局性错误(ECONNRESET、ETIMEDOUT、EPIPE 等):连续 3 次 → 关闭 + 重连\n- 工具调用 401:令牌过期 → 抛出 `McpAuthError` → 触发重认证\n- 工具调用超时:`Promise.race` 超时(可配置,默认约 28 小时)\n- Stdio 断连:按 SIGINT → SIGTERM → SIGKILL 顺序杀进程\n\n### 教学版的简化\n\n- 6 种 transport → 1 种(mock stdio):概念量可控\n- Channel 反向通知 → 省略:教学版 Agent 是主动方\n- OAuth 流程 → 省略:教学版假设 server 不需要认证\n- 多层配置优先级 → 省略:教学版直接传 server name\n- 复杂的错误分类 → 省略:教学版用 try/except 兜底\n- MCP 工具只给 Lead → 省略子 agent 继承:简化代码结构\n\n
\n\n\n" + }, + { + "version": "s19", + "locale": "ja", + "title": "s19: MCP Tools — 外部ツール、標準プロトコル", + "content": "# s19: MCP Tools — 外部ツール、標準プロトコル\n\ns01 → ... → s17 → s18 → `s19` → [s20](/ja/s20)\n\n> *\"外部ツール、標準プロトコル\"* — 発見、組み立て、呼び出し。Agent はツールを誰が書いたか知る必要がない。\n>\n> **Harness 層**: プラグイン — 外部能力を標準プロトコルで接続。\n\n---\n\n## 課題\n\ns01 から s18 まで、Agent の全ツールは手書き — bash、read、write、task、worktree。入力検証、実行ロジック、エラーハンドリング、全て一行ずつ書いた。\n\n今、統合したい外部サービスが 3 つある:社内の Jira API(issue 検索、ticket 作成)、独自のデプロイシステム(deploy トリガー、ログ閲覧)、チームの Notion ナレッジベース(ドキュメント検索、ページ作成)。各サービスのためにツールコードを書き直したくない。\n\n標準プロトコルが必要 — 外部サービスがこのプロトコルを実装していれば、サービスが何の言語で書かれていても、Agent は直接そのツールを呼び出せる。\n\n---\n\n## ソリューション\n\n![MCP Architecture](/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg)\n\nMCP(Model Context Protocol)は、Agent が外部ツールを発見・呼び出しする方法を定義。核心概念:\n\n| 概念 | 目的 |\n|------|------|\n| MCPClient | Agent 側のクライアント — server に接続、ツールを発見、ツールを呼び出し |\n| MCP Server | 外部サービス側 — `tools/list` + `tools/call` を実装 |\n| assemble_tool_pool | 組み込みツールと MCP ツールを一つのツールプールに組み立てる |\n| mcp\\_\\_server\\_\\_tool 命名 | 異なる server 間のツール名衝突を防止 |\n\ns18 の教学版 worktree 隔離、自動認領、空き時ポーリング、プロトコルシステムを踏襲。本章の追加:`connect_mcp` ツール — 外部サービスに接続、ツールを発見、ツールプールに追加。\n\n教学版は mock handler で外部 server をシミュレート。実際の版はサブプロセスを起動し、stdin/stdout で JSON-RPC リクエストを送信。mock の利点は外部サービスなしで完全なフローを実行できること;代償は実際のネットワーク通信やプロセス管理が見えないこと。\n\n---\n\n## 仕組み\n\n### MCPClient:発見 + 呼び出し\n\n```python\nclass MCPClient:\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs, handlers):\n \"\"\"Simulates tools/list discovery.\"\"\"\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n \"\"\"Simulates tools/call.\"\"\"\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n return handler(**args)\n```\n\n教学版は Python 関数で server のツール実装をシミュレート。実際の版は stdio JSON-RPC 
でサブプロセスと通信。\n\n### connect_mcp:接続 + 発見\n\n```python\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n return f\"Unknown server '{name}'. Available: ...\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n return f\"Connected to '{name}'. Discovered: ...\"\n```\n\n接続後、server が提供するツールが即座に利用可能。\n\n### normalize_mcp_name:名前の正規化\n\n```python\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\ndef normalize_mcp_name(name: str) -> str:\n return _DISALLOWED_CHARS.sub('_', name)\n```\n\n`[a-zA-Z0-9_-]` 以外の全文字を `_` に置換。server 名やツール名の特殊文字による名前衝突やインジェクション問題を防止。\n\n### assemble_tool_pool:ツールプールの組み立て\n\n```python\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append(...)\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw:\n c.call_tool(t, kw))\n return tools, handlers\n```\n\nプレフィックス `mcp__{server}__{tool}` で異なる server 間のツール名衝突を防止。名前は `normalize_mcp_name` で正規化。\n\nMCP ツールの description に `(readOnly)` または `(destructive)` アノテーションを付与 — 教学版はテキストアノテーション、実際の CC は tool annotations 構造体で権限システムが判断。\n\n### キャッシュなし:ツールプールが変われば、プロンプトも変わる\n\ns10-s18 の agent_loop は prompt cache で再シリアライズを回避。s19 はキャッシュを削除:\n\n```python\ndef agent_loop(messages, context):\n tools, handlers = assemble_tool_pool() # 毎回再構築\n system = assemble_system_prompt(context) # 毎回再生成\n ...\n if any(b.name == \"connect_mcp\" ...):\n tools, handlers = assemble_tool_pool() # 接続後に再構築\n system = assemble_system_prompt(context)\n```\n\n理由:`connect_mcp` 後にツールプールが変化 — `mcp__docs__search` 
などの新ツールが追加される。キャッシュ内のツールリストは古く、使い続けるとモデルが新ツールを呼び出せない。教学版はキャッシュを単に削除、代償はシリアライズ時間の若干の増加。\n\n### MCP ツールは Lead のみ利用可能\n\n教学版では、`connect_mcp` は Lead ツール、`assemble_tool_pool` も Lead の agent_loop のみにサービスを提供。チームメイトは引き続き固定の 8 ツールサブセット(bash、read_file、write_file、send_message、submit_plan、list_tasks、claim_task、complete_task)を使用。\n\nこれは教学簡略化。実際の CC では、MCP ツールはメイン agent とサブ agent の両方で利用可能 — サブ agent は親の MCP 設定を継承。\n\n---\n\n## s18 からの変更\n\n| コンポーネント | 変更前 (s18) | 変更後 (s19) |\n|--------------|------------|------------|\n| ツールソース | 全て手書き builtin | 手書き + MCP 外部ツール動的発見 |\n| ツールプール | 固定 BUILTIN_TOOLS | assemble_tool_pool が動的に mcp\\_\\_ プレフィックスツールを組み立てる |\n| 名前の安全性 | なし | normalize_mcp_name 正規化 |\n| 新規タイプ | — | MCPClient クラス(tools/list + tools/call をシミュレート) |\n| 名前空間 | — | mcp\\_\\_server\\_\\_tool 衝突防止 |\n| ツール説明 | アノテーションなし | (readOnly)/(destructive) アノテーション |\n| プロンプトキャッシュ | あり(s10 から) | 削除 — ツールプールが動的、キャッシュが陳腐化 |\n| Lead ツール | 17 (s18) | 18 (+connect_mcp) |\n| チームメイトツール | 8 (s18) | 8(変更なし、MCP ツールは Lead のみ) |\n| 拡張方法 | ツール追加のコードを書く | 標準プロトコル、任意言語で server を実装 |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s19_mcp_plugin/code.py\n```\n\n以下のプロンプトを試してください:\n\n1. `Connect to the docs MCP server and search for something`\n2. `Connect to the deploy server and trigger a deployment`\n3. `Connect both servers — what tools are now available?`\n\n観察ポイント:MCP server 接続後、ツール名に `mcp__docs__` や `mcp__deploy__` プレフィックスが付いているか?両方の server のツールが同時に利用可能か?MCP ツールの description に (readOnly)/(destructive) アノテーションが付いているか?\n\n---\n\n## 次の章\n\nAgent は標準プロトコルで外部ツールに接続できるようになりました。しかし前 19 章は各章で 1 つの仕組みだけを追加しています。実際の Agent は 19 個の demo に分かれて動くわけではありません。\n\ntools、permissions、hooks、todo、task graph、memory、compact、background work、cron、teams、worktree、MCP は、別々の例ではなく同じ loop に接続されるべきです。\n\ns20 Comprehensive Agent → 前 19 章の仕組みを 1 つの完全な harness に統合。仕組みは多く、loop は 1 つ。\n\n
\nCC ソースコード深掘り\n\n> 以下は CC ソースコード `services/mcp/client.ts`、`auth.ts`、`config.ts`、`channelNotification.ts` の分析に基づく。\n\n### 一、6 種の Transport タイプ\n\n教学版は stdio mock のみ。CC は 6 種のトランスポートをサポート(`types.ts:23-25`):\n\n| Transport | 通信方式 |\n|-----------|---------|\n| `stdio` | サブプロセス stdin/stdout(クロスプラットフォームデフォルト) |\n| `sse` | HTTP Server-Sent Events |\n| `http` | Streamable HTTP(POST/SSE 双方向) |\n| `ws` | WebSocket |\n| `sse-ide` | IDE 内蔵 SSE トランスポート |\n| `sdk` | プロセス内 SDK トランスポート |\n\n接続時、ローカル(stdio)とリモート(http/sse/ws)サーバーをバッチで並行処理:ローカルは 3 個ずつ、リモートは 20 個ずつ。\n\n### 二、ツールプール組み立てアルゴリズム\n\n`assembleToolPool()`(`tools.ts:345-364`):\n\n```typescript\n// 重複排除時に組み込みツールを優先(name が同じ場合、組み込みが先)\nreturn uniqBy(\n [...builtInTools.sort(byName), ...filteredMcpTools.sort(byName)],\n 'name',\n)\n```\n\n組み込みツールと MCP ツールは別々にソート、混ぜてソートしない。理由は CC の `claude_code_system_cache_policy` が最後の組み込みツールの後の特定位置にグローバルキャッシュブレークポイントを置く設計のため — ソートを混ぜるとこの設計が壊れる。\n\n### 三、命名規則:`mcp__server__tool`\n\n`buildMcpToolName()`(`mcpStringUtils.ts:50-52`):\n\n```\nmcp__<server>__<tool>\n```\n\n`[a-zA-Z0-9_-]` 以外の全文字を `_` に置換(`normalization.ts:17-23`)。教学版の `normalize_mcp_name` も同じルールを使用。\n\n### 四、権限チェック\n\nCC は MCP ツールに対して独立した権限システムを持つ。`checkPermissions()` は MCP ツールに対して組み込みツールとは異なるロジックを適用 — MCP ツールは独自の権限要件(readOnly、destructive 等)を宣言でき、CC は宣言に基づいてユーザー確認が必要かを判断。教学版は description 内のテキストアノテーション `(readOnly)` / `(destructive)` のみで、権限インターセプトは行わない。\n\n### 五、設定ソースと優先度\n\nMCP サーバー設定は複数のソースから。CC の優先度は低い順に:\n\n```\nclaude.ai コネクタ < プラグイン < ユーザー settings.json < 承認済みプロジェクト .mcp.json < ローカル settings.local.json\n```\n\n`claude.ai` コネクタは個別に取得、コンテンツ署名で重複排除し、最低優先度で統合(`config.ts:1267-1289`)。企業 `managed-mcp.json` が存在する場合、他の全設定を完全に除外。\n\n教学版は server 名を直接 `MOCK_SERVERS` 辞書に渡し、設定マージは行わない。\n\n### 六、Channel 通知:サーバーからの逆方向メッセージ\n\n教学版は Agent → MCP Server の一方向呼び出しのみ。CC は逆方向通知もサポート(`channelNotification.ts`):\n\n1. Server が `capabilities.experimental['claude/channel']` を宣言\n2. Server が MCP 通知 `notifications/claude/channel` で Agent にメッセージを送信\n3. 
メッセージは `...` XML タグでラップ\n4. Agent は SleepTool で起床(1 秒以内)\n\nServer は権限リクエストも可能:`notifications/claude/channel/permission_request` → Agent が `notifications/claude/channel/permission` で応答。ユーザーは 5 文字の短い ID で確認/拒否。\n\n### 七、OAuth 認証フロー\n\nCC の MCP 認証(`auth.ts`)は完全な OAuth 2.0 + PKCE フローをサポート:\n- 公開クライアント + PKCE で OAuth メタデータを発見(RFC 8414 / RFC 9728)\n- ローカルコールバックサーバーが認可コードを受信\n- トークンは `getSecureStorage()` で永続化(macOS Keychain / Linux 暗号化ファイル / Windows 資格情報マネージャー)\n- 有効期限 5 分前に自動リフレッシュ\n- クロスアプリケーションアクセス(XAA):ブラウザが id_token を取得 → RFC 8693 + RFC 7523 交換 → 繰り返しブラウザポップアップ不要\n\n### 八、接続ライフサイクルのエラーハンドリング\n\nCC は MCP 接続にきめ細かいエラー分類とリトライを行う(`client.ts:1266-1402`):\n- 終局エラー(ECONNRESET、ETIMEDOUT、EPIPE 等):連続 3 回 → クローズ + 再接続\n- ツール呼び出し 401:トークン期限切れ → `McpAuthError` スロー → 再認証トリガー\n- ツール呼び出しタイムアウト:`Promise.race` タイムアウト(設定可能、デフォルト約 28 時間)\n- Stdio 切断:SIGINT → SIGTERM → SIGKILL の順でプロセスを kill\n\n### 教学版の簡略化\n\n- 6 種のトランスポート → 1 種(mock stdio):概念量を管理可能に\n- Channel 逆方向通知 → 省略:教学版 Agent は常にイニシエータ\n- OAuth フロー → 省略:教学版は server が認証不要と仮定\n- 多層設定優先度 → 省略:教学版は直接 server 名を渡す\n- 複雑なエラー分類 → 省略:教学版は try/except でフォールバック\n- MCP ツールは Lead のみ → サブ agent 継承を省略:コード構造を簡略化\n\n
\n\n\n" + }, + { + "version": "s20", + "locale": "en", + "title": "s20: Comprehensive Agent — All Mechanisms, One Loop", + "content": "# s20: Comprehensive Agent — All Mechanisms, One Loop\n\ns01 → ... → s18 → s19 → `s20`\n\n> *\"Many mechanisms, one loop\"* — tools, permissions, memory, tasks, teams, and plugins all hang off the same `while True`.\n>\n> **Harness layer**: Comprehensive — put the previous 19 mechanisms back into one runnable system.\n\n---\n\n## Problem\n\nThe first 19 chapters add one mechanism at a time. That is the right way to learn, but a real agent does not run with only one mechanism enabled.\n\nA long-running coding agent needs all of these at once:\n\n- tool dispatch and permission boundaries\n- hook extension points\n- todo planning and task graphs\n- skills, memory, and runtime system prompt assembly\n- compaction and error recovery\n- background tasks and cron scheduling\n- teams, protocols, autonomous claiming\n- worktree isolation\n- MCP external tool integration\n\nThe hard part is not piling up features. The hard part is seeing where each mechanism belongs around the loop. S20 is the endpoint chapter: every component is placed back into one harness.\n\n---\n\n## Solution\n\n![System Architecture](/course-assets/s20_comprehensive/system-architecture.en.svg)\n\nS20 does not invent a new mechanism. 
It merges the teaching components from the earlier chapters into one complete harness:\n\n```text\nuser input\n → UserPromptSubmit hooks\n → cron/background notification injection\n → context compact\n → memory + skills + MCP state assemble the system prompt\n → LLM\n → has tool_use block?\n no → Stop hooks → return\n yes → PreToolUse hooks + permission\n → TOOL_HANDLERS / MCP handlers / background dispatch\n → PostToolUse hooks\n → tool_result / task_notification back to messages\n → next round\n```\n\nThe loop is still the same structure: call the model, check whether the response contains a `tool_use` block, execute tools, append results back to `messages`. CC source does not directly trust `stop_reason == \"tool_use\"`; the actual presence of a tool_use block is the continuation signal. What changed is that the harness around the loop is now complete.\n\n---\n\n## Where Each Component Sits\n\n| Position | Component | Role |\n|----------|-----------|------|\n| Around user input | `UserPromptSubmit` hooks | Log, inject, or audit user input |\n| Before LLM | cron queue | Inject scheduled prompts into `messages` |\n| Before LLM | background notifications | Inject completed background work as `<task_notification>` |\n| Before LLM | compaction pipeline | Budget large outputs, trim history, compact old tool results, summarize when needed |\n| Before LLM | memory / skills / MCP state | Assemble the system prompt so the model sees current capabilities and long-term context |\n| LLM call | error recovery | Retry 429/529, escalate `max_tokens`, compact on prompt-too-long |\n| Before tool execution | `PreToolUse` hooks + permission | Block dangerous commands, out-of-bounds writes, destructive MCP tools |\n| Tool dispatch | `assemble_tool_pool` | Assemble built-in tools and dynamic MCP tools |\n| During tool execution | background dispatch | Move slow bash work into a daemon thread and return a placeholder result |\n| After tool execution | `PostToolUse` hooks | Large-output warnings, logs, 
post-processing |\n| Back to loop | tool_result | One `tool_result` per `tool_use`, then the next model round |\n| No tool_use this round / on stop | `Stop` hooks | Stats, cleanup, audit |\n\n---\n\n## What code.py Contains\n\n### Tools and Dispatch\n\nThe built-in tool pool contains 27 tools:\n\n```text\nbash, read_file, write_file, edit_file, glob\ntodo_write, task, load_skill, compact\ncreate_task, list_tasks, get_task, claim_task, complete_task\nschedule_cron, list_crons, cancel_cron\nspawn_teammate, send_message, check_inbox\nrequest_shutdown, request_plan, review_plan\ncreate_worktree, remove_worktree, keep_worktree\nconnect_mcp\n```\n\n`assemble_tool_pool()` assembles these every round:\n\n```text\nBUILTIN_TOOLS + connected MCP tools\nBUILTIN_HANDLERS + mcp__server__tool handlers\n```\n\nAfter `connect_mcp(\"docs\")`, the next round exposes tools like `mcp__docs__search`.\n\n### Permissions and Hooks\n\nPermission is not hardcoded into the tool execution line. It is a `PreToolUse` hook:\n\n```python\nblocked = trigger_hooks(\"PreToolUse\", block)\nif blocked:\n results.append(tool_result(block.id, blocked))\n continue\n```\n\nThat means permission, logging, and audit logic all attach to the same hook point. After execution, `PostToolUse` hooks run.\n\n### Planning and Tasks\n\nS20 keeps two planning layers:\n\n- `todo_write`: lightweight plan for the current session, kept in memory\n- task graph: cross-session, dependency-aware, claimable task files under `.tasks/task_*.json`\n\nThe first keeps a single agent from drifting. The second supports team coordination.\n\n### Subagents and Teams\n\nS20 has two kinds of delegation:\n\n- `task`: one-shot subagent. It uses an isolated `messages[]`, discards intermediate context, and returns only a final summary.\n- `spawn_teammate`: persistent teammate thread. It communicates through `MessageBus`, polls the task board while idle, and can claim work autonomously.\n\nOne-shot subagents solve context isolation. 
Persistent teammates solve long-running parallel collaboration.\n\n### Memory, Skills, and Prompt\n\n`assemble_system_prompt(context)` assembles each round from:\n\n- identity and tool guidance\n- workspace\n- skills catalog\n- `.memory/MEMORY.md`\n- connected MCP servers\n\nSkills only put their catalog into the system prompt. Full content is loaded on demand through `load_skill(name)`.\n\n### Compaction and Recovery\n\nBefore the LLM call, S20 runs the compaction pipeline:\n\n```text\ntool_result_budget → snip_compact → micro_compact → compact_history\n```\n\nThe model call is wrapped with recovery:\n\n- 429: exponential backoff retry\n- 529: exponential backoff, optionally switch to fallback model after repeated failures\n- `max_tokens`: raise max tokens, then request continuation\n- prompt too long: reactive compact and retry\n\n### Background and Cron\n\nSlow bash work does not block the main loop:\n\n```text\nshould_run_background → start_background_task → placeholder tool_result\nbackground done → task_notification → next round injects messages\n```\n\nThe cron scheduler runs as a daemon thread and checks once per second. 
The CLI watches `cron_queue`; when a job fires, it injects `[Scheduled] ...` and runs one agent turn automatically.\n\n### Worktree and MCP\n\nWorktree isolation owns directories:\n\n- `create_worktree(name, task_id)` creates an isolated branch and directory\n- the task `worktree` field binds a task to that directory\n- when a teammate claims a task with a worktree, its bash/read/write tools run in that directory\n\nMCP owns external capability:\n\n- `connect_mcp(name)` connects a mock server\n- `assemble_tool_pool()` assembles MCP tools into the tool pool\n- tool names use `mcp__server__tool`\n\n---\n\n## Changes from s19\n\n| Component | s19 | s20 |\n|-----------|-----|-----|\n| tool pool | built-in + MCP | built-in + MCP, with s01-s18 tools restored |\n| permission | omitted in teaching body | runs inside `PreToolUse` hook |\n| hooks | omitted | UserPromptSubmit / PreToolUse / PostToolUse / Stop |\n| todo | omitted | `todo_write` + reminder |\n| skill | omitted | catalog in system prompt + `load_skill` |\n| compact | omitted | pre-LLM compaction + `compact` tool + reactive compact |\n| error recovery | simple try/except | retry / max_tokens / prompt too long |\n| background | omitted | slow-operation thread + task notification |\n| cron | omitted | daemon scheduler + durable jobs |\n| multi-agent | kept | kept; teammates use basic tools in isolated directories |\n| worktree | kept | kept |\n| MCP | new | kept as part of the final tool pool |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s20_comprehensive/code.py\n```\n\nTry:\n\n1. `Create a todo list for inspecting this repo, then list Python files`\n2. `Connect to the docs MCP server and search for agent loop`\n3. `Create two tasks, create worktrees for them, then spawn alice and bob. Ask them to submit plans before claiming tasks.`\n4. `remind me of the meeting in 3 minutes.`\n5. 
`Run npm install in the background and continue reading README.md`\n\nWatch for:\n\n- whether each tool call passes through hooks/permission\n- whether MCP tools appear on the next round after `connect_mcp`\n- whether slow operations return a background placeholder\n- whether cron automatically reminds you when the time arrives\n- whether teammates submit plans and pause before approval\n- whether teammates can claim tasks after plan approval\n- whether teammates switch to the bound worktree directory\n\n---\n\n## The End Is the Beginning\n\nFrom s01 to s20, the code gets more capable, but the core remains unchanged:\n\n```python\nwhile True:\n response = LLM(messages, tools)\n if not has_tool_use(response.content):\n return\n results = execute_tools(response.content)\n messages.append(tool_results)\n```\n\nClaude Code's complexity is not \"another agent brain.\" It is the complexity of a mature harness. The model decides and chooses actions; the harness organizes environment, tools, permissions, memory, teams, and external capabilities.\n\nThis is the endpoint of the course: many mechanisms, one loop.\n" + }, + { + "version": "s20", + "locale": "zh", + "title": "s20: Comprehensive Agent — 全部机制,归到一个循环", + "content": "# s20: Comprehensive Agent — 全部机制,归到一个循环\n\ns01 → ... 
→ s18 → s19 → `s20`\n\n> *\"机制很多,循环一个\"* — 工具、权限、记忆、任务、团队、插件都挂在同一个 while True 上。\n>\n> **Harness 层**: 综合 — 把前 19 章的机制放回同一个可运行系统。\n\n---\n\n## 问题\n\n前 19 章每章只加一个机制。这样适合学习,但真实 Agent 不会只带一个机制运行。\n\n一个能长期工作的 coding agent 需要同时拥有:\n\n- 工具分发和权限边界\n- hooks 扩展点\n- todo 计划和任务图\n- 技能、记忆、系统 prompt 组装\n- 压缩和错误恢复\n- 后台任务和 cron 调度\n- 团队、协议、自治认领\n- worktree 隔离\n- MCP 外部工具接入\n\n难点不是把功能堆起来,而是看清楚它们都挂在循环的哪个位置。S20 就是终点章:把所有组件归位。\n\n---\n\n## 解决方案\n\n![System Architecture](/course-assets/s20_comprehensive/system-architecture.svg)\n\nS20 不是再发明一个新机制,而是把前面的教学组件合成一个完整 harness:\n\n```text\n用户输入\n → UserPromptSubmit hooks\n → cron/background 通知注入\n → context compact\n → memory + skills + MCP 状态组装 system prompt\n → LLM\n → has tool_use block?\n 否 → Stop hooks → 返回\n 是 → PreToolUse hooks + permission\n → TOOL_HANDLERS / MCP handlers / background dispatch\n → PostToolUse hooks\n → tool_result / task_notification 回 messages\n → 下一轮\n```\n\n循环本身仍然是同一个结构:调用模型,检查响应里是否出现 `tool_use` block,执行工具,把结果追加回 `messages`。CC 源码里也不直接信任 `stop_reason == \"tool_use\"`,而是以实际出现的 tool_use block 作为是否继续工具轮的信号。变化的是循环周围的 harness 变完整了。\n\n---\n\n## 组件在循环中的位置\n\n| 位置 | 组件 | 作用 |\n|------|------|------|\n| 用户输入前后 | `UserPromptSubmit` hooks | 记录、注入、审计用户输入 |\n| LLM 前 | cron queue | 把定时触发的 prompt 注入 `messages` |\n| LLM 前 | background notifications | 后台任务完成后以 `<task_notification>` 注入 |\n| LLM 前 | compaction pipeline | 先压大输出,再裁历史,再压旧 tool_result,必要时摘要 |\n| LLM 前 | memory / skills / MCP state | 组装 system prompt,让模型看到当前能力和长期上下文 |\n| LLM 调用 | error recovery | 429/529 重试,`max_tokens` 升级,prompt too long 触发 reactive compact |\n| 工具执行前 | `PreToolUse` hooks + permission | 拦截危险命令、写越界、破坏性 MCP 工具 |\n| 工具分发 | `assemble_tool_pool` | 组装内置工具和 MCP 动态工具 |\n| 工具执行时 | background dispatch | 慢 bash 操作放 daemon thread,主循环先返回占位结果 |\n| 工具执行后 | `PostToolUse` hooks | 大输出告警、日志等后处理 |\n| 返回循环 | tool_result | 每个 `tool_use` 对应一个 `tool_result`,再回到下一轮 |\n| 本轮没有 tool_use / 停止时 | `Stop` hooks | 统计、清理、审计 |\n\n---\n\n## code.py 包含什么\n\n### 工具与分发\n\n内置工具池包含 27 个工具:\n\n```text\nbash, 
read_file, write_file, edit_file, glob\ntodo_write, task, load_skill, compact\ncreate_task, list_tasks, get_task, claim_task, complete_task\nschedule_cron, list_crons, cancel_cron\nspawn_teammate, send_message, check_inbox\nrequest_shutdown, request_plan, review_plan\ncreate_worktree, remove_worktree, keep_worktree\nconnect_mcp\n```\n\n`assemble_tool_pool()` 每轮组装:\n\n```text\nBUILTIN_TOOLS + connected MCP tools\nBUILTIN_HANDLERS + mcp__server__tool handlers\n```\n\n所以 `connect_mcp(\"docs\")` 后,下一轮工具池里会出现 `mcp__docs__search`。\n\n### 权限和 hooks\n\n权限不写死在工具执行行里,而是作为 `PreToolUse` hook:\n\n```python\nblocked = trigger_hooks(\"PreToolUse\", block)\nif blocked:\n results.append(tool_result(block.id, blocked))\n continue\n```\n\n这样 permission、log、审计都可以挂在同一个 hook 点上。执行后再触发 `PostToolUse`。\n\n### 计划与任务\n\nS20 同时保留两层计划:\n\n- `todo_write`:当前会话内的轻量计划,保存在内存中\n- task graph:跨会话、可依赖、可认领的任务文件,写入 `.tasks/task_*.json`\n\n前者帮助单个 Agent 不漂移;后者支撑团队协作。\n\n### 子 agent 与团队\n\nS20 有两种 delegation:\n\n- `task`:一次性 subagent。独立 `messages[]`,中间过程丢弃,只返回最终摘要。\n- `spawn_teammate`:持久队友线程。通过 MessageBus 收发消息,能 idle 轮询任务板并自动认领。\n\n一次性 subagent 解决“上下文隔离”;持久队友解决“长期并行协作”。\n\n### 记忆、技能和 prompt\n\n`assemble_system_prompt(context)` 每轮组装:\n\n- 身份和工具说明\n- workspace\n- skills catalog\n- `.memory/MEMORY.md`\n- 已连接 MCP server\n\n技能只在 system prompt 里放目录。完整内容通过 `load_skill(name)` 按需加载。\n\n### 压缩和恢复\n\nLLM 前先跑压缩管线:\n\n```text\ntool_result_budget → snip_compact → micro_compact → compact_history\n```\n\n调用模型时再包一层恢复:\n\n- 429:指数退避重试\n- 529:指数退避,连续失败可切 fallback model\n- `max_tokens`:先提高 max_tokens,再要求 continuation\n- prompt too long:reactive compact 后重试\n\n### 后台和 cron\n\n慢 bash 操作不会阻塞主循环:\n\n```text\nshould_run_background → start_background_task → placeholder tool_result\n后台完成 → task_notification → 下一轮注入 messages\n```\n\ncron 调度器独立 daemon thread 每秒检查一次。CLI 会监听 `cron_queue`,命中后主动把 `[Scheduled] ...` 注入并运行一轮 Agent。\n\n### worktree 与 MCP\n\nworktree 负责隔离目录:\n\n- `create_worktree(name, task_id)` 创建独立分支和目录\n- task 的 `worktree` 
字段绑定目录\n- 队友 claim 到带 worktree 的 task 后,bash/read/write 自动在对应目录下执行\n\nMCP 负责外部能力:\n\n- `connect_mcp(name)` 连接 mock server\n- `assemble_tool_pool()` 把 MCP 工具组装进工具池\n- 工具名统一为 `mcp__server__tool`\n\n---\n\n## 相对 s19 的变化\n\n| 组件 | s19 | s20 |\n|------|-----|-----|\n| 工具池 | 内置 + MCP | 内置 + MCP,补齐 s01-s18 的工具 |\n| 权限 | 教学主体省略 | `PreToolUse` hook 中执行 |\n| hooks | 省略 | UserPromptSubmit / PreToolUse / PostToolUse / Stop |\n| todo | 省略 | `todo_write` + reminder |\n| skill | 省略 | catalog in system prompt + `load_skill` |\n| compact | 省略 | LLM 前压缩 + `compact` 工具 + reactive compact |\n| error recovery | 简化 try/except | retry / max_tokens / prompt too long |\n| background | 省略 | 慢操作后台线程 + task notification |\n| cron | 省略 | daemon scheduler + durable jobs |\n| multi-agent | 保留 | 保留;队友使用隔离目录下的基础工具 |\n| worktree | 保留 | 保留 |\n| MCP | 新增 | 保留,作为最终工具池的一部分 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s20_comprehensive/code.py\n```\n\n可以试:\n\n1. `Create a todo list for inspecting this repo, then list Python files`\n2. `Connect to the docs MCP server and search for agent loop`\n3. `Create two tasks, create worktrees for them, then spawn alice and bob. Ask them to submit plans before claiming tasks.`\n4. `remind me of the meeting in 3 minutes.`\n5. 
`Run npm install in the background and continue reading README.md`\n\n观察重点:\n\n- 工具调用前是否经过 hooks/permission\n- `connect_mcp` 后下一轮是否出现 MCP 工具\n- 慢操作是否返回 background placeholder\n- 到点是不是自动提醒开会\n- 队友是否提交 plan,并在 approval 前暂停\n- plan 批准后,队友是否能认领任务\n- worktree 绑定后,队友是否切到对应目录\n\n---\n\n## 结束亦是开始\n\n从 s01 到 s20,代码表面越来越复杂,但核心始终没变:\n\n```python\nwhile True:\n response = LLM(messages, tools)\n if not has_tool_use(response.content):\n return\n results = execute_tools(response.content)\n messages.append(tool_results)\n```\n\nClaude Code 的复杂性不是“另一个 agent 大脑”,而是一个成熟 harness 的复杂性。模型负责判断和行动选择;harness 负责把环境、工具、权限、记忆、团队和外部能力组织好。\n\n这就是全书的终点:机制很多,循环一个。\n" + }, + { + "version": "s20", + "locale": "ja", + "title": "s20: Comprehensive Agent — すべての仕組みを 1 つのループへ", + "content": "# s20: Comprehensive Agent — すべての仕組みを 1 つのループへ\n\ns01 → ... → s18 → s19 → `s20`\n\n> *\"仕組みは多い、ループは 1 つ\"* — tools、permissions、memory、tasks、teams、plugins はすべて同じ `while True` に接続される。\n>\n> **Harness レイヤー**: 総合 — 前 19 章の仕組みを 1 つの実行可能なシステムへ戻す。\n\n---\n\n## 問題\n\n前 19 章では、各章が 1 つの仕組みだけを追加した。学習にはその形が適している。しかし実際の agent は、1 つの仕組みだけで動くわけではない。\n\n長時間動く coding agent には、同時に次のものが必要になる:\n\n- tool dispatch と permission boundary\n- hook extension point\n- todo plan と task graph\n- skill、memory、runtime system prompt assembly\n- compaction と error recovery\n- background task と cron scheduling\n- team、protocol、autonomous claiming\n- worktree isolation\n- MCP external tool integration\n\n難しいのは機能を積み上げることではない。それぞれの仕組みが loop のどこに接続されるかを見抜くことだ。S20 は終点章であり、すべての component を 1 つの harness に戻す。\n\n---\n\n## 解決策\n\n![System Architecture](/course-assets/s20_comprehensive/system-architecture.ja.svg)\n\nS20 は新しい単独 mechanism を発明しない。前章までの teaching component を 1 つの完全な harness に統合する:\n\n```text\nuser input\n → UserPromptSubmit hooks\n → cron/background notification injection\n → context compact\n → memory + skills + MCP state で system prompt を組み立てる\n → LLM\n → has tool_use block?\n no → Stop hooks → return\n yes → PreToolUse hooks + permission\n → 
TOOL_HANDLERS / MCP handlers / background dispatch\n → PostToolUse hooks\n → tool_result / task_notification を messages へ戻す\n → next round\n```\n\nloop 自体は同じ構造のままだ。model を呼び、response に `tool_use` block があるかを見て、tool を実行し、結果を `messages` に戻す。CC source でも `stop_reason == \"tool_use\"` を直接信頼せず、実際に tool_use block が出たかを continuation signal として扱う。変わったのは、loop の周囲の harness が完成形になったことだけ。\n\n---\n\n## 各 Component の位置\n\n| 位置 | Component | 役割 |\n|------|-----------|------|\n| user input 周辺 | `UserPromptSubmit` hooks | user input の記録、注入、監査 |\n| LLM 前 | cron queue | scheduled prompt を `messages` へ注入 |\n| LLM 前 | background notifications | 完了した background work を `` として注入 |\n| LLM 前 | compaction pipeline | 大きな出力を予算化し、履歴を切り、古い tool_result を圧縮し、必要なら要約 |\n| LLM 前 | memory / skills / MCP state | current capabilities と long-term context を system prompt に組み込む |\n| LLM call | error recovery | 429/529 retry、`max_tokens` escalation、prompt-too-long compact |\n| tool 実行前 | `PreToolUse` hooks + permission | 危険な command、範囲外 write、destructive MCP tool を止める |\n| tool dispatch | `assemble_tool_pool` | built-in tools と dynamic MCP tools を組み立てる |\n| tool 実行中 | background dispatch | 遅い bash work を daemon thread に逃がし、placeholder result を返す |\n| tool 実行後 | `PostToolUse` hooks | large-output warning、log、後処理 |\n| loop へ戻る | tool_result | 1 つの `tool_use` に 1 つの `tool_result`、そして次の model round |\n| tool_use がない round / stop 時 | `Stop` hooks | 統計、cleanup、audit |\n\n---\n\n## code.py に含まれるもの\n\n### Tools と Dispatch\n\nbuilt-in tool pool には 27 個の tool がある:\n\n```text\nbash, read_file, write_file, edit_file, glob\ntodo_write, task, load_skill, compact\ncreate_task, list_tasks, get_task, claim_task, complete_task\nschedule_cron, list_crons, cancel_cron\nspawn_teammate, send_message, check_inbox\nrequest_shutdown, request_plan, review_plan\ncreate_worktree, remove_worktree, keep_worktree\nconnect_mcp\n```\n\n`assemble_tool_pool()` は毎 round で次を組み立てる:\n\n```text\nBUILTIN_TOOLS + connected MCP 
tools\nBUILTIN_HANDLERS + mcp__server__tool handlers\n```\n\n`connect_mcp(\"docs\")` のあと、次の round では `mcp__docs__search` のような tool が出現する。\n\n### Permission と Hooks\n\npermission は tool 実行行に直接埋め込まない。`PreToolUse` hook として扱う:\n\n```python\nblocked = trigger_hooks(\"PreToolUse\", block)\nif blocked:\n results.append(tool_result(block.id, blocked))\n continue\n```\n\nこれにより permission、logging、audit が同じ hook point に接続できる。実行後には `PostToolUse` hook が走る。\n\n### Plan と Task\n\nS20 には 2 層の plan がある:\n\n- `todo_write`: current session 用の軽量 plan。メモリに保持。\n- task graph: cross-session、dependency-aware、claimable な task file。`.tasks/task_*.json` に保存。\n\n前者は単独 agent の drift を防ぐ。後者は team coordination の土台になる。\n\n### Subagent と Team\n\nS20 には 2 種類の delegation がある:\n\n- `task`: one-shot subagent。独立した `messages[]` を使い、中間 context を捨て、final summary だけ返す。\n- `spawn_teammate`: persistent teammate thread。`MessageBus` で通信し、idle 中に task board を polling して自律的に claim できる。\n\none-shot subagent は context isolation を解決する。persistent teammate は長期並列協作を解決する。\n\n### Memory、Skills、Prompt\n\n`assemble_system_prompt(context)` は毎 round 次を組み立てる:\n\n- identity と tool guidance\n- workspace\n- skills catalog\n- `.memory/MEMORY.md`\n- connected MCP servers\n\nskills は system prompt には catalog だけ置く。全文は `load_skill(name)` で必要な時に読む。\n\n### Compaction と Recovery\n\nLLM call の前に compaction pipeline を走らせる:\n\n```text\ntool_result_budget → snip_compact → micro_compact → compact_history\n```\n\nmodel call は recovery で包む:\n\n- 429: exponential backoff retry\n- 529: exponential backoff、連続失敗時は fallback model へ切替可能\n- `max_tokens`: max tokens を上げ、その後 continuation を要求\n- prompt too long: reactive compact 後に retry\n\n### Background と Cron\n\n遅い bash work は main loop を止めない:\n\n```text\nshould_run_background → start_background_task → placeholder tool_result\nbackground done → task_notification → next round injects messages\n```\n\ncron scheduler は daemon thread として動き、1 秒ごとに確認する。CLI は `cron_queue` を監視し、発火した job を `[Scheduled] ...` 
として注入して Agent を 1 turn 自動実行する。\n\n### Worktree と MCP\n\nworktree isolation は directory を担当する:\n\n- `create_worktree(name, task_id)` が isolated branch と directory を作る\n- task の `worktree` field が task と directory を紐付ける\n- teammate が worktree 付き task を claim すると、bash/read/write はその directory で実行される\n\nMCP は external capability を担当する:\n\n- `connect_mcp(name)` が mock server に接続する\n- `assemble_tool_pool()` が MCP tools を tool pool に組み立てる\n- tool name は `mcp__server__tool` 形式に統一する\n\n---\n\n## s19 からの変化\n\n| Component | s19 | s20 |\n|-----------|-----|-----|\n| tool pool | built-in + MCP | built-in + MCP、s01-s18 の tool を補完 |\n| permission | teaching body では省略 | `PreToolUse` hook で実行 |\n| hooks | 省略 | UserPromptSubmit / PreToolUse / PostToolUse / Stop |\n| todo | 省略 | `todo_write` + reminder |\n| skill | 省略 | system prompt の catalog + `load_skill` |\n| compact | 省略 | LLM 前 compaction + `compact` tool + reactive compact |\n| error recovery | simple try/except | retry / max_tokens / prompt too long |\n| background | 省略 | slow-operation thread + task notification |\n| cron | 省略 | daemon scheduler + durable jobs |\n| multi-agent | 維持 | 維持。teammate は isolated directory 上の basic tools を使う |\n| worktree | 維持 | 維持 |\n| MCP | 新規 | final tool pool の一部として維持 |\n\n---\n\n## 試す\n\n```sh\ncd learn-claude-code\npython s20_comprehensive/code.py\n```\n\n試す prompt:\n\n1. `Create a todo list for inspecting this repo, then list Python files`\n2. `Connect to the docs MCP server and search for agent loop`\n3. `Create two tasks, create worktrees for them, then spawn alice and bob. Ask them to submit plans before claiming tasks.`\n4. `remind me of the meeting in 3 minutes.`\n5. 
`Run npm install in the background and continue reading README.md`\n\n見るポイント:\n\n- tool call の前に hooks/permission を通るか\n- `connect_mcp` 後の次 round で MCP tool が出るか\n- 遅い operation が background placeholder を返すか\n- cron が時刻到達時に自動で reminder を返すか\n- teammate が plan を提出し、approval 前に停止するか\n- plan approval 後、teammate が task を claim できるか\n- worktree binding 後、teammate が対応 directory に切り替わるか\n\n---\n\n## 終わりは始まり\n\ns01 から s20 まで、コードの能力は増えていく。しかし中心は変わらない:\n\n```python\nwhile True:\n response = LLM(messages, tools)\n if not has_tool_use(response.content):\n return\n results = execute_tools(response.content)\n messages.append(tool_results)\n```\n\nClaude Code の複雑さは「別の agent brain」ではない。成熟した harness の複雑さだ。model は判断と action selection を担当する。harness は environment、tools、permissions、memory、teams、external capabilities を整理する。\n\nこれが本コースの終点だ:仕組みは多い、ループは 1 つ。\n" } ] \ No newline at end of file diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json index 0af62b7..afd71a1 100644 --- a/web/src/data/generated/versions.json +++ b/web/src/data/generated/versions.json @@ -2,302 +2,696 @@ "versions": [ { "id": "s01", - "filename": "s01_agent_loop.py", + "filename": "s01_agent_loop/code.py", "title": "The Agent Loop", - "subtitle": "Bash is All You Need", - "loc": 84, + "subtitle": "One Loop Is All You Need", + "loc": 102, "tools": [ "bash" ], "newTools": [ "bash" ], - "coreAddition": "Single-tool agent loop", - "keyInsight": "The minimal agent kernel is a while loop + one tool", + "coreAddition": "Minimal model/tool loop", + "keyInsight": "The smallest useful agent is a loop that calls the model, runs tools, and feeds results back.", "classes": [], "functions": [ { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 53 + "startLine": 69 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 67 + "startLine": 85 } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The 
Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": 
\"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\nUsage:\n pip install anthropic python-dotenv\n ANTHROPIC_API_KEY=... 
python s01_agent_loop/code.py\n\"\"\"\n\nimport os\nimport subprocess\n\ntry:\n import readline\n # macOS 的 libedit 在处理中文输入时有退格问题,这四行修复它\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\n# ── Tool definition: just bash ────────────────────────────\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\n# ── Tool execution ────────────────────────────────────────\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n\n# ── The core pattern: a while loop that calls tools until the model stops ──\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": 
response.content})\n\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n\n # Feed tool results back, loop continues\n messages.append({\"role\": \"user\", \"content\": results})\n\n\n# ── Entry point ──────────────────────────────────────────\nif __name__ == \"__main__\":\n print(\"s01: Agent Loop\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n # Print the model's final text response\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s01_agent_loop/agent-loop.svg", + "alt": "agent loop" + } + ] }, { "id": "s02", - "filename": "s02_tool_use.py", - "title": "Tools", - "subtitle": "One Handler Per Tool", - "loc": 120, + "filename": "s02_tool_use/code.py", + "title": "Tool Use", + "subtitle": "Add a Tool, Add Just One Line", + "loc": 135, "tools": [ "bash", "read_file", "write_file", - "edit_file" + "edit_file", + "glob" ], "newTools": [ "read_file", "write_file", - "edit_file" + "edit_file", + "glob" ], "coreAddition": "Tool dispatch map", - "keyInsight": "The loop stays the same; new tools register into the dispatch map", + "keyInsight": "The loop stays stable while capabilities register into a dispatch table.", 
"classes": [], "functions": [ - { - "name": "safe_path", - "signature": "def safe_path(p: str)", - "startLine": 40 - }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 47 - }, - { - "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 60 - }, - { - "name": "run_write", - "signature": "def run_write(path: str, content: str)", - "startLine": 71 - }, - { - "name": "run_edit", - "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 81 - }, - { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 113 - } - ], - "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. 
Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": 
\"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}: {output[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = 
history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s03", - "filename": "s03_todo_write.py", - "title": "TodoWrite", - "subtitle": "Plan Before You Act", - "loc": 176, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "todo" - ], - "newTools": [ - "todo" - ], - "coreAddition": "TodoManager + nag reminder", - "keyInsight": "An agent without a plan drifts; list the steps first, then execute", - "classes": [ - { - "name": "TodoManager", - "startLine": 51, - "endLine": 87 - } - ], - "functions": [ - { - "name": "safe_path", - "signature": "def safe_path(p: str)", - "startLine": 92 - }, - { - "name": "run_bash", - "signature": "def run_bash(command: str)", - "startLine": 98 - }, - { - "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 110 - }, - { - "name": "run_write", - "signature": "def run_write(path: str, content: str)", - "startLine": 119 - }, - { - "name": "run_edit", - "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 128 - }, - { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 163 - } - ], - "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. 
A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return 
self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, 
\"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos.\"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n 
response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s04", - "filename": "s04_subagent.py", - "title": "Subagents", - "subtitle": "Clean Context Per Subtask", - "loc": 151, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "task" - ], - "newTools": [ - "task" - ], - "coreAddition": "Subagent spawn with isolated messages[]", - "keyInsight": "Subagents use independent messages[], keeping the main conversation clean", - "classes": [], - "functions": [ - { - "name": "safe_path", - "signature": "def safe_path(p: str)", "startLine": 46 }, { - "name": "run_bash", - "signature": "def run_bash(command: str)", - "startLine": 52 + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 66 }, { "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 64 + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 73 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 73 + "startLine": 83 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 82 + "startLine": 93 }, { - "name": "run_subagent", - "signature": "def run_subagent(prompt: str)", - "startLine": 115 + "name": "run_glob", + "signature": "def run_glob(pattern: str)", + "startLine": 105 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 143 + "startLine": 150 } ], - "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] 
| | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\n output = run_subagent(block.input[\"prompt\"])\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "layer": "tools", + "source": "#!/usr/bin/env python3\n\"\"\"\ns02: Tool Use — 在 s01 基础上新增 4 个工具 + 分发映射。\n\n运行: python s02_tool_use/code.py\n需要: pip install anthropic python-dotenv + .env 中配置 ANTHROPIC_API_KEY\n\n本文件 = s01 的全部代码 + 以下新增:\n + run_read / run_write / run_edit / run_glob 四个工具实现\n + TOOL_HANDLERS 
分发映射(替代 s01 中硬编码的 run_bash 调用)\n + safe_path 路径安全校验\n\n循环本身(agent_loop)与 s01 完全一致。\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s01 (unchanged)\n# ═══════════════════════════════════════════════════════════\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True,\n encoding=\"utf-8\", errors=\"replace\", timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s02: 4 个新工具\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if 
limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s02: 工具定义(s01 只有一个 bash,现在扩展到 5 个)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n 
{\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s02: 工具分发映射(s01 是硬编码 run_bash,现在改为查表)\n# ═══════════════════════════════════════════════════════════\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — 与 s01 结构完全一致,只改了工具执行那部分\n# s01: output = run_bash(block.input[\"command\"])\n# s02: output = TOOL_HANDLERS[block.name](**block.input)\n# ═══════════════════════════════════════════════════════════\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s02: Tool Use — 在 s01 基础上加了 4 个工具\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n\n history = []\n while 
True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s02_tool_use/concurrency-comparison.svg", + "alt": "concurrency comparison" + }, + { + "src": "/course-assets/s02_tool_use/tool-dispatch.svg", + "alt": "tool dispatch" + } + ] }, { - "id": "s05", - "filename": "s05_skill_loading.py", - "title": "Skills", - "subtitle": "Load on Demand", - "loc": 187, + "id": "s03", + "filename": "s03_permission/code.py", + "title": "Permission", + "subtitle": "Check Permissions Before Execution", + "loc": 180, "tools": [ "bash", "read_file", "write_file", "edit_file", - "load_skill" - ], - "newTools": [ - "load_skill" - ], - "coreAddition": "SkillLoader + two-layer injection", - "keyInsight": "Inject knowledge via tool_result when needed, not upfront in the system prompt", - "classes": [ - { - "name": "SkillLoader", - "startLine": 57, - "endLine": 105 - } + "glob" ], + "newTools": [], + "coreAddition": "Permission gate", + "keyInsight": "Dangerous actions need a harness decision point before the shell runs.", + "classes": [], "functions": [ { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 117 + "startLine": 60 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 123 + "startLine": 67 }, { "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 135 + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 77 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 144 + "startLine": 87 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, 
new_text: str)", - "startLine": 153 + "startLine": 97 + }, + { + "name": "run_glob", + "signature": "def run_glob(pattern: str)", + "startLine": 109 + }, + { + "name": "check_deny_list", + "signature": "def check_deny_list(command: str)", + "startLine": 151 + }, + { + "name": "check_rules", + "signature": "def check_rules(tool_name: str, args: dict)", + "startLine": 168 + }, + { + "name": "ask_user", + "signature": "def ask_user(tool_name: str, args: dict, reason: str)", + "startLine": 176 + }, + { + "name": "check_permission", + "signature": "def check_permission(block)", + "startLine": 184 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 187 + "startLine": 202 } ], - "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. 
Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. 
Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n\"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "layer": "tools", + "source": "#!/usr/bin/env python3\n\"\"\"\ns03_permission.py - Permission System\n\nThree gates inserted before tool execution:\n\n Gate 1: Hard deny list (rm -rf /, sudo, ...)\n Gate 2: Rule matching (write outside workspace? 
destructive cmd?)\n Gate 3: User approval (pause and wait for confirmation)\n\n +-------+ +--------+ +--------+ +--------+ +------+\n | Tool | -> | Gate 1 | -> | Gate 2 | -> | Gate 3 | -> | Exec |\n | call | | deny? | | match? | | allow? | | |\n +-------+ +--------+ +--------+ +--------+ +------+\n | | | |\n v v v v\n (normal) (blocked) (ask user) (user says no?)\n\nOnly one line added to the agent loop:\n\n if not check_permission(block):\n continue\n\nBuilds on s02 (multi-tool). Usage:\n\n python s03_permission/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
All destructive operations require user approval.\"\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# 
═══════════════════════════════════════════════════════════\n# FROM s02 (unchanged): Tool Definitions & Dispatch\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s03: Three-Gate Permission Pipeline\n# ═══════════════════════════════════════════════════════════\n\n# Gate 1: Hard deny list — always forbidden\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\", \"> /dev/sda\"]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is 
on the deny list\"\n return None\n\n\n# Gate 2: Rule matching — context-dependent checks\nPERMISSION_RULES = [\n {\"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\"},\n {\"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\"},\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n\n\n# Gate 3: User approval — wait for confirmation after rule match\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n\\033[33m⚠ {reason}\\033[0m\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n\n\n# Pipeline: all three gates chained\ndef check_permission(block) -> bool:\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n\\033[31m⛔ {reason}\\033[0m\")\n return False\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n return True\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — same as s02, with check_permission() inserted\n# ═══════════════════════════════════════════════════════════\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if 
block.type != \"tool_use\":\n continue\n\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s03 change: run through permission pipeline before executing\n if not check_permission(block):\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"Permission denied.\"})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s03: Permission\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s03_permission/permission-overview.svg", + "alt": "permission overview" + }, + { + "src": "/course-assets/s03_permission/permission-pipeline.svg", + "alt": "permission pipeline" + } + ] }, { - "id": "s06", - "filename": "s06_context_compact.py", - "title": "Compact", - "subtitle": "Three-Layer Compression", - "loc": 205, + "id": "s04", + "filename": "s04_hooks/code.py", + "title": "Hooks", + "subtitle": "Hang on the Loop, Don't Write into It", + "loc": 232, "tools": [ "bash", "read_file", "write_file", "edit_file", + "glob" + ], + "newTools": [], + "coreAddition": "Lifecycle hooks", + "keyInsight": "Cross-cutting behavior belongs around the loop, not tangled inside it.", + "classes": [], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 81 + }, + { + "name": "run_bash", + 
"signature": "def run_bash(command: str)", + "startLine": 87 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 96 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 105 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 114 + }, + { + "name": "run_glob", + "signature": "def run_glob(pattern: str)", + "startLine": 125 + }, + { + "name": "register_hook", + "signature": "def register_hook(event: str, callback)", + "startLine": 161 + }, + { + "name": "trigger_hooks", + "signature": "def trigger_hooks(event: str, *args)", + "startLine": 164 + }, + { + "name": "permission_hook", + "signature": "def permission_hook(block)", + "startLine": 176 + }, + { + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 200 + }, + { + "name": "large_output_hook", + "signature": "def large_output_hook(block, output)", + "startLine": 206 + }, + { + "name": "context_inject_hook", + "signature": "def context_inject_hook(query: str)", + "startLine": 213 + }, + { + "name": "summary_hook", + "signature": "def summary_hook(messages: list)", + "startLine": 218 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 238 + } + ], + "layer": "tools", + "source": "#!/usr/bin/env python3\n\"\"\"\ns04: Hooks — move extension logic out of the loop, onto hooks.\n\n User types query\n │\n ▼\n ┌──────────────────┐\n │ UserPromptSubmit │ ── trigger_hooks() before LLM\n └────────┬─────────┘\n ▼\n ┌────────────┐ ┌─────────────────────────────┐\n │ messages │────▶│ LLM (stop_reason=tool_use?)│\n └────────────┘ │ No ──▶ Stop hooks ──▶ exit │\n │ Yes ──▶ tool_use block ──┐ │\n └────────────────────────────┘ │\n ▼\n ┌──────────────────┐\n │ trigger_hooks() │\n │ PreToolUse: │\n │ permission_hook │\n │ log_hook │\n └───────┬──────────┘\n │ (not blocked)\n 
┌───────▼──────────┐\n │ TOOL_HANDLERS[x] │\n └───────┬──────────┘\n │\n ┌───────▼──────────┐\n │ trigger_hooks() │\n │ PostToolUse: │\n │ large_output │\n └───────┬──────────┘\n │\n results ──▶ back to messages\n\nChanges from s03:\n + HOOKS registry (event -> list of callbacks)\n + register_hook() / trigger_hooks()\n + context_inject_hook (UserPromptSubmit)\n + permission_hook, log_hook (PreToolUse)\n + large_output_hook (PostToolUse)\n + summary_hook (Stop)\n - check_permission() removed from loop body\n (logic moved into permission_hook, triggered via PreToolUse)\n\nRun: python s04_hooks/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. 
Act, don't explain.\"\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s03 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell 
command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s04: Hook System (s03 permission logic now via hooks)\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None: # teaching shortcut: block this tool call\n return result\n return None\n\n\n# s03 permission check logic, now wrapped as a hook\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd 
if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\ndef permission_hook(block):\n \"\"\"PreToolUse: s03 check_permission() logic moved here.\"\"\"\n if block.name == \"bash\":\n for pattern in DENY_LIST:\n if pattern in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{pattern}'\\033[0m\")\n return \"Permission denied by deny list\"\n for kw in DESTRUCTIVE:\n if kw in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[33m⚠ Potentially destructive command\\033[0m\")\n print(f\" Tool: {block.name}({block.input})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n if not (WORKDIR / path).resolve().is_relative_to(WORKDIR):\n print(f\"\\n\\033[33m⚠ Writing outside workspace\\033[0m\")\n print(f\" Tool: {block.name}({block.input})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\ndef log_hook(block):\n \"\"\"PreToolUse: log every tool call.\"\"\"\n args_preview = str(list(block.input.values())[:2])[:60]\n print(f\"\\033[90m[HOOK] {block.name}({args_preview})\\033[0m\")\n return None\n\ndef large_output_hook(block, output):\n \"\"\"PostToolUse: warn on large output.\"\"\"\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] ⚠ Large output from {block.name}: {len(str(output))} chars\\033[0m\")\n return None\n\n# UserPromptSubmit hook: log user input before it reaches the LLM\ndef context_inject_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None\n\n# Stop hook: print summary when loop is about to exit\ndef summary_hook(messages: list):\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == 
\"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — same structure as s03, but no hard-coded check\n# s03: if not check_permission(block): ...\n# s04: if trigger_hooks(\"PreToolUse\", block): ...\n# ═══════════════════════════════════════════════════════════\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # s04 change: hook replaces hard-coded check_permission()\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output) # s04: post hook\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s04: Hooks — extension logic on hooks, loop stays clean\")\n print(\"Type a question, press Enter. 
Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s04_hooks/hooks-overview.svg", + "alt": "hooks overview" + } + ] + }, + { + "id": "s05", + "filename": "s05_todo_write/code.py", + "title": "TodoWrite", + "subtitle": "An Agent Without a Plan Drifts Off Course", + "loc": 219, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "glob", + "todo_write" + ], + "newTools": [ + "todo_write" + ], + "coreAddition": "Todo manager", + "keyInsight": "Explicit plans keep long-running work visible and correctable.", + "classes": [], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 64 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 70 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 79 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 88 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 97 + }, + { + "name": "run_glob", + "signature": "def run_glob(pattern: str)", + "startLine": 108 + }, + { + "name": "run_todo_write", + "signature": "def run_todo_write(todos: list)", + "startLine": 124 + }, + { + "name": "register_hook", + "signature": "def register_hook(event: str, callback)", + "startLine": 168 + }, + { + "name": "trigger_hooks", + "signature": "def trigger_hooks(event: str, *args)", + "startLine": 171 + }, + { + "name": 
"permission_hook", + "signature": "def permission_hook(block)", + "startLine": 181 + }, + { + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 190 + }, + { + "name": "context_inject_hook", + "signature": "def context_inject_hook(query: str)", + "startLine": 195 + }, + { + "name": "summary_hook", + "signature": "def summary_hook(messages: list)", + "startLine": 200 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 220 + } + ], + "layer": "planning", + "source": "#!/usr/bin/env python3\n\"\"\"\ns05: TodoWrite — add a planning tool on top of s04 hooks.\n\n +---------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | TOOL_HANDLERS |\n | prompt | | | | bash |\n +---------+ +---+---+ | read_file |\n ^ | write_file |\n | result | edit_file |\n +---------+ glob |\n todo_write ← NEW\n +------------------+\n |\n in-memory current_todos\n |\n if rounds_since_todo >= 3:\n inject \n\nChanges from s04:\n + todo_write tool + run_todo_write() implementation\n + Nag reminder (inject reminder after 3 rounds without todo update)\n + SYSTEM prompt includes \"plan before execute\" guidance\n + rounds_since_todo counter in agent_loop\n Loop unchanged: new tool auto-dispatches via TOOL_HANDLERS.\n\nRun: python s05_todo_write/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s05 change: SYSTEM prompt adds planning guidance\nSYSTEM = (\n f\"You are a coding agent at 
{WORKDIR}. \"\n \"Before starting any multi-step task, use todo_write to plan your steps. \"\n \"Update status as you go.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s04 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except 
Exception as e:\n return f\"Error: {e}\"\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s05: todo_write tool — plan only, no execution\n# ═══════════════════════════════════════════════════════════\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n # validate required fields\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": 
\"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n # s05: new tool\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s04 (unchanged): Hook System\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n# s04 hooks preserved\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\n\ndef permission_hook(block):\n \"\"\"PreToolUse: deny list check.\"\"\"\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{p}'\\033[0m\")\n return \"Permission denied\"\n return None\n\ndef log_hook(block):\n \"\"\"PreToolUse: log tool calls.\"\"\"\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\ndef context_inject_hook(query: str):\n \"\"\"UserPromptSubmit: log working directory.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in 
{WORKDIR}\\033[0m\")\n return None\n\ndef summary_hook(messages: list):\n \"\"\"Stop: print tool call count.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — same as s04 + nag reminder counter\n# ═══════════════════════════════════════════════════════════\n\nrounds_since_todo = 0\n\ndef agent_loop(messages: list):\n global rounds_since_todo\n while True:\n # s05: nag reminder — inject if model hasn't updated todos for 3 rounds\n if rounds_since_todo >= 3 and messages:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n rounds_since_todo += 1\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n # s05: reset nag counter when todo_write is called\n if 
block.name == \"todo_write\":\n rounds_since_todo = 0\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s05: TodoWrite — plan before execute, nag if you forget\")\n print(\"Type a question, press Enter. Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s05_todo_write/todo-overview.svg", + "alt": "todo overview" + } + ] + }, + { + "id": "s06", + "filename": "s06_subagent/code.py", + "title": "Subagent", + "subtitle": "Break Large Tasks into Small Ones with Clean Context", + "loc": 287, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "glob", + "todo_write", + "task" + ], + "newTools": [ + "task" + ], + "coreAddition": "Isolated subtask context", + "keyInsight": "Subagents give each subtask a clean message history while preserving the main thread.", + "classes": [], + "functions": [ + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 69 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 75 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 84 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 93 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 102 + }, + { + "name": "run_glob", + 
"signature": "def run_glob(pattern: str)", + "startLine": 113 + }, + { + "name": "run_todo_write", + "signature": "def run_todo_write(todos: list)", + "startLine": 124 + }, + { + "name": "extract_text", + "signature": "def extract_text(content)", + "startLine": 139 + }, + { + "name": "spawn_subagent", + "signature": "def spawn_subagent(description: str)", + "startLine": 189 + }, + { + "name": "register_hook", + "signature": "def register_hook(event: str, callback)", + "startLine": 248 + }, + { + "name": "trigger_hooks", + "signature": "def trigger_hooks(event: str, *args)", + "startLine": 251 + }, + { + "name": "permission_hook", + "signature": "def permission_hook(block)", + "startLine": 260 + }, + { + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 269 + }, + { + "name": "context_inject_hook", + "signature": "def context_inject_hook(query: str)", + "startLine": 274 + }, + { + "name": "summary_hook", + "signature": "def summary_hook(messages: list)", + "startLine": 279 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 299 + } + ], + "layer": "planning", + "source": "#!/usr/bin/env python3\n\"\"\"\ns06: Subagent — spawn sub-agents with fresh messages[] for context isolation.\n\n Parent Agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[task] | <-- fresh\n | | dispatch | |\n | tool: task | ---------------> | own while loop |\n | prompt=\"...\" | | bash/read/... 
|\n | | summary only | (max 30 turns) |\n | result = \"...\" | <--------------- | return last text |\n +------------------+ +------------------+\n ^ |\n | intermediate results DISCARDED |\n +--------------------------------------+\n\n Subagent tools: bash, read, write, edit, glob (NO task — no recursion)\n\nChanges from s05:\n + task tool + spawn_subagent() with fresh messages[]\n + Safety limit: max 30 turns per subagent\n + extract_text() helper\n Subagent cannot spawn sub-subagents (no task tool in sub_tools).\n Main loop unchanged: task auto-dispatches via TOOL_HANDLERS.\n\nRun: python s06_subagent/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"For complex sub-problems, use the task tool to spawn a subagent.\"\n)\n\n# s06: subagent gets its own system prompt — no task, no recursion\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. 
\"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s05 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_todo_write(todos: list) -> str:\n global 
CURRENT_TODOS\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n \"\"\"Extract text from message content blocks.\"\"\"\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s06: Subagent — fresh messages[], summary only\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob 
pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\n# NO \"task\" tool — prevent recursive spawning\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n}\n\ndef spawn_subagent(description: str) -> str:\n \"\"\"Spawn a subagent with fresh messages[], return summary only.\"\"\"\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}] # fresh context\n\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n # Issue 1: subagent also runs hooks (permissions apply)\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Issue 5: fallback if safety limit hit during tool_use\n result = extract_text(messages[-1][\"content\"])\n if not result:\n # last message is tool_result, look backwards for assistant text\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without 
final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result # only summary, entire message history discarded\n\n# Add task tool to parent's tools\nTOOLS.append({\n \"name\": \"task\",\n \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]},\n})\nTOOL_HANDLERS[\"task\"] = spawn_subagent\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s04 (unchanged): Hook System\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\n\ndef permission_hook(block):\n \"\"\"PreToolUse: deny list check.\"\"\"\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{p}'\\033[0m\")\n return \"Permission denied\"\n return None\n\ndef log_hook(block):\n \"\"\"PreToolUse: log tool calls.\"\"\"\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\ndef context_inject_hook(query: str):\n \"\"\"UserPromptSubmit: log working directory.\"\"\"\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None\n\ndef summary_hook(messages: list):\n \"\"\"Stop: print tool call count.\"\"\"\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool 
calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — same as s05 + nag reminder, task auto-dispatches\n# ═══════════════════════════════════════════════════════════\n\nrounds_since_todo = 0\n\ndef agent_loop(messages: list):\n global rounds_since_todo\n while True:\n # s05: nag reminder\n if rounds_since_todo >= 3 and messages:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n rounds_since_todo += 1\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s06: Subagent — spawn sub-agents with fresh context, summary only\")\n print(\"Type a question, press Enter. 
Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s06_subagent/subagent-overview.svg", + "alt": "subagent overview" + } + ] + }, + { + "id": "s07", + "filename": "s07_skill_loading/code.py", + "title": "Skill Loading", + "subtitle": "Load Only When Needed", + "loc": 321, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "glob", + "todo_write", + "task", + "load_skill" + ], + "newTools": [ + "load_skill" + ], + "coreAddition": "On-demand skill loader", + "keyInsight": "Inject specialized knowledge only when the task actually needs it.", + "classes": [], + "functions": [ + { + "name": "_parse_frontmatter", + "signature": "def _parse_frontmatter(text: str)", + "startLine": 52 + }, + { + "name": "_scan_skills", + "signature": "def _scan_skills()", + "startLine": 69 + }, + { + "name": "list_skills", + "signature": "def list_skills()", + "startLine": 86 + }, + { + "name": "build_system", + "signature": "def build_system()", + "startLine": 93 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 116 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 122 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 131 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 140 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + "startLine": 149 + }, + { + "name": "run_glob", + 
"signature": "def run_glob(pattern: str)", + "startLine": 160 + }, + { + "name": "run_todo_write", + "signature": "def run_todo_write(todos: list)", + "startLine": 171 + }, + { + "name": "extract_text", + "signature": "def extract_text(content)", + "startLine": 186 + }, + { + "name": "spawn_subagent", + "signature": "def spawn_subagent(description: str)", + "startLine": 211 + }, + { + "name": "load_skill", + "signature": "def load_skill(name: str)", + "startLine": 251 + }, + { + "name": "register_hook", + "signature": "def register_hook(event: str, callback)", + "startLine": 296 + }, + { + "name": "trigger_hooks", + "signature": "def trigger_hooks(event: str, *args)", + "startLine": 299 + }, + { + "name": "permission_hook", + "signature": "def permission_hook(block)", + "startLine": 308 + }, + { + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 316 + }, + { + "name": "context_inject_hook", + "signature": "def context_inject_hook(query: str)", + "startLine": 320 + }, + { + "name": "summary_hook", + "signature": "def summary_hook(messages: list)", + "startLine": 324 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 343 + } + ], + "layer": "planning", + "source": "#!/usr/bin/env python3\n\"\"\"\ns07: Skill Loading — two-level on-demand knowledge injection.\n\n Layer 1 (cheap, always present):\n SYSTEM prompt includes skill names + one-line descriptions (~100 tokens/skill)\n \"Skills available: agent-builder, code-review, mcp-builder, pdf\"\n\n Layer 2 (expensive, on demand):\n Agent calls load_skill(\"code-review\") → full SKILL.md content\n injected via tool_result (~2000 tokens/skill)\n\n skills/\n agent-builder/SKILL.md\n code-review/SKILL.md\n mcp-builder/SKILL.md\n pdf/SKILL.md\n\nChanges from s06:\n + build_system() — scan skills/ dir at startup, inject catalog into SYSTEM\n + load_skill(name) — return full SKILL.md content via tool_result\n + SKILLS_DIR config\n Loop unchanged: load_skill 
auto-dispatches via TOOL_HANDLERS.\n\nRun: python s07_skill_loading/code.py\nNeeds: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (used by build_system below)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n \"\"\"Parse YAML frontmatter from SKILL.md. Returns (meta, body).\"\"\"\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n# Build skill registry at startup (used for safe lookup in load_skill)\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n \"\"\"Scan skills/ dir, populate SKILL_REGISTRY with name/description/content.\"\"\"\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n \"\"\"List all skills (name + one-line description).\"\"\"\n if not SKILL_REGISTRY:\n return 
\"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\n# s07: SYSTEM includes skill catalog (cheap — just names + descriptions)\ndef build_system() -> str:\n \"\"\"Build SYSTEM prompt with skill catalog injected at startup.\"\"\"\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s07: subagent gets its own system prompt — no skill loading, no task\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s06 (unchanged): Tool Implementations\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# 
═══════════════════════════════════════════════════════════\n# FROM s06 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(description: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = 
trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s07: load_skill — runtime full content loading\n# ═══════════════════════════════════════════════════════════\n\ndef load_skill(name: str) -> str:\n \"\"\"Load full skill content. 
Lookup via registry — no path traversal.\"\"\"\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Registry — all tools from s02-s07\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": 
[\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n # s07: skill tool (catalog is already in SYSTEM prompt, this loads full content)\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s04 (unchanged): Hook System\n# ═══════════════════════════════════════════════════════════\n\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [], \"PostToolUse\": [], \"Stop\": []}\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\n\ndef permission_hook(block):\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"):\n print(f\"\\n\\033[31m⛔ Blocked: '{p}'\\033[0m\")\n return \"Permission denied\"\n return None\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\ndef context_inject_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: working in {WORKDIR}\\033[0m\")\n return None\n\ndef summary_hook(messages: list):\n tool_count = sum(1 for m in messages\n for b in (m.get(\"content\") if isinstance(m.get(\"content\"), list) else [])\n if 
isinstance(b, dict) and b.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: session used {tool_count} tool calls\\033[0m\")\n return None\n\nregister_hook(\"UserPromptSubmit\", context_inject_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"Stop\", summary_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — same as s05-s06 + nag reminder\n# ═══════════════════════════════════════════════════════════\n\nrounds_since_todo = 0\n\ndef agent_loop(messages: list):\n global rounds_since_todo\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n force = trigger_hooks(\"Stop\", messages)\n if force:\n messages.append({\"role\": \"user\", \"content\": force})\n continue\n return\n\n rounds_since_todo += 1\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n\n trigger_hooks(\"PostToolUse\", block, output)\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s07: Skill Loading — 
catalog in SYSTEM, content on demand\")\n print(\"Type a question, press Enter. Type q to quit.\\n\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s07_skill_loading/skill-overview.svg", + "alt": "skill overview" + } + ] + }, + { + "id": "s08", + "filename": "s08_context_compact/code.py", + "title": "Context Compact", + "subtitle": "Context Will Fill Up", + "loc": 365, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "glob", + "todo_write", + "task", + "load_skill", "compact" ], "newTools": [ "compact" ], - "coreAddition": "micro-compact + auto-compact + archival", - "keyInsight": "Context will fill up; three-layer compression strategy enables infinite sessions", + "coreAddition": "Context compaction", + "keyInsight": "Compression keeps the conversation usable when the context window gets crowded.", "classes": [], "functions": [ { - "name": "estimate_tokens", - "signature": "def estimate_tokens(messages: list)", - "startLine": 61 + "name": "_parse_frontmatter", + "signature": "def _parse_frontmatter(text: str)", + "startLine": 59 }, { - "name": "micro_compact", - "signature": "def micro_compact(messages: list)", - "startLine": 67 + "name": "_scan_skills", + "signature": "def _scan_skills()", + "startLine": 74 }, { - "name": "auto_compact", - "signature": "def auto_compact(messages: list)", - "startLine": 97 + "name": "list_skills", + "signature": "def list_skills()", + "startLine": 90 + }, + { + "name": "load_skill", + "signature": "def load_skill(name: str)", + "startLine": 95 + }, + { + "name": 
"build_system", + "signature": "def build_system()", + "startLine": 102 }, { "name": "safe_path", @@ -307,525 +701,2901 @@ { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 130 + "startLine": 129 }, { "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 142 - }, - { - "name": "run_write", - "signature": "def run_write(path: str, content: str)", - "startLine": 151 - }, - { - "name": "run_edit", - "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 160 - }, - { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 194 - } - ], - "layer": "memory", - "source": "#!/usr/bin/env python3\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT)\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: 
{transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[:80000]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if 
isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s07", - "filename": "s07_task_system.py", - "title": "Tasks", - "subtitle": "Task Graph + Dependencies", - "loc": 207, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "task_create", - "task_update", - "task_list", - "task_get" - ], - "newTools": [ - "task_create", - "task_update", - "task_list", - "task_get" - ], - "coreAddition": "TaskManager with file-based state + dependency graph", - "keyInsight": "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work", - "classes": [ - { - "name": "TaskManager", - "startLine": 46, - "endLine": 125 - } - ], - "functions": [ - { - "name": "safe_path", - "signature": "def safe_path(p: str)", - "startLine": 130 - }, - { - "name": "run_bash", - "signature": "def run_bash(command: str)", + "signature": "def run_read(path: str, limit: int | None = None)", "startLine": 136 }, - { - "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 148 - }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 157 + "startLine": 143 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 166 + "startLine": 149 }, { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 209 - } - ], - "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy/blocks).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency 
resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str 
= None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n 
dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell 
command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": 
[\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s08", - "filename": "s08_background_tasks.py", - "title": "Background Tasks", - "subtitle": "Background Threads + Notifications", - "loc": 198, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "background_run", - "check_background" - ], - "newTools": [ - "background_run", - "check_background" - ], - "coreAddition": "BackgroundManager + notification queue", - "keyInsight": "Run slow operations in the background; the agent keeps thinking ahead", - "classes": [ - { - "name": "BackgroundManager", - "startLine": 49, - "endLine": 109 - } - ], - "functions": [ - { - "name": "safe_path", - "signature": "def safe_path(p: str)", - "startLine": 114 + "name": "run_glob", + 
"signature": "def run_glob(pattern: str)", + "startLine": 158 }, { - "name": "run_bash", - "signature": "def run_bash(command: str)", - "startLine": 120 + "name": "run_todo_write", + "signature": "def run_todo_write(todos: list)", + "startLine": 168 }, { - "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 132 + "name": "extract_text", + "signature": "def extract_text(content)", + "startLine": 183 }, { - "name": "run_write", - "signature": "def run_write(path: str, content: str)", - "startLine": 141 + "name": "spawn_subagent", + "signature": "def spawn_subagent(task: str)", + "startLine": 207 }, { - "name": "run_edit", - "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 150 + "name": "estimate_size", + "signature": "def estimate_size(msgs)", + "startLine": 251 }, { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 187 - } - ], - "layer": "concurrency", - "source": "#!/usr/bin/env python3\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... 
|\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = 
f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": 
{\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n\"})\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s09", - "filename": "s09_agent_teams.py", - "title": "Agent Teams", - "subtitle": "Teammates + Mailboxes", - "loc": 348, - "tools": [ - "alice", - "bash", - "read_file", - "write_file", - "edit_file", - "send_message", - "read_inbox", - "spawn_teammate", - "list_teammates", - "broadcast" - ], - "newTools": [ - "alice", - "send_message", - "read_inbox", - "spawn_teammate", - "list_teammates", - "broadcast" - ], - "coreAddition": "TeammateManager + file-based mailbox", - "keyInsight": "When one agent can't finish, delegate to persistent teammates via async mailboxes", - "classes": [ - { - "name": "MessageBus", - "startLine": 77, - "endLine": 118 + "name": "snip_compact", + "signature": "def snip_compact(messages, max_messages=50)", + "startLine": 255 }, { - "name": "TeammateManager", - "startLine": 123, - "endLine": 249 - } - ], - "functions": [ - { - "name": "_safe_path", - "signature": "def _safe_path(p: str)", - "startLine": 254 + "name": "collect_tool_results", + "signature": "def collect_tool_results(messages)", + "startLine": 263 }, { - "name": "_run_bash", - "signature": "def _run_bash(command: str)", - "startLine": 261 + "name": "micro_compact", + "signature": "def micro_compact(messages)", + "startLine": 272 }, { - "name": "_run_read", - "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 276 + "name": "persist_large_output", + "signature": "def persist_large_output(tool_use_id, output)", + "startLine": 282 }, { - "name": "_run_write", - "signature": "def _run_write(path: str, content: str)", - "startLine": 286 + "name": "tool_result_budget", + "signature": "def tool_result_budget(messages, max_bytes=200_000)", + "startLine": 289 }, { - "name": 
"_run_edit", - "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 296 + "name": "write_transcript", + "signature": "def write_transcript(messages)", + "startLine": 307 }, { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 344 - } - ], - "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... 
|\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n 
self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, 
args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) 
-> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": 
\"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = 
client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s10", - "filename": "s10_team_protocols.py", - "title": "Team Protocols", - "subtitle": "Shared Communication Rules", - "loc": 419, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "send_message", - "read_inbox", - "shutdown_response", - "plan_approval", - "spawn_teammate", - "list_teammates", - "broadcast", - "shutdown_request" - ], - "newTools": [ - "shutdown_response", - "plan_approval", - "shutdown_request" - ], - "coreAddition": "request_id correlation for two protocols", - "keyInsight": "One request-response pattern drives all team negotiation", - 
"classes": [ - { - "name": "MessageBus", - "startLine": 87, - "endLine": 128 + "name": "summarize_history", + "signature": "def summarize_history(messages)", + "startLine": 314 }, { - "name": "TeammateManager", - "startLine": 133, - "endLine": 290 - } - ], - "functions": [ - { - "name": "_safe_path", - "signature": "def _safe_path(p: str)", - "startLine": 295 + "name": "compact_history", + "signature": "def compact_history(messages)", + "startLine": 325 }, { - "name": "_run_bash", - "signature": "def _run_bash(command: str)", - "startLine": 302 + "name": "reactive_compact", + "signature": "def reactive_compact(messages)", + "startLine": 333 }, { - "name": "_run_read", - "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 317 - }, - { - "name": "_run_write", - "signature": "def _run_write(path: str, content: str)", - "startLine": 327 - }, - { - "name": "_run_edit", - "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 337 - }, - { - "name": "handle_shutdown_request", - "signature": "def handle_shutdown_request(teammate: str)", - "startLine": 350 - }, - { - "name": "handle_plan_review", - "signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")", - "startLine": 361 - }, - { - "name": "_check_shutdown_status", - "signature": "def _check_shutdown_status(request_id: str)", - "startLine": 375 - }, - { - "name": "agent_loop", - "signature": "def agent_loop(messages: list)", - "startLine": 425 - } - ], - "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? 
|\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return 
json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. 
\"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = 
args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": 
{\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return 
json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": 
\"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" - }, - { - "id": "s11", - 
"filename": "s11_autonomous_agents.py", - "title": "Autonomous Agents", - "subtitle": "Scan Board, Claim Tasks", - "loc": 499, - "tools": [ - "bash", - "read_file", - "write_file", - "edit_file", - "send_message", - "read_inbox", - "shutdown_response", - "plan_approval", - "idle", - "claim_task", - "spawn_teammate", - "list_teammates", - "broadcast", - "shutdown_request" - ], - "newTools": [ - "idle", - "claim_task" - ], - "coreAddition": "Task board polling + timeout-based self-governance", - "keyInsight": "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one", - "classes": [ - { - "name": "MessageBus", - "startLine": 80, - "endLine": 121 - }, - { - "name": "TeammateManager", - "startLine": 159, - "endLine": 368 - } - ], - "functions": [ - { - "name": "scan_unclaimed_tasks", - "signature": "def scan_unclaimed_tasks()", - "startLine": 126 - }, - { - "name": "claim_task", - "signature": "def claim_task(task_id: int, owner: str)", - "startLine": 138 - }, - { - "name": "make_identity_block", - "signature": "def make_identity_block(name: str, role: str, team_name: str)", - "startLine": 151 - }, - { - "name": "_safe_path", - "signature": "def _safe_path(p: str)", + "name": "trigger_hooks", + "signature": "def trigger_hooks(event, *args)", "startLine": 373 }, { - "name": "_run_bash", - "signature": "def _run_bash(command: str)", + "name": "permission_hook", + "signature": "def permission_hook(block)", "startLine": 380 }, { - "name": "_run_read", - "signature": "def _run_read(path: str, limit: int = None)", - "startLine": 395 - }, - { - "name": "_run_write", - "signature": "def _run_write(path: str, content: str)", - "startLine": 405 - }, - { - "name": "_run_edit", - "signature": "def _run_edit(path: str, old_text: str, new_text: str)", - "startLine": 415 - }, - { - "name": "handle_shutdown_request", - "signature": "def handle_shutdown_request(teammate: str)", - "startLine": 428 - }, - { - "name": "handle_plan_review", - 
"signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")", - "startLine": 439 - }, - { - "name": "_check_shutdown_status", - "signature": "def _check_shutdown_status(request_id: str)", - "startLine": 453 + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 385 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 509 + "startLine": 399 } ], - "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, 
owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work.\",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n 
return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. 
Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')}\"\n )\n if len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. 
Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", 
\"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: 
_run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", 
\"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" 
@{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "layer": "memory", + "source": "#!/usr/bin/env python3\n\"\"\"\ns08_context_compact.py - Context Compact\n\nFour-layer compaction pipeline inserted before LLM calls:\n\n L1: snip_compact — trim middle messages when count > 50\n L2: micro_compact — replace old tool_results with placeholders\n L3: tool_result_budget — persist large results to disk\n L4: compact_history — LLM full summary (1 API call)\n\n Emergency: reactive_compact — when API still returns prompt_too_long\n\n ┌─────────────────────────────────────────────────────────────┐\n │ messages[] │\n │ ↓ │\n │ L3 budget ─→ L1 snip ─→ L2 micro ─→ [token > threshold?] │\n │ ├─ No → LLM │\n │ └─ Yes → L4 summary │\n │ ↓ │\n │ LLM call │\n │ [prompt_too_long?] │\n │ └─ Yes → reactive │\n └─────────────────────────────────────────────────────────────┘\n\nCore principle: cheap first, expensive last.\nExecution order matches CC source: budget → snip → micro → auto.\n\nBuilds on s07 (skill loading). 
Usage:\n\n python s08_context_compact/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (inherited from s07)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not 
skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n# s08: SYSTEM includes skill catalog (inherited from s07 build_system)\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s08: subagent gets its own system prompt — no compact, no skill loading\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s07 (unchanged): Basic Tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, t in enumerate(todos):\n if \"content\" not in t or \"status\" not in t:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# 
═══════════════════════════════════════════════════════════\n# FROM s06-s07 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = 
trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s08: Four-Layer Compaction Pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT = 3\nPERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\n\n# L1: snipCompact — trim middle messages\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages: return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return messages[:keep_head] + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}] + messages[-keep_tail:]\n\n\n# L2: microCompact — old result placeholders\ndef collect_tool_results(messages):\n blocks = []\n for mi, msg in enumerate(messages):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((mi, bi, block))\n return blocks\n\ndef 
micro_compact(messages):\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT: return messages\n for _, _, block in tool_results[:-KEEP_RECENT]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n\n\n# L3: toolResultBudget — persist large results to disk\ndef persist_large_output(tool_use_id, output):\n if len(output) <= PERSIST_THRESHOLD: return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists(): path.write_text(output)\n return f\"\\nFull output: {path}\\nPreview:\\n{output[:2000]}\\n\"\n\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1] if messages else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return messages\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes: return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for _, block in ranked:\n if total <= max_bytes: break\n content = str(block.get(\"content\", \"\"))\n if len(content) <= PERSIST_THRESHOLD: continue\n tid = block.get(\"tool_use_id\", \"unknown\")\n block[\"content\"] = persist_large_output(tid, content)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\n# L4: autoCompact — LLM full summary\ndef write_transcript(messages):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages: f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\ndef summarize_history(messages):\n conversation = json.dumps(messages, default=str)[:80000]\n prompt 
= (\"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings/decisions, 3. files read/changed, \"\n \"4. remaining work, 5. user constraints.\\nBe compact but concrete.\\n\\n\" + conversation)\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=2000)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in response.content\n if getattr(block, \"type\", None) == \"text\").strip() or \"(empty summary)\"\n\ndef compact_history(messages):\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\n# Emergency: reactiveCompact — on API error\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s07: Tool Definitions\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n # s08 change: new compact tool — triggers compact_history, not a no-op\n {\"name\": \"compact\", \"description\": \"Summarize earlier conversation to free context space.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\"}}}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n# FROM s04 (unchanged): Hooks\nHOOKS = {\"PreToolUse\": [], \"PostToolUse\": []}\ndef trigger_hooks(event, 
*args):\n for cb in HOOKS[event]:\n r = cb(*args)\n if r is not None: return r\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\"]\ndef permission_hook(block):\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"): return \"Permission denied\"\n return None\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\nHOOKS[\"PreToolUse\"].append(permission_hook)\nHOOKS[\"PreToolUse\"].append(log_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s08 core: run compaction pipeline before LLM\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1 # retry limit for reactive compact\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s08 change: three preprocessors (0 API calls, cheap first)\n # Order matches CC source: budget → snip → micro\n messages[:] = tool_result_budget(messages) # L3: persist large results first\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # s08 change: tokens still over threshold → LLM summary (1 API call)\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000)\n reactive_retries = 0 # reset on successful API call\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n 
print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s08: compact tool triggers compact_history, not a no-op string\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"[Compacted. Conversation history has been summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(blocked)})\n continue\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n else:\n # normal path: no compact was called\n messages.append({\"role\": \"user\", \"content\": results})\n continue\n # compact was called: results already appended above\n continue\n\n\nif __name__ == \"__main__\":\n print(\"s08: Context Compact — four-layer compaction pipeline\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s08_context_compact/auto-compact.svg", + "alt": "auto compact" + }, + { + "src": "/course-assets/s08_context_compact/compact-overview.svg", + "alt": "compact overview" + }, + { + "src": "/course-assets/s08_context_compact/compaction-layers.svg", + "alt": "compaction layers" + }, + { + "src": 
"/course-assets/s08_context_compact/layer1-budget.svg", + "alt": "layer1 budget" + }, + { + "src": "/course-assets/s08_context_compact/micro-compact.svg", + "alt": "micro compact" + } + ] }, { - "id": "s12", - "filename": "s12_worktree_task_isolation.py", - "title": "Worktree + Task Isolation", - "subtitle": "Isolate by Directory", - "loc": 694, + "id": "s09", + "filename": "s09_memory/code.py", + "title": "Memory", + "subtitle": "Keep a Layer That Doesn't Lose Details", + "loc": 492, "tools": [ "bash", "read_file", "write_file", "edit_file", - "task_create", - "task_list", - "task_get", - "task_update", - "task_bind_worktree", - "worktree_create", - "worktree_list", - "worktree_status", - "worktree_run", - "worktree_remove", - "worktree_keep", - "worktree_events" - ], - "newTools": [ - "task_create", - "task_list", - "task_get", - "task_update", - "task_bind_worktree", - "worktree_create", - "worktree_list", - "worktree_status", - "worktree_run", - "worktree_remove", - "worktree_keep", - "worktree_events" - ], - "coreAddition": "Composable worktree lifecycle + event stream over a shared task board", - "keyInsight": "Each works in its own directory; tasks manage goals, worktrees manage directories, bound by ID", - "classes": [ - { - "name": "EventBus", - "startLine": 82, - "endLine": 120 - }, - { - "name": "TaskManager", - "startLine": 121, - "endLine": 218 - }, - { - "name": "WorktreeManager", - "startLine": 224, - "endLine": 472 - } + "glob", + "task" ], + "newTools": [], + "coreAddition": "Durable memory layer", + "keyInsight": "Some facts should survive summarization and future sessions.", + "classes": [], "functions": [ { - "name": "detect_repo_root", - "signature": "def detect_repo_root(cwd: Path)", - "startLine": 52 + "name": "_parse_frontmatter", + "signature": "def _parse_frontmatter(text: str)", + "startLine": 58 + }, + { + "name": "write_memory_file", + "signature": "def write_memory_file(name: str, mem_type: str, description: str, body: str)", + 
"startLine": 72 + }, + { + "name": "_rebuild_index", + "signature": "def _rebuild_index()", + "startLine": 84 + }, + { + "name": "read_memory_index", + "signature": "def read_memory_index()", + "startLine": 98 + }, + { + "name": "read_memory_file", + "signature": "def read_memory_file(filename: str)", + "startLine": 106 + }, + { + "name": "list_memory_files", + "signature": "def list_memory_files()", + "startLine": 114 + }, + { + "name": "select_relevant_memories", + "signature": "def select_relevant_memories(messages: list, max_items: int = 5)", + "startLine": 132 + }, + { + "name": "load_memories", + "signature": "def load_memories(messages: list)", + "startLine": 207 + }, + { + "name": "extract_memories", + "signature": "def extract_memories(messages: list)", + "startLine": 222 + }, + { + "name": "consolidate_memories", + "signature": "def consolidate_memories()", + "startLine": 287 + }, + { + "name": "build_system", + "signature": "def build_system()", + "startLine": 337 }, { "name": "safe_path", "signature": "def safe_path(p: str)", - "startLine": 477 + "startLine": 360 }, { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 484 + "startLine": 365 }, { "name": "run_read", - "signature": "def run_read(path: str, limit: int = None)", - "startLine": 503 + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 372 }, { "name": "run_write", "signature": "def run_write(path: str, content: str)", - "startLine": 513 + "startLine": 379 }, { "name": "run_edit", "signature": "def run_edit(path: str, old_text: str, new_text: str)", - "startLine": 523 + "startLine": 385 + }, + { + "name": "run_glob", + "signature": "def run_glob(pattern: str)", + "startLine": 394 + }, + { + "name": "extract_text", + "signature": "def extract_text(content)", + "startLine": 404 + }, + { + "name": "spawn_subagent", + "signature": "def spawn_subagent(task: str)", + "startLine": 419 + }, + { + "name": "estimate_size", + "signature": "def 
estimate_size(msgs)", + "startLine": 452 + }, + { + "name": "snip_compact", + "signature": "def snip_compact(msgs, mx=50)", + "startLine": 454 + }, + { + "name": "collect_tool_results", + "signature": "def collect_tool_results(msgs)", + "startLine": 458 + }, + { + "name": "micro_compact", + "signature": "def micro_compact(msgs)", + "startLine": 466 + }, + { + "name": "persist_large", + "signature": "def persist_large(tid, out)", + "startLine": 473 + }, + { + "name": "tool_result_budget", + "signature": "def tool_result_budget(msgs, mx=200_000)", + "startLine": 480 + }, + { + "name": "write_transcript", + "signature": "def write_transcript(msgs)", + "startLine": 494 + }, + { + "name": "summarize_history", + "signature": "def summarize_history(msgs)", + "startLine": 501 + }, + { + "name": "compact_history", + "signature": "def compact_history(msgs)", + "startLine": 509 + }, + { + "name": "reactive_compact", + "signature": "def reactive_compact(msgs)", + "startLine": 514 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 728 + "startLine": 551 + } + ], + "layer": "memory", + "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). 
Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n 
lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. 
Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = response.content[0].text.strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = response.content[0].text.strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = response.content[0].text.strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSYSTEM = build_system()\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n return msgs[:3] + [{\"role\": \"user\", \"content\": f\"[snipped {len(msgs)-mx} msgs]\"}] + msgs[-(mx-3):]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": 
blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. 
user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return r.content[0].text.strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, 
\"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s09: rebuild system with current memory index + relevant memories\n system = build_system()\n memories_content = load_memories(messages)\n if memories_content:\n system += \"\\n\\n\" + memories_content\n\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n 
print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s09_memory/memory-overview.svg", + "alt": "memory overview" + }, + { + "src": "/course-assets/s09_memory/memory-subsystems.svg", + "alt": "memory subsystems" + } + ] + }, + { + "id": "s10", + "filename": "s10_system_prompt/code.py", + "title": "System Prompt", + "subtitle": "Assembled at Runtime, Never Hardcoded", + "loc": 166, + "tools": [ + "bash", + "read_file", + "write_file" + ], + "newTools": [], + "coreAddition": "Runtime prompt assembly", + "keyInsight": "The system prompt is a generated product of policy, tools, skills, and context.", + "classes": [], + "functions": [ + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 50 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 71 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 97 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 104 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None 
= None)", + "startLine": 114 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 124 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 156 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 172 + } + ], + "layer": "planning", + "source": "#!/usr/bin/env python3\n\"\"\"\ns10: System Prompt — Runtime prompt assembly with caching.\n\nRun: python s10_system_prompt/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s09:\n - PROMPT_SECTIONS: topic-keyed dict of prompt fragments\n - assemble_system_prompt(context): select + join sections by real state\n - get_system_prompt(context): deterministic cache via json.dumps\n - agent_loop uses get_system_prompt(context) instead of hardcoded SYSTEM\n\nMemory section loads when .memory/MEMORY.md exists (real state, not keywords).\n\"\"\"\n\nimport os, subprocess, json\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ── Prompt Sections ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n \"\"\"Select and join prompt sections based on current context.\"\"\"\n sections = []\n\n # Always loaded — identity, tools, workspace\n sections.append(PROMPT_SECTIONS[\"identity\"])\n sections.append(PROMPT_SECTIONS[\"tools\"])\n sections.append(PROMPT_SECTIONS[\"workspace\"])\n\n # Conditional — memory loaded when MEMORY.md exists and has content\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key = None\n_last_prompt = None\n\n\ndef get_system_prompt(context: dict) -> str:\n \"\"\"Cache wrapper — reassemble only when context changes.\n\n Uses json.dumps for deterministic serialization, not Python's hash()\n which has process randomization and fails on nested dicts/lists.\n This cache only avoids redundant string assembly within a process.\n Real Claude Code additionally protects API-level prompt cache via\n stable section ordering and SYSTEM_PROMPT_DYNAMIC_BOUNDARY.\n \"\"\"\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n print(\" \\033[90m[cache hit] system prompt unchanged\\033[0m\")\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n\n loaded = [\"identity\", \"tools\", \"workspace\"]\n if context.get(\"memories\"):\n loaded.append(\"memory\")\n print(f\" \\033[32m[assembled] sections: {', '.join(loaded)}\\033[0m\")\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: 
{p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n]\n\nTOOL_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write}\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state: which tools exist, whether memory files exist.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n 
memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n \"\"\"Main loop — uses assembled system prompt instead of hardcoded SYSTEM.\"\"\"\n system = get_system_prompt(context)\n while True:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Re-evaluate context and prompt after each tool round\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s10: system prompt — runtime assembly\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s10_system_prompt/system-prompt-overview.svg", + "alt": "system prompt overview" + } + ] + }, + { + "id": "s11", + "filename": "s11_error_recovery/code.py", + "title": "Error Recovery", + "subtitle": "Errors Are the Start of a Retry", + "loc": 287, + "tools": [ + "bash", + "read_file", + "write_file" + ], + "newTools": [], + "coreAddition": "Retry strategy", + "keyInsight": "A robust harness classifies failures and decides what kind of retry is worthwhile.", + "classes": [ + { + "name": "RecoveryState", + "startLine": 163, + "endLine": 172 + } + ], + "functions": [ + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 73 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 86 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 104 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 111 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 121 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 131 + }, + { + "name": "retry_delay", + "signature": "def retry_delay(attempt, retry_after=None)", + "startLine": 173 + }, + { + "name": "with_retry", + "signature": "def with_retry(fn, state: RecoveryState)", + "startLine": 182 + }, + 
{ + "name": "is_prompt_too_long_error", + "signature": "def is_prompt_too_long_error(e: Exception)", + "startLine": 226 + }, + { + "name": "reactive_compact", + "signature": "def reactive_compact(messages: list)", + "startLine": 235 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 249 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 265 + } + ], + "layer": "planning", + "source": "#!/usr/bin/env python3\n\"\"\"\ns11: Error Recovery — three recovery paths + exponential backoff.\n\nRun: python s11_error_recovery/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s10:\n - LLM call wrapped in try/except with three recovery paths\n - Path 1: max_tokens -> escalate 8K->64K (no append on first escalation),\n then continuation prompt (max 3)\n - Path 2: prompt_too_long -> reactive compact -> retry (once)\n - Path 3: 429/529 -> exponential backoff with jitter (max 10),\n fallback model on consecutive 529\n - with_retry wrapper for transient errors\n - RecoveryState tracks escalation / compact / 529 / model\n\nASCII flow:\n messages -> prompt assembly -> compress+load -> [try] LLM [except] -> tools -> loop\n | |\n stop_reason error type\n max_tokens? prompt_too_long? -> compact\n escalate / 429/529? -> backoff\n continue other? 
-> log + exit\n\"\"\"\n\nimport os, subprocess, time, random, json\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nPRIMARY_MODEL = os.environ[\"MODEL_ID\"]\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\n# ── Constants ──\n\nESCALATED_MAX_TOKENS = 64000\nDEFAULT_MAX_TOKENS = 8000\nMAX_RECOVERY_RETRIES = 3\nMAX_RETRIES = 10\nBASE_DELAY_MS = 500\nMAX_CONSECUTIVE_529 = 3\nCONTINUATION_PROMPT = (\n \"Output token limit hit. Resume directly — \"\n \"no apology, no recap. Pick up mid-thought.\"\n)\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n print(\" \\033[90m[cache hit] system prompt unchanged\\033[0m\")\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n\n loaded = [\"identity\", \"tools\", \"workspace\"]\n if context.get(\"memories\"):\n loaded.append(\"memory\")\n print(f\" \\033[32m[assembled] sections: {', '.join(loaded)}\\033[0m\")\n return _last_prompt\n\n\n# ── Tools (unchanged) ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n]\n\nTOOL_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write}\n\n\n# ── Error Recovery (s11 new) ──\n\nclass RecoveryState:\n \"\"\"Track recovery attempts across the loop.\"\"\"\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt, retry_after=None):\n \"\"\"Exponential backoff with jitter. 
Retry-After takes priority.\"\"\"\n if retry_after:\n return retry_after\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n jitter = random.uniform(0, base * 0.25)\n return base + jitter\n\n\ndef with_retry(fn, state: RecoveryState):\n \"\"\"Exponential backoff for transient errors (429/529).\n Non-transient errors are re-raised for the outer handler.\"\"\"\n for attempt in range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__\n msg = str(e).lower()\n\n # 429 rate limit -> exponential backoff\n if \"ratelimit\" in name.lower() or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429 rate limit] retry {attempt+1}/{MAX_RETRIES},\"\n f\" wait {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n\n # 529 overloaded -> exponential backoff + fallback model\n if \"overloaded\" in name.lower() or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529:\n if FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529 x{MAX_CONSECUTIVE_529}]\"\n f\" switching to {FALLBACK_MODEL}\\033[0m\")\n else:\n state.consecutive_529 = 0\n print(f\" \\033[31m[529 x{MAX_CONSECUTIVE_529}]\"\n f\" no FALLBACK_MODEL_ID configured, continuing retry\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529 overloaded] retry {attempt+1}/{MAX_RETRIES},\"\n f\" wait {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n\n # Not transient -> re-raise for outer try/except\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n \"\"\"Check whether an API error indicates prompt/context too long.\"\"\"\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"prompt_is_too_long\" in msg\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\ndef 
reactive_compact(messages: list) -> list:\n \"\"\"Emergency compact — teaching version keeps last N messages.\n Real CC generates a compact summary via LLM, then retries with\n the compacted message list. Teaching version simplifies to tail\n retention since s08/s09 already cover LLM-based compact.\"\"\"\n print(\" \\033[31m[reactive compact] trimming to last 5 messages\\033[0m\")\n tail = messages[-5:]\n return [{\"role\": \"user\",\n \"content\": \"[Reactive compact] Earlier conversation trimmed. \"\n \"Continue from where you left off.\"}, *tail]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state: which tools exist, whether memory files exist.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n \"\"\"Main loop with error recovery wrapping LLM calls.\"\"\"\n system = get_system_prompt(context)\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # ── LLM call: with_retry handles 429/529, outer handles rest ──\n try:\n response = with_retry(\n lambda mt=max_tokens, mdl=state.current_model:\n client.messages.create(\n model=mdl, system=system, messages=messages,\n tools=TOOLS, max_tokens=mt),\n state)\n except Exception as e:\n # Path 2: prompt_too_long -> reactive compact (once)\n if is_prompt_too_long_error(e):\n if not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n print(\" \\033[31m[unrecoverable] still too long after compact\\033[0m\")\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": \"[Error] Context too large, cannot continue.\"}]})\n return\n\n # 
Unrecoverable\n name = type(e).__name__\n print(f\" \\033[31m[unrecoverable] {name}: {str(e)[:100]}\\033[0m\")\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {name}: {str(e)[:200]}\"}]})\n return\n\n # ── Path 1: max_tokens -> escalate or continue ──\n if response.stop_reason == \"max_tokens\":\n # First escalation: don't append truncated output, retry same request\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] escalating\"\n f\" {DEFAULT_MAX_TOKENS} -> {ESCALATED_MAX_TOKENS}\\033[0m\")\n continue\n # 64K still truncated: save truncated output + continuation prompt\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n print(f\" \\033[33m[max_tokens] continuation\"\n f\" {state.recovery_count}/{MAX_RECOVERY_RETRIES}\\033[0m\")\n continue\n print(\" \\033[31m[max_tokens] recovery limit reached\\033[0m\")\n return\n\n # Normal completion: append assistant response\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n # ── Tool execution ──\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s11: error recovery\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for msg in history[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg[\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s11_error_recovery/error-recovery-overview.svg", + "alt": "error recovery overview" + } + ] + }, + { + "id": "s12", + "filename": "s12_task_system/code.py", + "title": "Task System", + "subtitle": "Break Big Goals into Small Tasks", + "loc": 297, + "tools": [ + "bash", + "read_file", + "write_file", + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task" + ], + "newTools": [ + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task" + ], + "coreAddition": "Task board", + "keyInsight": "A task graph turns vague goals into ordered, observable work.", + "classes": [ + { + "name": "Task", + "startLine": 53, + "endLine": 61 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 62 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 80 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 84 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 88 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 93 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 99 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, 
owner: str = \"agent\")", + "startLine": 111 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 126 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 153 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 166 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 178 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 185 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 195 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 205 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 225 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 240 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 247 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 251 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 310 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 326 } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": 
\".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. 
\"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": 
self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return 
\"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. 
Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = 
Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == 
name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n 
\"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task 
completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. 
worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n\"\"\"\ns12: Task System — file-persisted task graph with blockedBy dependencies.\n\nRun: python s12_task_system/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s11:\n - Task dataclass (id, subject, description, status, owner, blockedBy)\n - TASKS_DIR = .tasks/ for persistent JSON storage\n - create_task / save_task / load_task / list_tasks / get_task\n - can_start: checks blockedBy all completed (missing deps = blocked)\n - claim_task: set owner + pending -> in_progress\n - complete_task: set completed + report unblocked downstream\n - 5 new tools: create_task, list_tasks, get_task, claim_task, complete_task\n\nNote: Teaching code keeps a basic agent loop to stay focused on the task\nsystem. 
S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted — in real CC, tasks.ts and\nwithRetry are independent layers that compose naturally.\n\"\"\"\n\nimport os, subprocess, json, time, random\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None # Agent name (multi-agent scenarios)\n blockedBy: list[str] # Dependency task IDs\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject,\n description=description,\n status=\"pending\",\n owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = 
load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": 
[\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on task system) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s12: task system\")\n 
print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s12_task_system/task-dag.svg", + "alt": "task dag" + }, + { + "src": "/course-assets/s12_task_system/task-system-overview.svg", + "alt": "task system overview" + } + ] + }, + { + "id": "s13", + "filename": "s13_background_tasks/code.py", + "title": "Background Tasks", + "subtitle": "Slow Operations Go to the Background", + "loc": 380, + "tools": [ + "bash", + "read_file", + "write_file", + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task" + ], + "newTools": [], + "coreAddition": "Background execution", + "keyInsight": "The agent can keep reasoning while slow work completes elsewhere.", + "classes": [ + { + "name": "Task", + "startLine": 54, + "endLine": 62 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 63 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 79 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 83 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 87 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 92 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 98 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 
110 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 125 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 152 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 165 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 177 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, run_in_background: bool = False)", + "startLine": 184 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 195 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 205 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 225 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 240 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 247 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 251 + }, + { + "name": "is_slow_operation", + "signature": "def is_slow_operation(tool_name: str, tool_input: dict)", + "startLine": 318 + }, + { + "name": "should_run_background", + "signature": "def should_run_background(tool_name: str, tool_input: dict)", + "startLine": 329 + }, + { + "name": "execute_tool", + "signature": "def execute_tool(block)", + "startLine": 336 + }, + { + "name": "start_background_task", + "signature": "def start_background_task(block)", + "startLine": 344 + }, + { + "name": "collect_background_results", + "signature": "def collect_background_results()", + "startLine": 369 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 394 + }, + { + "name": "agent_loop", + "signature": "def 
agent_loop(messages: list, context: dict)", + "startLine": 410 + } + ], + "layer": "concurrency", + "source": "#!/usr/bin/env python3\n\"\"\"\ns13: Background Tasks — thread-based async execution + notification injection.\n\nRun: python s13_background_tasks/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s12:\n - threading.Thread for background execution\n - background_tasks dict for lifecycle tracking (bg_id, command, status)\n - background_results dict + threading.Lock for thread-safe storage\n - should_run_background: model explicit request via run_in_background param\n - is_slow_operation: fallback heuristic when model doesn't specify\n - start_background_task: dispatch to daemon thread, return bg task id\n - collect_background_results: gather completed, return as notifications\n - agent_loop: slow ops → background + placeholder, inject notifications\n - Notifications use format, not reused tool_use_id\n\nNote: Teaching code keeps a basic agent loop to stay focused on background\ntasks. 
S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all 
blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n 
\"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Background Tasks (s13 new) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = TOOL_HANDLERS.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on background tasks) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, 
messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Command: {block.input.get('command', '')}. \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Inject background notifications + tool results in one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n print(f\" \\033[32m[inject] {len(bg_notifications)} background \"\n f\"notification(s)\\033[0m\")\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s13: background tasks\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n", + "images": [ + { + "src": "/course-assets/s13_background_tasks/background-tasks-overview.svg", + "alt": "background tasks overview" + } + ] + }, + { + "id": "s14", + "filename": "s14_cron_scheduler/code.py", + "title": "Cron Scheduler", + "subtitle": "Producing Work on a Schedule", + "loc": 646, + "tools": [ + "bash", + "read_file", + "write_file", + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task", + "schedule_cron", + "list_crons", + "cancel_cron" + ], + "newTools": [ + "schedule_cron", + "list_crons", + "cancel_cron" + ], + "coreAddition": "Scheduled task creation", + "keyInsight": "Recurring work should be created by the harness, not remembered by the model.", + "classes": [ + { + "name": "Task", + "startLine": 56, + "endLine": 64 + }, + { + "name": "CronJob", + "startLine": 352, + "endLine": 359 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 65 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 81 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 85 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 89 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 94 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 100 + }, + { + "name": "claim_task", + "signature": "def 
claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 112 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 127 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 155 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 168 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 180 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, run_in_background: bool = False)", + "startLine": 187 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 198 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 208 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 228 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 243 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 250 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 254 + }, + { + "name": "is_slow_operation", + "signature": "def is_slow_operation(tool_name: str, tool_input: dict)", + "startLine": 266 + }, + { + "name": "should_run_background", + "signature": "def should_run_background(tool_name: str, tool_input: dict)", + "startLine": 277 + }, + { + "name": "execute_tool", + "signature": "def execute_tool(block)", + "startLine": 284 + }, + { + "name": "start_background_task", + "signature": "def start_background_task(block)", + "startLine": 299 + }, + { + "name": "collect_background_results", + "signature": "def collect_background_results()", + "startLine": 323 + }, + { + "name": "_cron_field_matches", + "signature": "def _cron_field_matches(field: str, value: int)", + 
"startLine": 367 + }, + { + "name": "cron_matches", + "signature": "def cron_matches(cron_expr: str, dt: datetime)", + "startLine": 383 + }, + { + "name": "_validate_cron_field", + "signature": "def _validate_cron_field(field: str, lo: int, hi: int)", + "startLine": 413 + }, + { + "name": "validate_cron", + "signature": "def validate_cron(cron_expr: str)", + "startLine": 448 + }, + { + "name": "save_durable_jobs", + "signature": "def save_durable_jobs()", + "startLine": 462 + }, + { + "name": "load_durable_jobs", + "signature": "def load_durable_jobs()", + "startLine": 468 + }, + { + "name": "cancel_job", + "signature": "def cancel_job(job_id: str)", + "startLine": 507 + }, + { + "name": "cron_scheduler_loop", + "signature": "def cron_scheduler_loop()", + "startLine": 519 + }, + { + "name": "consume_cron_queue", + "signature": "def consume_cron_queue()", + "startLine": 545 + }, + { + "name": "has_cron_queue", + "signature": "def has_cron_queue()", + "startLine": 553 + }, + { + "name": "run_list_crons", + "signature": "def run_list_crons()", + "startLine": 575 + }, + { + "name": "run_cancel_cron", + "signature": "def run_cancel_cron(job_id: str)", + "startLine": 589 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 667 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 686 + }, + { + "name": "print_latest_assistant_text", + "signature": "def print_latest_assistant_text(messages: list)", + "startLine": 745 + }, + { + "name": "run_agent_turn_locked", + "signature": "def run_agent_turn_locked(user_query: str | None = None)", + "startLine": 763 + }, + { + "name": "queue_processor_loop", + "signature": "def queue_processor_loop()", + "startLine": 774 + } + ], + "layer": "concurrency", + "source": "#!/usr/bin/env python3\n\"\"\"\ns14: Cron Scheduler — independent daemon thread + queue processor.\n\nRun: python s14_cron_scheduler/code.py\nNeed: 
pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s13:\n - CronJob dataclass (id, cron, prompt, recurring, durable)\n - cron_matches: 5-field cron expression matching with DOM/DOW OR semantics\n - schedule_job / cancel_job: register/remove cron jobs (with validation)\n - cron_scheduler_loop: independent daemon thread, polls every 1s\n - cron_queue: thread-safe queue, scheduler writes, queue processor delivers\n - queue_processor_loop: auto-runs agent_loop when cron_queue has work\n - Durable storage: .scheduled_tasks.json (survives restart)\n - 3 new tools: schedule_cron, list_crons, cancel_cron\n\nFour layers:\n 1. Scheduler: daemon thread checks time → fires matching jobs\n 2. Queue: cron_queue decouples scheduler from agent loop\n 3. Queue processor: wakes the agent when queued work exists and it is idle\n 4. Consumer: agent_loop consumes queued jobs and injects them into messages\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, 
description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" 
and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": 
run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (s14 new) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = 
[]\ncron_lock = threading.Lock()\nagent_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n 
return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef 
schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef has_cron_queue() -> bool:\n \"\"\"Return whether fired cron jobs are waiting 
to be delivered.\"\"\"\n with cron_lock:\n return bool(cron_queue)\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# ── Cron Tools ──\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on cron scheduler) ──\n# Teaching code keeps a basic agent loop. 
S11's full error recovery is omitted.\n# cron_scheduler_loop produces work; queue_processor_loop wakes this loop when\n# queued work exists and no other agent turn is running.\n\ndef agent_loop(messages: list, context: dict) -> dict:\n system = get_system_prompt(context)\n while True:\n # Layer 4: consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return context\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return context\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = 
get_system_prompt(context)\n\n\nsession_history: list = []\nsession_context = update_context({}, [])\n\n\ndef print_latest_assistant_text(messages: list):\n \"\"\"Print text blocks from the latest assistant message.\"\"\"\n if not messages:\n return\n msg = messages[-1]\n if not isinstance(msg, dict) or msg.get(\"role\") != \"assistant\":\n return\n content = msg.get(\"content\", \"\")\n if isinstance(content, str):\n print(content)\n return\n for block in content:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n\ndef run_agent_turn_locked(user_query: str | None = None):\n \"\"\"Run one agent turn. Caller must hold agent_lock.\"\"\"\n global session_context\n if user_query is not None:\n session_history.append({\"role\": \"user\", \"content\": user_query})\n session_context = agent_loop(session_history, session_context)\n session_context = update_context(session_context, session_history)\n print_latest_assistant_text(session_history)\n print()\n\n\ndef queue_processor_loop():\n \"\"\"Auto-deliver fired cron jobs when the agent is idle.\"\"\"\n global session_context\n while True:\n time.sleep(0.2)\n if not has_cron_queue():\n continue\n if not agent_lock.acquire(blocking=False):\n continue\n try:\n if not has_cron_queue():\n continue\n print(\"\\n \\033[35m[queue processor] delivering scheduled work\\033[0m\")\n run_agent_turn_locked()\n finally:\n agent_lock.release()\n\n\nif __name__ == \"__main__\":\n print(\"s14: cron scheduler\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n threading.Thread(target=queue_processor_loop, daemon=True).start()\n print(\" \\033[35m[queue processor] started\\033[0m\")\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n with agent_lock:\n run_agent_turn_locked(query)\n", + "images": [ + { + "src": "/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg", + "alt": "cron scheduler overview" + } + ] + }, + { + "id": "s15", + "filename": "s15_agent_teams/code.py", + "title": "Agent Teams", + "subtitle": "One Agent Isn't Enough, Form a Team", + "loc": 746, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task", + "schedule_cron", + "list_crons", + "cancel_cron", + "spawn_teammate", + "check_inbox" + ], + "newTools": [ + "send_message", + "spawn_teammate", + "check_inbox" + ], + "coreAddition": "Teammate mailboxes", + "keyInsight": "Persistent teammates let work continue in parallel without stuffing every thought into one context.", + "classes": [ + { + "name": "Task", + "startLine": 54, + "endLine": 62 + }, + { + "name": "CronJob", + "startLine": 353, + "endLine": 360 + }, + { + "name": "MessageBus", + "startLine": 595, + "endLine": 620 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 63 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 79 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 83 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 87 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 92 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 98 + }, + { + "name": "claim_task", + "signature": 
"def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 110 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 125 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 154 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 167 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 179 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, run_in_background: bool = False)", + "startLine": 186 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 197 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 207 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 227 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 242 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 249 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 253 + }, + { + "name": "is_slow_operation", + "signature": "def is_slow_operation(tool_name: str, tool_input: dict)", + "startLine": 265 + }, + { + "name": "should_run_background", + "signature": "def should_run_background(tool_name: str, tool_input: dict)", + "startLine": 276 + }, + { + "name": "execute_tool", + "signature": "def execute_tool(block)", + "startLine": 283 + }, + { + "name": "start_background_task", + "signature": "def start_background_task(block)", + "startLine": 300 + }, + { + "name": "collect_background_results", + "signature": "def collect_background_results()", + "startLine": 324 + }, + { + "name": "_cron_field_matches", + "signature": "def _cron_field_matches(field: str, value: int)", + 
"startLine": 367 + }, + { + "name": "cron_matches", + "signature": "def cron_matches(cron_expr: str, dt: datetime)", + "startLine": 383 + }, + { + "name": "_validate_cron_field", + "signature": "def _validate_cron_field(field: str, lo: int, hi: int)", + "startLine": 413 + }, + { + "name": "validate_cron", + "signature": "def validate_cron(cron_expr: str)", + "startLine": 448 + }, + { + "name": "save_durable_jobs", + "signature": "def save_durable_jobs()", + "startLine": 462 + }, + { + "name": "load_durable_jobs", + "signature": "def load_durable_jobs()", + "startLine": 468 + }, + { + "name": "cancel_job", + "signature": "def cancel_job(job_id: str)", + "startLine": 507 + }, + { + "name": "cron_scheduler_loop", + "signature": "def cron_scheduler_loop()", + "startLine": 519 + }, + { + "name": "consume_cron_queue", + "signature": "def consume_cron_queue()", + "startLine": 545 + }, + { + "name": "run_list_crons", + "signature": "def run_list_crons()", + "startLine": 569 + }, + { + "name": "run_cancel_cron", + "signature": "def run_cancel_cron(job_id: str)", + "startLine": 583 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 629 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 717 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 721 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 726 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 828 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 847 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns15: Agent Teams — MessageBus + spawn_teammate_thread + inbox injection.\n\nRun: python 
s15_agent_teams/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s14:\n - MessageBus class: file-based mailboxes (.mailboxes/*.jsonl)\n - spawn_teammate_thread: creates teammate in background thread\n - Teammate runs own simplified agent_loop (bash, read, write, send_message)\n - Lead tools: spawn_teammate, send_message, check_inbox (3 new)\n - Lead inbox: teammate messages injected into history (not just printed)\n - Teaching version: teammates limited to 10 rounds (real CC uses idle loop)\n\nASCII flow:\n Lead: cron_queue → messages → prompt → LLM → TOOLS ────→ loop\n ↑ ↓ |\n └── inbox ← MessageBus ← teammate.send_message ←┘\n Teammate: inbox → LLM → bash/read/write/send → loop (max 10 turns)\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, 
description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += 
f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = 
safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if 
tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (from s14, synced) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if 
field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: 
{field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# 
Cron tool handlers\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── MessageBus (s15 new) ──\n# Teaching version uses simple file append + unlink.\n# Real CC uses proper-lockfile for concurrent write safety.\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"{content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\n\n# Track spawned teammates\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Teammate Thread (s15 new) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Teaching version: max 10 rounds per teammate.\n Real CC: teammates use idle loop (wait for inbox, work, repeat)\n until shutdown_request.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Send results via send_message to 'lead'.\")\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n }\n\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": 
\"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\n# ── Team Tool Handlers (s15 new) ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n lines.append(f\" [{m['from']}] {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n 
\"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox for teammate messages.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n 
\"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n# Teaching code keeps a basic agent loop. S11's full error recovery is omitted.\n# Cron queue is consumed when agent_loop is called; real CC auto-wakes via\n# queue processor (useQueueProcessor.ts) when items arrive.\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n # Consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": 
user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s15: agent teams\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox for teammate results → inject into history\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox)} messages injected]\\033[0m\")\n print()\n", + "images": [ + { + "src": "/course-assets/s15_agent_teams/agent-teams-overview.svg", + "alt": "agent teams overview" + }, + { + "src": "/course-assets/s15_agent_teams/team-topology.svg", + "alt": "team topology" + } + ] + }, + { + "id": "s16", + "filename": "s16_team_protocols/code.py", + "title": "Team Protocols", + "subtitle": "Teammates Need Agreements", + "loc": 710, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "submit_plan", + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task", + "spawn_teammate", + "check_inbox", + "request_shutdown", + "request_plan", + "review_plan" + ], + "newTools": [ + "submit_plan", + "request_shutdown", + "request_plan", + "review_plan" + ], + "coreAddition": "Shared coordination protocols", + "keyInsight": "Multi-agent systems need explicit message contracts, not vibes.", + "classes": [ + { + "name": "Task", + 
"startLine": 58, + "endLine": 66 + }, + { + "name": "MessageBus", + "startLine": 340, + "endLine": 365 + }, + { + "name": "ProtocolState", + "startLine": 372, + "endLine": 381 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 67 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 83 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 87 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 91 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 96 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 102 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 114 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 129 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 158 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 171 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 183 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, run_in_background: bool = False)", + "startLine": 190 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 201 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 211 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 231 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 246 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 253 + }, + { + "name": 
"run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 257 + }, + { + "name": "is_slow_operation", + "signature": "def is_slow_operation(tool_name: str, tool_input: dict)", + "startLine": 269 + }, + { + "name": "should_run_background", + "signature": "def should_run_background(tool_name: str, tool_input: dict)", + "startLine": 280 + }, + { + "name": "start_background_task", + "signature": "def start_background_task(block)", + "startLine": 287 + }, + { + "name": "collect_background_results", + "signature": "def collect_background_results()", + "startLine": 311 + }, + { + "name": "new_request_id", + "signature": "def new_request_id()", + "startLine": 385 + }, + { + "name": "match_response", + "signature": "def match_response(response_type: str, request_id: str, approve: bool)", + "startLine": 389 + }, + { + "name": "consume_lead_inbox", + "signature": "def consume_lead_inbox(route_protocol: bool = True)", + "startLine": 420 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 440 + }, + { + "name": "_teammate_submit_plan", + "signature": "def _teammate_submit_plan(from_name: str, plan: str)", + "startLine": 598 + }, + { + "name": "run_request_shutdown", + "signature": "def run_request_shutdown(teammate: str)", + "startLine": 621 + }, + { + "name": "run_request_plan", + "signature": "def run_request_plan(teammate: str, task: str)", + "startLine": 635 + }, + { + "name": "run_review_plan", + "signature": "def run_review_plan(request_id: str, approve: bool, feedback: str = \"\")", + "startLine": 642 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 659 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 663 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 668 + }, + 
{ + "name": "execute_tool", + "signature": "def execute_tool(block)", + "startLine": 684 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 790 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 806 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns16: Team Protocols — request-response protocol + request_id + dispatch + state machine.\n\nRun: python s16_team_protocols/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s15:\n - ProtocolState dataclass (request_id, type, sender, status, created_at)\n - pending_requests dict: tracks in-flight protocol requests\n - dispatch_message: routes incoming messages by type to handlers\n - request_shutdown: Lead sends shutdown protocol request\n - request_plan: Lead asks teammate to submit plan\n - handle_shutdown_request / handle_plan_response: teammate receives & responds\n - match_response: Lead correlates response to request via request_id (with type validation)\n - Teammate idle loop: waits for inbox messages instead of exiting after 10 rounds\n - Unified consume_lead_inbox: protocol routing + injection into history\n - 3 new Lead tools: request_shutdown, request_plan, review_plan\n - 1 new teammate tool: submit_plan\n\nASCII flow:\n Lead: BUS.send(\"shutdown_request\", {request_id}) ──────→ teammate inbox\n Teammate: dispatch → handler → BUS.send(\"shutdown_response\", {request_id}) ─→ Lead inbox\n Lead: consume_lead_inbox → match_response(request_id) → pending_requests[req_id].status = approved\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom 
dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != 
\"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = 
[\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / 
\".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (s16 new) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str\n target: str\n status: str # pending | approved | rejected\n payload: str # plan text or shutdown reason\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\n Validates that response_type matches the request type.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n # Validate response type matches request type\n if state.type == \"shutdown\" and response_type != 
\"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.status != \"pending\":\n print(f\" \\033[33m[protocol] {request_id} already {state.status}, \"\n f\"ignoring duplicate\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Unified Lead Inbox Consumer (s16 fix) ──\n# Both check_inbox tool and main loop call this function.\n# Protocol responses are routed via match_response before returning.\n\ndef consume_lead_inbox(route_protocol: bool = True) -> list[dict]:\n \"\"\"Read Lead's inbox. Route protocol responses, return all messages.\n Called by both run_check_inbox() and main loop to avoid\n messages being consumed without protocol routing.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return []\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n approve = meta.get(\"approve\", False)\n match_response(msg_type, req_id, approve)\n return msgs\n\n\n# ── Teammate Thread (s16: idle loop + dispatch) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Uses idle loop: after each LLM turn, waits for inbox messages\n (shutdown_request, new task) instead of exiting.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. 
\"\n f\"Use tools to complete tasks. \"\n f\"Check inbox for protocol messages (shutdown_request, etc).\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list) -> bool:\n \"\"\"Dispatch incoming protocol messages by type.\n Returns True if teammate should stop.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True # stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n\n return False # continue\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", 
\"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n }\n\n shutdown_requested = False\n while not shutdown_requested:\n # Check inbox for protocol messages\n inbox = BUS.read_inbox(name)\n should_stop = False\n non_protocol = []\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n break\n else:\n non_protocol.append(msg)\n if should_stop:\n shutdown_requested = True\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n\n # LLM turn\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # Idle: wait for inbox messages instead of exiting\n # Real CC sends idle_notification to Lead here\n while not shutdown_requested:\n time.sleep(1)\n inbox = BUS.read_inbox(name)\n if not inbox:\n continue\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n shutdown_requested = True\n break\n else:\n non_protocol.append(msg)\n if shutdown_requested:\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n 
break # back to LLM turn with new messages\n\n # Execute tool calls\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\n\n Note: This is a protocol-level request, not a code-level gate.\n After submitting, the teammate's thread continues running — it can\n still call bash/write/etc. Real enforcement relies on the model\n waiting for the approval response before acting. Code-level tool\n gating would require blocking the teammate's tool dispatch until\n approval arrives.\n \"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). 
Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (s16 new) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan for a task.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool, feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender, feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Other Lead Tool Handlers ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n \"\"\"Check Lead's inbox. 
Routes protocol responses via match_response.\"\"\"\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Dispatch ──\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional 
blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox. 
Routes protocol responses automatically.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan by request_id.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background notifications + tool results into one user message\n user_content = []\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n user_content.extend(results)\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s16: team protocols\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox → route protocol + inject into history\n inbox_msgs = consume_lead_inbox(route_protocol=True)\n if inbox_msgs:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox_msgs)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox_msgs)} messages injected]\\033[0m\")\n print()\n", + "images": [ + { + "src": "/course-assets/s16_team_protocols/team-protocols-overview.svg", + "alt": "team protocols overview" + } + ] + }, + { + "id": "s17", + "filename": "s17_autonomous_agents/code.py", + "title": "Autonomous Agents", + "subtitle": "Check the Board, Claim the Task", + "loc": 648, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "submit_plan", + "list_tasks", + "claim_task", + "complete_task", + "create_task", + "get_task", + "spawn_teammate", + "check_inbox", + "request_shutdown", + "request_plan", + "review_plan" + ], + "newTools": [], + "coreAddition": "Autonomous task claiming", + "keyInsight": "Teammates become useful when they can discover and claim work themselves.", + "classes": [ + { + "name": "Task", + "startLine": 51, + "endLine": 59 + }, + { + "name": "MessageBus", + "startLine": 219, + "endLine": 240 + }, + { + "name": "ProtocolState", + "startLine": 248, + "endLine": 257 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 60 + }, + { + "name": "save_task", + 
"signature": "def save_task(task: Task)", + "startLine": 76 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 80 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 84 + }, + { + "name": "get_task", + "signature": "def get_task(task_id: str)", + "startLine": 89 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 94 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 104 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 125 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 153 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 165 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 176 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 183 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None)", + "startLine": 193 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 203 + }, + { + "name": "new_request_id", + "signature": "def new_request_id()", + "startLine": 261 + }, + { + "name": "match_response", + "signature": "def match_response(response_type: str, request_id: str, approve: bool)", + "startLine": 265 + }, + { + "name": "scan_unclaimed_tasks", + "signature": "def scan_unclaimed_tasks()", + "startLine": 292 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 351 + }, + { + "name": "_teammate_submit_plan", + "signature": "def _teammate_submit_plan(from_name: str, plan: str)", + "startLine": 528 + }, + { + "name": "run_request_shutdown", + "signature": "def 
run_request_shutdown(teammate: str)", + "startLine": 543 + }, + { + "name": "run_request_plan", + "signature": "def run_request_plan(teammate: str, task: str)", + "startLine": 557 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 591 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 600 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 604 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 608 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 612 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 616 + }, + { + "name": "consume_lead_inbox", + "signature": "def consume_lead_inbox(route_protocol=True)", + "startLine": 621 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 634 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 745 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 754 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns17: Autonomous Agents — idle poll + auto-claim + WORK/IDLE lifecycle.\n\nRun: python s17_autonomous_agents/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s16:\n - scan_unclaimed_tasks: find pending, unowned tasks with deps completed\n - idle_poll: 60s polling loop (inbox + task board), dispatches shutdown in IDLE\n - claim_task: owner check + return value verification\n - Teammate lifecycle: WORK → IDLE → SHUTDOWN\n - Teammate tools: + list_tasks, claim_task, complete_task (5→8)\n - consume_lead_inbox: unified inbox consumer for protocol + context 
injection\n - Identity re-injection after context compression\n\nASCII lifecycle:\n WORK: inbox → LLM → tools → (tool_use? loop) → (done? → IDLE)\n IDLE: 5s poll → inbox? → WORK / unclaimed? → claim → WORK / 60s? → SHUTDOWN\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in 
task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools (from s15) ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and 
response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Autonomous Agent (s17 new) ──\n\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # Check inbox — dispatch protocol messages first\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # Check for shutdown_request\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n # Non-protocol inbox: inject and resume work\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n # Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task['id']}: \"\n f\"{task['subject']}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"Check inbox for protocol messages.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n \"\"\"Dispatch incoming protocol messages by type.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n # s17 new: teammates can list, claim, and complete tasks\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n return claim_task(task_id, owner=name)\n\n def _run_complete_task(task_id: str):\n return complete_task(task_id)\n\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n # Identity re-injection (s17)\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(non_protocol)}\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase (s17 new)\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef 
_teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Basic tool handlers 
──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n \"\"\"Read Lead inbox: route protocol responses, return all messages.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n 
\"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": 
\"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, 
messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s17: autonomous agents\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", + "images": [ + { + "src": "/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg", + "alt": "autonomous agents overview" + } + ] + }, + { + "id": "s18", + "filename": "s18_worktree_isolation/code.py", + "title": "Worktree Isolation", + "subtitle": "Separate Directories, No Conflicts", + "loc": 802, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "submit_plan", + "list_tasks", + "claim_task", + "complete_task", + "create_task", + "get_task", + "spawn_teammate", + "check_inbox", + "request_shutdown", + "request_plan", + "review_plan", + "create_worktree", + "remove_worktree", + "keep_worktree" + ], + "newTools": [ + "create_worktree", + "remove_worktree", + "keep_worktree" + ], + "coreAddition": "Worktree lifecycle", + "keyInsight": "Parallel agents need isolated filesystems as much as isolated conversations.", + "classes": [ + { + "name": "Task", + "startLine": 58, + "endLine": 67 + }, + { + "name": "MessageBus", + "startLine": 347, + "endLine": 368 + }, + { + "name": "ProtocolState", + "startLine": 375, + "endLine": 384 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def 
_task_path(task_id: str)", + "startLine": 68 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 84 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 88 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 92 + }, + { + "name": "get_task_json", + "signature": "def get_task_json(task_id: str)", + "startLine": 97 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 102 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 112 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 133 + }, + { + "name": "validate_worktree_name", + "signature": "def validate_worktree_name(name: str)", + "startLine": 156 + }, + { + "name": "run_git", + "signature": "def run_git(args: list[str])", + "startLine": 168 + }, + { + "name": "log_event", + "signature": "def log_event(event_type: str, worktree_name: str, task_id: str = \"\")", + "startLine": 180 + }, + { + "name": "create_worktree", + "signature": "def create_worktree(name: str, task_id: str = \"\")", + "startLine": 189 + }, + { + "name": "bind_task_to_worktree", + "signature": "def bind_task_to_worktree(task_id: str, worktree_name: str)", + "startLine": 207 + }, + { + "name": "_count_worktree_changes", + "signature": "def _count_worktree_changes(path: Path)", + "startLine": 215 + }, + { + "name": "remove_worktree", + "signature": "def remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 229 + }, + { + "name": "keep_worktree", + "signature": "def keep_worktree(name: str)", + "startLine": 256 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 280 + }, + { + "name": "get_system_prompt", + "signature": "def get_system_prompt(context: dict)", + "startLine": 292 + }, + { + "name": 
"safe_path", + "signature": "def safe_path(p: str, cwd: Path = None)", + "startLine": 303 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, cwd: Path = None)", + "startLine": 311 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None, cwd: Path = None)", + "startLine": 321 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str, cwd: Path = None)", + "startLine": 331 + }, + { + "name": "new_request_id", + "signature": "def new_request_id()", + "startLine": 388 + }, + { + "name": "match_response", + "signature": "def match_response(response_type: str, request_id: str, approve: bool)", + "startLine": 392 + }, + { + "name": "consume_lead_inbox", + "signature": "def consume_lead_inbox(route_protocol=True)", + "startLine": 412 + }, + { + "name": "scan_unclaimed_tasks", + "signature": "def scan_unclaimed_tasks()", + "startLine": 430 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 489 + }, + { + "name": "_teammate_submit_plan", + "signature": "def _teammate_submit_plan(from_name: str, plan: str)", + "startLine": 691 + }, + { + "name": "run_request_shutdown", + "signature": "def run_request_shutdown(teammate: str)", + "startLine": 705 + }, + { + "name": "run_request_plan", + "signature": "def run_request_plan(teammate: str, task: str)", + "startLine": 719 + }, + { + "name": "run_create_worktree", + "signature": "def run_create_worktree(name: str, task_id: str = \"\")", + "startLine": 744 + }, + { + "name": "run_remove_worktree", + "signature": "def run_remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 748 + }, + { + "name": "run_keep_worktree", + "signature": "def run_keep_worktree(name: str)", + "startLine": 752 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 766 + }, + { + "name": "run_get_task", + "signature": "def 
run_get_task(task_id: str)", + "startLine": 776 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 780 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 784 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 788 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 792 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 797 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 929 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 938 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns18: Worktree Isolation — git worktree + task-directory binding + event log.\n\nRun: python s18_worktree_isolation/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s17:\n - Task dataclass gains worktree field (str | None)\n - validate_worktree_name: reject path traversal and illegal chars\n - create_worktree: validate name, git worktree add, optional task binding\n - bind_task_to_worktree: write worktree field only, keep task pending\n - remove_worktree: safety check before force, no auto-complete\n - run_git returns (ok, output), events only on success\n - Teammate tools: + complete_task, run in worktree cwd when bound\n - scan_unclaimed_tasks: uses can_start() for dependency checking\n - idle_poll: checks claim result, dispatches shutdown in IDLE\n - consume_lead_inbox: unified inbox consumer\n - 3 new Lead tools: create_worktree, remove_worktree, keep_worktree\n\nASCII topology:\n Main repo (/)\n ├── .worktrees/auth/ (branch: wt/auth) ← Task #1\n ├── .worktrees/ui/ (branch: wt/ui) ← Task 
#2\n ├── .tasks/task_xxx.json (worktree: \"auth\")\n └── .worktrees/events.jsonl\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12 + s18 worktree field) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None # s18: bound worktree name\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n 
return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System (s18 new) ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n \"\"\"Return error message if invalid, None if valid.\"\"\"\n if not name:\n return \"Worktree name cannot be empty\"\n if name == \".\" or name == \"..\":\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, 
underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n \"\"\"Run git command. Return (ok, output).\"\"\"\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n out = out[:5000] if out else \"(no output)\"\n return r.returncode == 0, out\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n \"\"\"Append a lifecycle event to events.jsonl.\"\"\"\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n \"\"\"Create a git worktree with a dedicated branch. Optionally bind to a task.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n \"\"\"Write worktree field to task. 
Keep status as pending for auto-claim.\"\"\"\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n print(f\" \\033[33m[bind] {task.subject} → worktree:{worktree_name}\\033[0m\")\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n \"\"\"Count uncommitted files and commits in a worktree.\"\"\"\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n \"\"\"Remove worktree. Refuses if uncommitted changes unless discard_changes.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return (f\"Cannot verify worktree '{name}' status. \"\n \"Use discard_changes=true to force removal.\")\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} uncommitted file(s) \"\n f\"and {commits} unpushed commit(s). \"\n \"Use discard_changes=true to force removal, \"\n \"or keep_worktree to preserve for review.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree directory for '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n \"\"\"Keep worktree for manual review. 
Branch preserved.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n print(f\" \\033[36m[worktree] kept: {name}\\033[0m\")\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout 
(120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = 
pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent (from s17, + worktree cwd) ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task_data['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17 + s18) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n # Track current worktree for this teammate's cwd\n wt_ctx = {\"path\": None}\n\n def _wt_cwd() -> Path | None:\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n # Set worktree cwd if task has one\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n else:\n wt_ctx[\"path\"] = None\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n 
sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": 
_run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} 
finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan 
{icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Lead Worktree Tools (s18 new) ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions 
──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": 
{\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n # s18 new: worktree tools\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree with its own branch.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. 
Refuses if uncommitted changes unless discard_changes=true.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s18: worktree isolation\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", + "images": [ + { + "src": "/course-assets/s18_worktree_isolation/worktree-overview.svg", + "alt": "worktree overview" + } + ] + }, + { + "id": "s19", + "filename": "s19_mcp_plugin/code.py", + "title": "MCP Tools", + "subtitle": "External Tools, Standard Protocol", + "loc": 835, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "submit_plan", + "list_tasks", + "claim_task", + "complete_task", + "search", + "get_version", + "trigger", + "status", + "create_task", + "get_task", + "spawn_teammate", + "check_inbox", + "request_shutdown", + "request_plan", + 
"review_plan", + "create_worktree", + "remove_worktree", + "keep_worktree", + "connect_mcp" + ], + "newTools": [ + "search", + "get_version", + "trigger", + "status", + "connect_mcp" + ], + "coreAddition": "MCP tool bridge", + "keyInsight": "External services can become agent tools through a standard discovery and call protocol.", + "classes": [ + { + "name": "Task", + "startLine": 53, + "endLine": 62 + }, + { + "name": "MessageBus", + "startLine": 319, + "endLine": 340 + }, + { + "name": "ProtocolState", + "startLine": 347, + "endLine": 356 + }, + { + "name": "MCPClient", + "startLine": 660, + "endLine": 682 + } + ], + "functions": [ + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 63 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 79 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 83 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 87 + }, + { + "name": "get_task_json", + "signature": "def get_task_json(task_id: str)", + "startLine": 92 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 96 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 106 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 127 + }, + { + "name": "validate_worktree_name", + "signature": "def validate_worktree_name(name: str)", + "startLine": 150 + }, + { + "name": "run_git", + "signature": "def run_git(args: list[str])", + "startLine": 161 + }, + { + "name": "log_event", + "signature": "def log_event(event_type: str, worktree_name: str, task_id: str = \"\")", + "startLine": 171 + }, + { + "name": "create_worktree", + "signature": "def create_worktree(name: str, task_id: str = \"\")", + "startLine": 179 + }, + { + "name": "bind_task_to_worktree", + "signature": "def 
bind_task_to_worktree(task_id: str, worktree_name: str)", + "startLine": 196 + }, + { + "name": "_count_worktree_changes", + "signature": "def _count_worktree_changes(path: Path)", + "startLine": 202 + }, + { + "name": "remove_worktree", + "signature": "def remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 215 + }, + { + "name": "keep_worktree", + "signature": "def keep_worktree(name: str)", + "startLine": 238 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 261 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str, cwd: Path = None)", + "startLine": 275 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str, cwd: Path = None)", + "startLine": 283 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int | None = None, cwd: Path = None)", + "startLine": 293 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str, cwd: Path = None)", + "startLine": 303 + }, + { + "name": "new_request_id", + "signature": "def new_request_id()", + "startLine": 360 + }, + { + "name": "match_response", + "signature": "def match_response(response_type: str, request_id: str, approve: bool)", + "startLine": 364 + }, + { + "name": "consume_lead_inbox", + "signature": "def consume_lead_inbox(route_protocol=True)", + "startLine": 375 + }, + { + "name": "scan_unclaimed_tasks", + "signature": "def scan_unclaimed_tasks()", + "startLine": 393 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 437 + }, + { + "name": "_teammate_submit_plan", + "signature": "def _teammate_submit_plan(from_name: str, plan: str)", + "startLine": 615 + }, + { + "name": "run_request_shutdown", + "signature": "def run_request_shutdown(teammate: str)", + "startLine": 629 + }, + { + "name": "run_request_plan", + "signature": "def run_request_plan(teammate: 
str, task: str)", + "startLine": 640 + }, + { + "name": "normalize_mcp_name", + "signature": "def normalize_mcp_name(name: str)", + "startLine": 688 + }, + { + "name": "_mock_server_docs", + "signature": "def _mock_server_docs()", + "startLine": 693 + }, + { + "name": "_mock_server_deploy", + "signature": "def _mock_server_deploy()", + "startLine": 712 + }, + { + "name": "connect_mcp", + "signature": "def connect_mcp(name: str)", + "startLine": 739 + }, + { + "name": "assemble_tool_pool", + "signature": "def assemble_tool_pool()", + "startLine": 754 + }, + { + "name": "run_create_worktree", + "signature": "def run_create_worktree(name: str, task_id: str = \"\")", + "startLine": 775 + }, + { + "name": "run_remove_worktree", + "signature": "def run_remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 778 + }, + { + "name": "run_keep_worktree", + "signature": "def run_keep_worktree(name: str)", + "startLine": 781 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 795 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 805 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 808 + }, + { + "name": "run_complete_task", + "signature": "def run_complete_task(task_id: str)", + "startLine": 811 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 814 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 817 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 821 + }, + { + "name": "run_connect_mcp", + "signature": "def run_connect_mcp(name: str)", + "startLine": 833 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 953 + }, + { + "name": "agent_loop", + "signature": "def 
agent_loop(messages: list, context: dict)", + "startLine": 962 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns19: MCP Tools — MCPClient + tool discovery + assemble_tool_pool.\n\nRun: python s19_mcp_plugin/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s18:\n - MCPClient class: discovers tools, calls tools via mock handler\n - normalize_mcp_name: normalize tool/server names\n - assemble_tool_pool: assembles builtin + MCP tools into one pool\n - connect_mcp: connect to an MCP server, discover tools\n - Tool naming: mcp__{server}__{tool} with normalization\n - MCP tools have readOnly/destructive annotations\n - agent_loop uses dynamic tool pool (builtin + MCP), no prompt cache\n - Teammate tools: complete_task, worktree cwd (from s17/s18 fixes)\n\nASCII flow:\n connect_mcp(\"docs\") → MCPClient discovers tools →\n assemble_tool_pool → [builtin... , mcp__docs__search, mcp__docs__get_version]\n agent_loop uses assembled pool\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: 
str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n 
save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" 
\\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = 
\"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_info = f\"\\nWork directory: {WORKTREES_DIR / task_data['worktree']}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate 
'{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n 
{\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n if should_shutdown:\n break\n idle_result = idle_poll(name, messages, name, role)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", 
payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── MCP System (s19 new) ──\n\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef 
normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. 
Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Assemble builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in 
tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": 
[\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n 
\"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n 
\"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop (s19: dynamic tool pool, no prompt cache) ──\n\ndef agent_loop(messages: list, context: dict):\n tools, handlers = assemble_tool_pool()\n system = assemble_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if any(b.name == \"connect_mcp\" for b in response.content\n if b.type == \"tool_use\"):\n tools, handlers = assemble_tool_pool()\n context = update_context(context, messages)\n system = assemble_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s19: mcp tools\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", + "images": [ + { + "src": "/course-assets/s19_mcp_plugin/mcp-architecture.svg", + "alt": "mcp architecture" + } + ] + }, + { + "id": "s20", + "filename": "s20_comprehensive/code.py", + "title": "Comprehensive Agent", + "subtitle": "All Mechanisms, One Loop", + "loc": 1661, + "tools": [ + "bash", + "read_file", + "write_file", + "send_message", + "submit_plan", + "list_tasks", + "claim_task", + "complete_task", + "edit_file", + "glob", + "search", + "get_version", + "trigger", + "status", + "todo_write", + "task", + "load_skill", + "compact", + "create_task", + "get_task", + "schedule_cron", + "list_crons", + "cancel_cron", + "spawn_teammate", + "check_inbox", + "request_shutdown", + "request_plan", + "review_plan", + "create_worktree", + "remove_worktree", + "keep_worktree", + "connect_mcp" + ], + "newTools": [ + "edit_file", + "glob", + "todo_write", + "task", + "load_skill", + "compact", + "schedule_cron", + "list_crons", + "cancel_cron" + ], + "coreAddition": "Integrated harness", + "keyInsight": "The final harness is still one loop, now surrounded by the systems that make it production-shaped.", + "classes": [ + { + "name": "Task", + "startLine": 80, + "endLine": 89 + }, + { + "name": 
"MessageBus", + "startLine": 480, + "endLine": 501 + }, + { + "name": "ProtocolState", + "startLine": 508, + "endLine": 517 + }, + { + "name": "RecoveryState", + "startLine": 1154, + "endLine": 1162 + }, + { + "name": "CronJob", + "startLine": 1284, + "endLine": 1291 + }, + { + "name": "MCPClient", + "startLine": 1481, + "endLine": 1503 + } + ], + "functions": [ + { + "name": "terminal_print", + "signature": "def terminal_print(text: str)", + "startLine": 57 + }, + { + "name": "_task_path", + "signature": "def _task_path(task_id: str)", + "startLine": 90 + }, + { + "name": "save_task", + "signature": "def save_task(task: Task)", + "startLine": 106 + }, + { + "name": "load_task", + "signature": "def load_task(task_id: str)", + "startLine": 110 + }, + { + "name": "list_tasks", + "signature": "def list_tasks()", + "startLine": 114 + }, + { + "name": "get_task_json", + "signature": "def get_task_json(task_id: str)", + "startLine": 119 + }, + { + "name": "can_start", + "signature": "def can_start(task_id: str)", + "startLine": 123 + }, + { + "name": "claim_task", + "signature": "def claim_task(task_id: str, owner: str = \"agent\")", + "startLine": 135 + }, + { + "name": "complete_task", + "signature": "def complete_task(task_id: str)", + "startLine": 156 + }, + { + "name": "validate_worktree_name", + "signature": "def validate_worktree_name(name: str)", + "startLine": 181 + }, + { + "name": "run_git", + "signature": "def run_git(args: list[str])", + "startLine": 192 + }, + { + "name": "log_event", + "signature": "def log_event(event_type: str, worktree_name: str, task_id: str = \"\")", + "startLine": 202 + }, + { + "name": "create_worktree", + "signature": "def create_worktree(name: str, task_id: str = \"\")", + "startLine": 210 + }, + { + "name": "bind_task_to_worktree", + "signature": "def bind_task_to_worktree(task_id: str, worktree_name: str)", + "startLine": 234 + }, + { + "name": "_count_worktree_changes", + "signature": "def _count_worktree_changes(path: Path)", 
+ "startLine": 240 + }, + { + "name": "remove_worktree", + "signature": "def remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 253 + }, + { + "name": "keep_worktree", + "signature": "def keep_worktree(name: str)", + "startLine": 276 + }, + { + "name": "_parse_frontmatter", + "signature": "def _parse_frontmatter(text: str)", + "startLine": 289 + }, + { + "name": "scan_skills", + "signature": "def scan_skills()", + "startLine": 303 + }, + { + "name": "list_skills", + "signature": "def list_skills()", + "startLine": 327 + }, + { + "name": "load_skill", + "signature": "def load_skill(name: str)", + "startLine": 335 + }, + { + "name": "assemble_system_prompt", + "signature": "def assemble_system_prompt(context: dict)", + "startLine": 360 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str, cwd: Path = None)", + "startLine": 379 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str, cwd: Path = None)", + "startLine": 415 + }, + { + "name": "run_glob", + "signature": "def run_glob(pattern: str, cwd: Path = None)", + "startLine": 438 + }, + { + "name": "call_tool_handler", + "signature": "def call_tool_handler(handler, args: dict, name: str)", + "startLine": 451 + }, + { + "name": "run_todo_write", + "signature": "def run_todo_write(todos: list)", + "startLine": 460 + }, + { + "name": "new_request_id", + "signature": "def new_request_id()", + "startLine": 521 + }, + { + "name": "match_response", + "signature": "def match_response(response_type: str, request_id: str, approve: bool)", + "startLine": 525 + }, + { + "name": "consume_lead_inbox", + "signature": "def consume_lead_inbox(route_protocol=True)", + "startLine": 538 + }, + { + "name": "scan_unclaimed_tasks", + "signature": "def scan_unclaimed_tasks()", + "startLine": 556 + }, + { + "name": "spawn_teammate_thread", + "signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)", + "startLine": 606 + }, + { + "name": 
"_teammate_submit_plan", + "signature": "def _teammate_submit_plan(from_name: str, plan: str)", + "startLine": 813 + }, + { + "name": "run_request_shutdown", + "signature": "def run_request_shutdown(teammate: str)", + "startLine": 827 + }, + { + "name": "run_request_plan", + "signature": "def run_request_plan(teammate: str, task: str)", + "startLine": 838 + }, + { + "name": "register_hook", + "signature": "def register_hook(event: str, callback)", + "startLine": 864 + }, + { + "name": "trigger_hooks", + "signature": "def trigger_hooks(event: str, *args)", + "startLine": 868 + }, + { + "name": "permission_hook", + "signature": "def permission_hook(block)", + "startLine": 880 + }, + { + "name": "log_hook", + "signature": "def log_hook(block)", + "startLine": 908 + }, + { + "name": "large_output_hook", + "signature": "def large_output_hook(block, output)", + "startLine": 913 + }, + { + "name": "user_prompt_hook", + "signature": "def user_prompt_hook(query: str)", + "startLine": 920 + }, + { + "name": "stop_hook", + "signature": "def stop_hook(messages: list)", + "startLine": 925 + }, + { + "name": "extract_text", + "signature": "def extract_text(content)", + "startLine": 989 + }, + { + "name": "has_tool_use", + "signature": "def has_tool_use(content)", + "startLine": 998 + }, + { + "name": "spawn_subagent", + "signature": "def spawn_subagent(description: str)", + "startLine": 1005 + }, + { + "name": "estimate_size", + "signature": "def estimate_size(messages: list)", + "startLine": 1042 + }, + { + "name": "collect_tool_results", + "signature": "def collect_tool_results(messages: list)", + "startLine": 1046 + }, + { + "name": "persist_large_output", + "signature": "def persist_large_output(tool_use_id: str, output: str)", + "startLine": 1058 + }, + { + "name": "tool_result_budget", + "signature": "def tool_result_budget(messages: list, max_bytes: int = 200_000)", + "startLine": 1069 + }, + { + "name": "snip_compact", + "signature": "def snip_compact(messages: list, 
max_messages: int = 50)", + "startLine": 1093 + }, + { + "name": "micro_compact", + "signature": "def micro_compact(messages: list)", + "startLine": 1103 + }, + { + "name": "write_transcript", + "signature": "def write_transcript(messages: list)", + "startLine": 1113 + }, + { + "name": "summarize_history", + "signature": "def summarize_history(messages: list)", + "startLine": 1122 + }, + { + "name": "compact_history", + "signature": "def compact_history(messages: list)", + "startLine": 1134 + }, + { + "name": "reactive_compact", + "signature": "def reactive_compact(messages: list)", + "startLine": 1141 + }, + { + "name": "retry_delay", + "signature": "def retry_delay(attempt: int)", + "startLine": 1163 + }, + { + "name": "with_retry", + "signature": "def with_retry(fn, state: RecoveryState)", + "startLine": 1168 + }, + { + "name": "is_prompt_too_long_error", + "signature": "def is_prompt_too_long_error(e: Exception)", + "startLine": 1198 + }, + { + "name": "is_slow_operation", + "signature": "def is_slow_operation(tool_name: str, tool_input: dict)", + "startLine": 1215 + }, + { + "name": "should_run_background", + "signature": "def should_run_background(tool_name: str, tool_input: dict)", + "startLine": 1225 + }, + { + "name": "start_background_task", + "signature": "def start_background_task(block, handlers: dict)", + "startLine": 1231 + }, + { + "name": "collect_background_results", + "signature": "def collect_background_results()", + "startLine": 1256 + }, + { + "name": "_cron_field_matches", + "signature": "def _cron_field_matches(field: str, value: int)", + "startLine": 1298 + }, + { + "name": "cron_matches", + "signature": "def cron_matches(cron_expr: str, dt: datetime)", + "startLine": 1313 + }, + { + "name": "_validate_cron_field", + "signature": "def _validate_cron_field(field: str, lo: int, hi: int)", + "startLine": 1335 + }, + { + "name": "validate_cron", + "signature": "def validate_cron(cron_expr: str)", + "startLine": 1367 + }, + { + "name": 
"save_durable_jobs", + "signature": "def save_durable_jobs()", + "startLine": 1380 + }, + { + "name": "load_durable_jobs", + "signature": "def load_durable_jobs()", + "startLine": 1385 + }, + { + "name": "cancel_job", + "signature": "def cancel_job(job_id: str)", + "startLine": 1413 + }, + { + "name": "cron_scheduler_loop", + "signature": "def cron_scheduler_loop()", + "startLine": 1423 + }, + { + "name": "consume_cron_queue", + "signature": "def consume_cron_queue()", + "startLine": 1442 + }, + { + "name": "run_list_crons", + "signature": "def run_list_crons()", + "startLine": 1457 + }, + { + "name": "run_cancel_cron", + "signature": "def run_cancel_cron(job_id: str)", + "startLine": 1469 + }, + { + "name": "normalize_mcp_name", + "signature": "def normalize_mcp_name(name: str)", + "startLine": 1509 + }, + { + "name": "_mock_server_docs", + "signature": "def _mock_server_docs()", + "startLine": 1514 + }, + { + "name": "_mock_server_deploy", + "signature": "def _mock_server_deploy()", + "startLine": 1533 + }, + { + "name": "connect_mcp", + "signature": "def connect_mcp(name: str)", + "startLine": 1560 + }, + { + "name": "assemble_tool_pool", + "signature": "def assemble_tool_pool()", + "startLine": 1575 + }, + { + "name": "run_create_worktree", + "signature": "def run_create_worktree(name: str, task_id: str = \"\")", + "startLine": 1596 + }, + { + "name": "run_remove_worktree", + "signature": "def run_remove_worktree(name: str, discard_changes: bool = False)", + "startLine": 1599 + }, + { + "name": "run_keep_worktree", + "signature": "def run_keep_worktree(name: str)", + "startLine": 1602 + }, + { + "name": "run_list_tasks", + "signature": "def run_list_tasks()", + "startLine": 1616 + }, + { + "name": "run_get_task", + "signature": "def run_get_task(task_id: str)", + "startLine": 1626 + }, + { + "name": "run_claim_task", + "signature": "def run_claim_task(task_id: str)", + "startLine": 1632 + }, + { + "name": "run_complete_task", + "signature": "def 
run_complete_task(task_id: str)", + "startLine": 1638 + }, + { + "name": "run_spawn_teammate", + "signature": "def run_spawn_teammate(name: str, role: str, prompt: str)", + "startLine": 1644 + }, + { + "name": "run_send_message", + "signature": "def run_send_message(to: str, content: str)", + "startLine": 1647 + }, + { + "name": "run_check_inbox", + "signature": "def run_check_inbox()", + "startLine": 1651 + }, + { + "name": "run_connect_mcp", + "signature": "def run_connect_mcp(name: str)", + "startLine": 1663 + }, + { + "name": "update_context", + "signature": "def update_context(context: dict, messages: list)", + "startLine": 1845 + }, + { + "name": "prepare_context", + "signature": "def prepare_context(messages: list)", + "startLine": 1862 + }, + { + "name": "build_user_content", + "signature": "def build_user_content(results: list[dict])", + "startLine": 1872 + }, + { + "name": "inject_background_notifications", + "signature": "def inject_background_notifications(messages: list)", + "startLine": 1882 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list, context: dict)", + "startLine": 1902 + }, + { + "name": "print_turn_assistants", + "signature": "def print_turn_assistants(messages: list, turn_start: int)", + "startLine": 2008 + }, + { + "name": "cron_autorun_loop", + "signature": "def cron_autorun_loop(history: list, context: dict)", + "startLine": 2017 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport os, subprocess, json, 
time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. 
Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in 
task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, 
task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree 
'{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in parts[1].strip().splitlines():\n if \":\" in line:\n key, value = line.split(\":\", 1)\n meta[key.strip()] = value.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = _parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: 
{skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. 
Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n for i, todo in enumerate(todos):\n if \"content\" not in todo or \"status\" not in todo:\n return f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. 
This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", 
{})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: 
str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = 
(str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": 
\"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if 
protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") 
-> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n 
path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return (messages[:keep_head]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[-keep_tail:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. \"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[-5:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in 
range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. 
Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n 
notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in 
field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not 
job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: 
dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = []\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n content.extend(results)\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n 
global rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n", + "images": [ + { + "src": "/course-assets/s20_comprehensive/system-architecture.svg", + "alt": "system architecture" + } + ] } ], "diffs": [ @@ -837,179 +3607,399 @@ "safe_path", "run_read", "run_write", - "run_edit" + "run_edit", + "run_glob" ], "newTools": [ "read_file", "write_file", - "edit_file" + "edit_file", + "glob" ], - "locDelta": 36 + "locDelta": 33 }, { "from": "s02", "to": "s03", - "newClasses": [ - "TodoManager" + "newClasses": [], + "newFunctions": [ + "check_deny_list", + "check_rules", + "ask_user", + "check_permission" ], - "newFunctions": [], - "newTools": [ - "todo" - ], - "locDelta": 56 + "newTools": [], + "locDelta": 45 }, { "from": "s03", "to": "s04", "newClasses": [], "newFunctions": [ - "run_subagent" + 
"register_hook", + "trigger_hooks", + "permission_hook", + "log_hook", + "large_output_hook", + "context_inject_hook", + "summary_hook" ], - "newTools": [ - "task" - ], - "locDelta": -25 + "newTools": [], + "locDelta": 52 }, { "from": "s04", "to": "s05", - "newClasses": [ - "SkillLoader" + "newClasses": [], + "newFunctions": [ + "run_todo_write" ], - "newFunctions": [], "newTools": [ - "load_skill" + "todo_write" ], - "locDelta": 36 + "locDelta": -13 }, { "from": "s05", "to": "s06", "newClasses": [], "newFunctions": [ - "estimate_tokens", - "micro_compact", - "auto_compact" + "extract_text", + "spawn_subagent" ], "newTools": [ - "compact" + "task" ], - "locDelta": 18 + "locDelta": 68 }, { "from": "s06", "to": "s07", - "newClasses": [ - "TaskManager" + "newClasses": [], + "newFunctions": [ + "_parse_frontmatter", + "_scan_skills", + "list_skills", + "build_system", + "load_skill" ], - "newFunctions": [], "newTools": [ - "task_create", - "task_update", - "task_list", - "task_get" + "load_skill" ], - "locDelta": 2 + "locDelta": 34 }, { "from": "s07", "to": "s08", - "newClasses": [ - "BackgroundManager" + "newClasses": [], + "newFunctions": [ + "estimate_size", + "snip_compact", + "collect_tool_results", + "micro_compact", + "persist_large_output", + "tool_result_budget", + "write_transcript", + "summarize_history", + "compact_history", + "reactive_compact" ], - "newFunctions": [], "newTools": [ - "background_run", - "check_background" + "compact" ], - "locDelta": -9 + "locDelta": 44 }, { "from": "s08", "to": "s09", - "newClasses": [ - "MessageBus", - "TeammateManager" - ], + "newClasses": [], "newFunctions": [ - "_safe_path", - "_run_bash", - "_run_read", - "_run_write", - "_run_edit" + "write_memory_file", + "_rebuild_index", + "read_memory_index", + "read_memory_file", + "list_memory_files", + "select_relevant_memories", + "load_memories", + "extract_memories", + "consolidate_memories", + "persist_large" ], - "newTools": [ - "alice", - "send_message", - 
"read_inbox", - "spawn_teammate", - "list_teammates", - "broadcast" - ], - "locDelta": 150 + "newTools": [], + "locDelta": 127 }, { "from": "s09", "to": "s10", "newClasses": [], "newFunctions": [ - "handle_shutdown_request", - "handle_plan_review", - "_check_shutdown_status" + "assemble_system_prompt", + "get_system_prompt", + "update_context" ], - "newTools": [ - "shutdown_response", - "plan_approval", - "shutdown_request" - ], - "locDelta": 71 + "newTools": [], + "locDelta": -326 }, { "from": "s10", "to": "s11", - "newClasses": [], + "newClasses": [ + "RecoveryState" + ], "newFunctions": [ - "scan_unclaimed_tasks", - "claim_task", - "make_identity_block" + "retry_delay", + "with_retry", + "is_prompt_too_long_error", + "reactive_compact" ], - "newTools": [ - "idle", - "claim_task" - ], - "locDelta": 80 + "newTools": [], + "locDelta": 121 }, { "from": "s11", "to": "s12", "newClasses": [ - "EventBus", - "TaskManager", - "WorktreeManager" + "Task" ], "newFunctions": [ - "detect_repo_root", - "safe_path", - "run_bash", - "run_read", - "run_write", - "run_edit" + "_task_path", + "save_task", + "load_task", + "list_tasks", + "get_task", + "can_start", + "claim_task", + "complete_task", + "run_list_tasks", + "run_get_task", + "run_claim_task", + "run_complete_task" ], "newTools": [ - "task_create", - "task_list", - "task_get", - "task_update", - "task_bind_worktree", - "worktree_create", - "worktree_list", - "worktree_status", - "worktree_run", - "worktree_remove", - "worktree_keep", - "worktree_events" + "create_task", + "list_tasks", + "get_task", + "claim_task", + "complete_task" ], - "locDelta": 195 + "locDelta": 10 + }, + { + "from": "s12", + "to": "s13", + "newClasses": [], + "newFunctions": [ + "is_slow_operation", + "should_run_background", + "execute_tool", + "start_background_task", + "collect_background_results" + ], + "newTools": [], + "locDelta": 83 + }, + { + "from": "s13", + "to": "s14", + "newClasses": [ + "CronJob" + ], + "newFunctions": [ + 
"_cron_field_matches", + "cron_matches", + "_validate_cron_field", + "validate_cron", + "save_durable_jobs", + "load_durable_jobs", + "cancel_job", + "cron_scheduler_loop", + "consume_cron_queue", + "has_cron_queue", + "run_list_crons", + "run_cancel_cron", + "print_latest_assistant_text", + "run_agent_turn_locked", + "queue_processor_loop" + ], + "newTools": [ + "schedule_cron", + "list_crons", + "cancel_cron" + ], + "locDelta": 266 + }, + { + "from": "s14", + "to": "s15", + "newClasses": [ + "MessageBus" + ], + "newFunctions": [ + "spawn_teammate_thread", + "run_spawn_teammate", + "run_send_message", + "run_check_inbox" + ], + "newTools": [ + "send_message", + "spawn_teammate", + "check_inbox" + ], + "locDelta": 100 + }, + { + "from": "s15", + "to": "s16", + "newClasses": [ + "ProtocolState" + ], + "newFunctions": [ + "new_request_id", + "match_response", + "consume_lead_inbox", + "_teammate_submit_plan", + "run_request_shutdown", + "run_request_plan", + "run_review_plan" + ], + "newTools": [ + "submit_plan", + "request_shutdown", + "request_plan", + "review_plan" + ], + "locDelta": -36 + }, + { + "from": "s16", + "to": "s17", + "newClasses": [], + "newFunctions": [ + "scan_unclaimed_tasks" + ], + "newTools": [], + "locDelta": -62 + }, + { + "from": "s17", + "to": "s18", + "newClasses": [], + "newFunctions": [ + "get_task_json", + "validate_worktree_name", + "run_git", + "log_event", + "create_worktree", + "bind_task_to_worktree", + "_count_worktree_changes", + "remove_worktree", + "keep_worktree", + "run_create_worktree", + "run_remove_worktree", + "run_keep_worktree" + ], + "newTools": [ + "create_worktree", + "remove_worktree", + "keep_worktree" + ], + "locDelta": 154 + }, + { + "from": "s18", + "to": "s19", + "newClasses": [ + "MCPClient" + ], + "newFunctions": [ + "normalize_mcp_name", + "_mock_server_docs", + "_mock_server_deploy", + "connect_mcp", + "assemble_tool_pool", + "run_connect_mcp" + ], + "newTools": [ + "search", + "get_version", + "trigger", + 
"status", + "connect_mcp" + ], + "locDelta": 33 + }, + { + "from": "s19", + "to": "s20", + "newClasses": [ + "RecoveryState", + "CronJob" + ], + "newFunctions": [ + "terminal_print", + "_parse_frontmatter", + "scan_skills", + "list_skills", + "load_skill", + "run_glob", + "call_tool_handler", + "run_todo_write", + "register_hook", + "trigger_hooks", + "permission_hook", + "log_hook", + "large_output_hook", + "user_prompt_hook", + "stop_hook", + "extract_text", + "has_tool_use", + "spawn_subagent", + "estimate_size", + "collect_tool_results", + "persist_large_output", + "tool_result_budget", + "snip_compact", + "micro_compact", + "write_transcript", + "summarize_history", + "compact_history", + "reactive_compact", + "retry_delay", + "with_retry", + "is_prompt_too_long_error", + "is_slow_operation", + "should_run_background", + "start_background_task", + "collect_background_results", + "_cron_field_matches", + "cron_matches", + "_validate_cron_field", + "validate_cron", + "save_durable_jobs", + "load_durable_jobs", + "cancel_job", + "cron_scheduler_loop", + "consume_cron_queue", + "run_list_crons", + "run_cancel_cron", + "prepare_context", + "build_user_content", + "inject_background_notifications", + "print_turn_assistants", + "cron_autorun_loop" + ], + "newTools": [ + "edit_file", + "glob", + "todo_write", + "task", + "load_skill", + "compact", + "schedule_cron", + "list_crons", + "cancel_cron" + ], + "locDelta": 826 } ] } \ No newline at end of file diff --git a/web/src/data/scenarios/s02.json b/web/src/data/scenarios/s02.json index 4fa1b42..64b4a86 100644 --- a/web/src/data/scenarios/s02.json +++ b/web/src/data/scenarios/s02.json @@ -1,6 +1,6 @@ { "version": "s02", - "title": "Tools", + "title": "Tool Use", "description": "Agent with read, write, edit, and bash tools", "steps": [ { diff --git a/web/src/data/scenarios/s03.json b/web/src/data/scenarios/s03.json index b75bf66..a56fa58 100644 --- a/web/src/data/scenarios/s03.json +++ b/web/src/data/scenarios/s03.json 
@@ -1,53 +1,39 @@ { "version": "s03", - "title": "TodoWrite", - "description": "Agent creates a visible plan before executing", + "title": "Permission", + "description": "Tool calls pass through hard denies, rule checks, and optional user approval before execution.", "steps": [ { "type": "user_message", - "content": "Build a CLI calculator that supports add, subtract, multiply, divide", - "annotation": "A multi-step task that benefits from planning" - }, - { - "type": "tool_call", - "content": "[\n { \"id\": 1, \"task\": \"Create calc.py with argument parsing\", \"status\": \"in_progress\" },\n { \"id\": 2, \"task\": \"Implement 4 operations\", \"status\": \"pending\" },\n { \"id\": 3, \"task\": \"Add error handling\", \"status\": \"pending\" },\n { \"id\": 4, \"task\": \"Test all operations\", \"status\": \"pending\" }\n]", - "toolName": "todo_write", - "annotation": "TodoManager creates a visible plan BEFORE any code is written" - }, - { - "type": "tool_result", - "content": "Todo list created with 4 tasks", - "toolName": "todo_write", - "annotation": "The plan is now visible and trackable" - }, - { - "type": "tool_call", - "content": "calc.py\n---\nimport sys\n\ndef calculate(op, a, b):\n ops = {'+': a+b, '-': a-b, '*': a*b, '/': a/b}\n return ops.get(op, 'Unknown operator')\n\nif __name__ == '__main__':\n a, op, b = float(sys.argv[1]), sys.argv[2], float(sys.argv[3])\n print(calculate(op, a, b))", - "toolName": "write_file", - "annotation": "Executing task #1 and #2: create file with operations" - }, - { - "type": "tool_result", - "content": "File written successfully", - "toolName": "write_file", - "annotation": "Code written" - }, - { - "type": "tool_call", - "content": "python calc.py 10 + 5", - "toolName": "bash", - "annotation": "Executing task #4: testing" - }, - { - "type": "tool_result", - "content": "15.0", - "toolName": "bash", - "annotation": "Test passes" + "content": "Delete the temporary build directory.", + "annotation": "The user asks for a 
filesystem action that may be safe or dangerous depending on the path." }, { "type": "assistant_text", - "content": "Calculator built and tested. All 4 operations work correctly.", - "annotation": "Visible plans improve task completion by making progress trackable" + "content": "I'll check the target before running the command.", + "annotation": "The model can propose a tool call, but the harness decides whether execution is allowed." + }, + { + "type": "tool_call", + "toolName": "bash", + "content": "rm -rf /tmp/build-cache", + "annotation": "A destructive-looking command enters the permission pipeline." + }, + { + "type": "system_event", + "content": "Gate 1 hard deny: pass\nGate 2 rule match: destructive command -> ask user", + "annotation": "The command is not absolutely forbidden, but it requires confirmation." + }, + { + "type": "system_event", + "content": "User approval: allowed", + "annotation": "The runtime pauses before execution and resumes only after approval." + }, + { + "type": "tool_result", + "toolName": "bash", + "content": "(no output)", + "annotation": "Allowed commands continue through the normal tool result path." 
} ] } diff --git a/web/src/data/scenarios/s04.json b/web/src/data/scenarios/s04.json index 9882412..1520d79 100644 --- a/web/src/data/scenarios/s04.json +++ b/web/src/data/scenarios/s04.json @@ -1,51 +1,44 @@ { "version": "s04", - "title": "Subagents", - "description": "Agent spawns a subagent for isolated subtask execution", + "title": "Hooks", + "description": "Lifecycle hooks attach logging, permission, and output handling around the same stable agent loop.", "steps": [ { "type": "user_message", - "content": "Refactor the calculator: split into modules and add tests", - "annotation": "Complex task suitable for delegation" - }, - { - "type": "assistant_text", - "content": "I'll delegate the test writing to a subagent while I handle the refactoring.", - "annotation": "Agent decides to split work using the Task tool" - }, - { - "type": "tool_call", - "content": "Write unit tests for calc.py covering all 4 operations and edge cases like division by zero", - "toolName": "task", - "annotation": "Task tool spawns a subagent with its own isolated context" + "content": "Read package.json and summarize the scripts.", + "annotation": "UserPromptSubmit hooks run before the prompt enters the LLM call." }, { "type": "system_event", - "content": "[Subagent spawned] Context: fresh messages[], same tools, scoped prompt", - "annotation": "Subagent gets clean context -- no confusion from parent's conversation" - }, - { - "type": "tool_result", - "content": "Created test_calc.py with 8 test cases. All passing.", - "toolName": "task", - "annotation": "Subagent completes and returns a summary to parent" + "content": "trigger_hooks(UserPromptSubmit): log_user_prompt", + "annotation": "Extension logic is outside the core loop." 
}, { "type": "tool_call", - "content": "calc.py -> calc/operations.py, calc/cli.py", - "toolName": "write_file", - "annotation": "Parent continues with its own task: refactoring" + "toolName": "read_file", + "content": "{\"path\":\"package.json\"}", + "annotation": "The LLM asks to use a tool." + }, + { + "type": "system_event", + "content": "trigger_hooks(PreToolUse): permission_hook, log_hook -> allowed", + "annotation": "PreToolUse hooks can block, audit, or annotate the tool call." }, { "type": "tool_result", - "content": "Files written successfully", - "toolName": "write_file", - "annotation": "Modules split" + "toolName": "read_file", + "content": "{ \"scripts\": { \"build\": \"next build\" } }", + "annotation": "The handler executes only after hooks allow it." + }, + { + "type": "system_event", + "content": "trigger_hooks(PostToolUse): large_output_hook", + "annotation": "PostToolUse hooks see the result before it returns to the model." }, { "type": "assistant_text", - "content": "Refactored into calc/operations.py and calc/cli.py. Tests were written by a subagent and are all passing.", - "annotation": "Context isolation prevents confusion between tasks" + "content": "The project exposes a build script that runs Next's production build.", + "annotation": "Stop hooks can run when the model no longer requests tools." 
} ] } diff --git a/web/src/data/scenarios/s05.json b/web/src/data/scenarios/s05.json index 4e873a4..0d0dad6 100644 --- a/web/src/data/scenarios/s05.json +++ b/web/src/data/scenarios/s05.json @@ -1,44 +1,40 @@ { "version": "s05", - "title": "Skills", - "description": "Agent loads external knowledge from SKILL.md files", + "title": "TodoWrite", + "description": "A planning tool makes the agent's intended work visible and keeps long tasks from drifting.", "steps": [ { "type": "user_message", - "content": "Set up a new Next.js project with TypeScript", - "annotation": "Task requires framework-specific knowledge" - }, - { - "type": "system_event", - "content": "[SkillLoader] Scanning for SKILL.md files... Found: nextjs.SKILL.md", - "annotation": "SkillLoader finds a matching skill file before the loop runs" - }, - { - "type": "system_event", - "content": "[Skill injected] nextjs.SKILL.md -> tool_result message appended", - "annotation": "Skill content is injected as a tool_result, not bloating the system prompt" - }, - { - "type": "assistant_text", - "content": "I have knowledge about Next.js setup. Let me create the project structure.", - "annotation": "Model now has framework-specific knowledge from the skill file" + "content": "Refactor the parser and add a regression test.", + "annotation": "The task has multiple steps, so planning should become visible." }, { "type": "tool_call", - "content": "npx create-next-app@latest my-app --typescript --tailwind --eslint --app --src-dir", - "toolName": "bash", - "annotation": "Using skill-provided knowledge for correct flags" + "toolName": "todo_write", + "content": "[{\"content\":\"Inspect parser\",\"status\":\"in_progress\"},{\"content\":\"Refactor parsing branch\",\"status\":\"pending\"},{\"content\":\"Add regression test\",\"status\":\"pending\"}]", + "annotation": "The model records a plan before acting." }, { "type": "tool_result", - "content": "Success! 
Created my-app at /workspace/my-app", - "toolName": "bash", - "annotation": "Project created with correct configuration" + "toolName": "todo_write", + "content": "Todos updated.", + "annotation": "The current todo list is kept by the harness." + }, + { + "type": "tool_call", + "toolName": "read_file", + "content": "{\"path\":\"parser.py\"}", + "annotation": "Normal tools continue through the same dispatch table." + }, + { + "type": "system_event", + "content": "Update todo status after 3 rounds without todo_write.", + "annotation": "The nag reminder keeps the plan fresh during long work." }, { "type": "assistant_text", - "content": "Next.js project created with TypeScript, Tailwind CSS, ESLint, App Router, and src/ directory.", - "annotation": "Skills inject via tool_result, not system prompt -- keeping context lean" + "content": "I inspected the parser and will update the todo list before making the code change.", + "annotation": "The planning state guides the next turn." } ] } diff --git a/web/src/data/scenarios/s06.json b/web/src/data/scenarios/s06.json index 6b47f6d..b44ca26 100644 --- a/web/src/data/scenarios/s06.json +++ b/web/src/data/scenarios/s06.json @@ -1,51 +1,40 @@ { "version": "s06", - "title": "Compact", - "description": "Agent compresses context when tokens exceed threshold", + "title": "Subagent", + "description": "The task tool spawns a fresh subagent context and returns only a final summary to the parent.", "steps": [ { "type": "user_message", - "content": "Analyze all Python files in this project and summarize each one", - "annotation": "A task that will generate large amounts of context" + "content": "Explore how the CLI parser works without polluting this main context.", + "annotation": "The user asks for exploratory work that can be isolated." }, { "type": "tool_call", - "content": "find . 
-name '*.py' -type f", - "toolName": "bash", - "annotation": "Finding all files to analyze" - }, - { - "type": "tool_result", - "content": "./src/main.py\n./src/utils.py\n./src/models.py\n./tests/test_main.py\n... (28 more files)", - "toolName": "bash", - "annotation": "32 files found -- reading all of them will generate massive context" - }, - { - "type": "assistant_text", - "content": "I'll read and analyze each file systematically.", - "annotation": "Agent begins processing files one by one" - }, - { - "type": "tool_call", - "content": "./src/main.py", - "toolName": "read_file", - "annotation": "Reading file 1 of 32..." - }, - { - "type": "tool_result", - "content": "[... 450 lines of code ...]", - "toolName": "read_file", - "annotation": "Large file content added to messages[]" + "toolName": "task", + "content": "{\"prompt\":\"Explore the CLI parser and summarize the control flow.\"}", + "annotation": "The parent delegates through the task tool." }, { "type": "system_event", - "content": "[ContextManager] Token count: 148,231 / 150,000 threshold\nTriggering 3-layer compression:\n 1. Drop tool_result content > 1000 chars -> replaced with summary\n 2. Merge sequential assistant turns\n 3. Summarize oldest 50% of conversation\nResult: 148,231 -> 42,087 tokens", - "annotation": "Compression fires automatically when approaching the limit" + "content": "spawn_subagent: messages=[{role:\"user\", content: prompt}], tools=bash/read/write/edit/glob", + "annotation": "The child receives a fresh message history and no recursive task tool." + }, + { + "type": "tool_call", + "toolName": "read_file", + "content": "{\"path\":\"cli.py\"}", + "annotation": "Inside the child loop, intermediate tool calls stay private." + }, + { + "type": "tool_result", + "toolName": "task", + "content": "Summary: cli.py parses args, selects a command handler, then dispatches.", + "annotation": "Only the subagent's final summary returns to the parent." 
}, { "type": "assistant_text", - "content": "Continuing analysis with compressed context. Files 1-18 have been summarized, proceeding with remaining files.", - "annotation": "Forgetting old tool results enables infinite work -- the agent can process any number of files" + "content": "The parser is isolated in cli.py and dispatches by command name. I'll use that summary in the main plan.", + "annotation": "The parent avoids carrying every exploratory detail." } ] } diff --git a/web/src/data/scenarios/s07.json b/web/src/data/scenarios/s07.json index fcbdc4d..d9c060d 100644 --- a/web/src/data/scenarios/s07.json +++ b/web/src/data/scenarios/s07.json @@ -1,53 +1,34 @@ { "version": "s07", - "title": "Tasks", - "description": "Agent uses persistent file-based tasks with dependencies", + "title": "Skills", + "description": "The agent sees a cheap skill catalog first, then loads full SKILL.md content only when needed.", "steps": [ + { + "type": "system_event", + "content": "build_system: skills available: agent-builder, code-review, mcp-builder, pdf", + "annotation": "Startup scans the skills directory and injects short descriptions into the system prompt." + }, { "type": "user_message", - "content": "Build a REST API with user auth, database, and deployment config", - "annotation": "Multi-part task requiring dependency tracking" + "content": "Review this change like a code reviewer.", + "annotation": "The user asks for specialized behavior." }, { "type": "tool_call", - "content": "create_task(\"Set up database schema\", deps=[])\ncreate_task(\"Implement user model\", deps=[1])\ncreate_task(\"Add auth endpoints\", deps=[2])\ncreate_task(\"Write deployment config\", deps=[3])", - "toolName": "task_manager", - "annotation": "TaskManager creates file-based tasks with explicit dependencies" + "toolName": "load_skill", + "content": "{\"name\":\"code-review\"}", + "annotation": "The model chooses to load the full skill on demand." 
}, { "type": "tool_result", - "content": "Created 4 tasks in .tasks/\n #1 Set up database schema [ready]\n #2 Implement user model [blocked by #1]\n #3 Add auth endpoints [blocked by #2]\n #4 Write deployment config [blocked by #3]", - "toolName": "task_manager", - "annotation": "Tasks persisted to .tasks/ directory as JSON files" - }, - { - "type": "tool_call", - "content": "schema.sql\n---\nCREATE TABLE users (id SERIAL PRIMARY KEY, email TEXT UNIQUE, password_hash TEXT);", - "toolName": "write_file", - "annotation": "Working on task #1" - }, - { - "type": "tool_result", - "content": "File written successfully", - "toolName": "write_file", - "annotation": "Schema created" - }, - { - "type": "tool_call", - "content": "complete_task(1)", - "toolName": "task_manager", - "annotation": "Completing task #1 automatically unblocks task #2" - }, - { - "type": "tool_result", - "content": "Task #1 completed. Task #2 is now ready.", - "toolName": "task_manager", - "annotation": "File-based persistence outlives process memory" + "toolName": "load_skill", + "content": "# Code Review Skill\nFocus on bugs, regressions, and missing tests...", + "annotation": "Full skill text is injected as a tool result, not permanently in the system prompt." }, { "type": "assistant_text", - "content": "Database schema is done. Moving to user model implementation.", - "annotation": "Dependency graph ensures tasks execute in correct order" + "content": "I'll review the diff for behavioral risks first, then note test gaps.", + "annotation": "The loaded skill guides the next response." 
} ] } diff --git a/web/src/data/scenarios/s08.json b/web/src/data/scenarios/s08.json index 31d0ba3..d712e0c 100644 --- a/web/src/data/scenarios/s08.json +++ b/web/src/data/scenarios/s08.json @@ -1,56 +1,33 @@ { "version": "s08", - "title": "Background Tasks", - "description": "Agent executes long-running tasks in background threads", + "title": "Context Compact", + "description": "A layered compaction pipeline trims cheap context first and calls the LLM summary only when needed.", "steps": [ { - "type": "user_message", - "content": "Run the test suite and while it's running, fix the linting errors", - "annotation": "Two tasks that can run concurrently" - }, - { - "type": "assistant_text", - "content": "I'll start the tests in the background and fix linting in parallel.", - "annotation": "Agent recognizes opportunity for concurrent execution" - }, - { - "type": "tool_call", - "content": "pytest tests/ -v --timeout=300", - "toolName": "background_task", - "annotation": "BackgroundManager spawns a thread for long-running test suite" + "type": "system_event", + "content": "messages: 72 entries, tool_result output: 180k chars", + "annotation": "The context window is approaching the budget." 
}, { "type": "system_event", - "content": "[BackgroundManager] Thread spawned: task_001\nRunning: pytest tests/ -v --timeout=300\nStatus: in_progress", - "annotation": "Thread is running independently -- main loop continues" - }, - { - "type": "tool_result", - "content": "Background task started: task_001", - "toolName": "background_task", - "annotation": "Control returns immediately to the agent" - }, - { - "type": "tool_call", - "content": "ruff check --fix .", - "toolName": "bash", - "annotation": "Agent works on linting while tests run in background" - }, - { - "type": "tool_result", - "content": "Fixed 12 linting errors in 5 files", - "toolName": "bash", - "annotation": "Linting fixed concurrently with tests" + "content": "tool_result_budget -> persisted large output to .large_outputs/out_001.txt", + "annotation": "Large tool outputs are moved out before expensive summarization." }, { "type": "system_event", - "content": "[Notification] Background task_001 completed\nResult: 47 tests passed, 2 failed\nFailed: test_auth.py::test_token_expiry, test_api.py::test_rate_limit", - "annotation": "Background thread notifies main loop via notification queue" + "content": "snip_compact -> trimmed middle messages\nmicro_compact -> replaced old tool_results with placeholders", + "annotation": "Cheap compaction layers run every turn." }, { - "type": "assistant_text", - "content": "Linting is fixed. Tests finished: 47 passed, 2 failed. I'll fix the failing tests now.", - "annotation": "Non-blocking execution via threads + queue enables parallel work" + "type": "system_event", + "content": "token estimate still above threshold -> compact_history()", + "annotation": "Only after cheap passes fail does the runtime ask the LLM for a summary." 
+ }, + { + "type": "tool_result", + "toolName": "compact", + "content": "Summary: user asked for parser refactor; files inspected: cli.py, parser.py; open task: add test.", + "annotation": "The compacted summary replaces older context while preserving task-relevant facts." } ] } diff --git a/web/src/data/scenarios/s09.json b/web/src/data/scenarios/s09.json index 0320cba..dd6eb09 100644 --- a/web/src/data/scenarios/s09.json +++ b/web/src/data/scenarios/s09.json @@ -1,44 +1,33 @@ { "version": "s09", - "title": "Agent Teams", - "description": "Persistent teammates communicate via JSONL inboxes", + "title": "Memory", + "description": "Persistent memory keeps selected project facts available across turns and sessions.", "steps": [ + { + "type": "system_event", + "content": "Loaded .memory/MEMORY.md index with 3 entries.", + "annotation": "A cheap index is always available to the system prompt." + }, { "type": "user_message", - "content": "Build a full-stack app: frontend in React, backend in Python, with tests", - "annotation": "Large task requiring team collaboration" + "content": "Use the repository's preferred test command.", + "annotation": "The user references a fact that may have been learned earlier." + }, + { + "type": "system_event", + "content": "Selected memory: project_facts.md -> preferred_test_command=npm run test:unit", + "annotation": "Relevant memory content is injected selectively." }, { "type": "tool_call", - "content": "spawn_teammate(\n name: \"frontend-dev\",\n role: \"React specialist\"\n)\nspawn_teammate(\n name: \"backend-dev\",\n role: \"Python API specialist\"\n)", - "toolName": "teammate_manager", - "annotation": "Unlike subagents (s04) that die after one task, teammates persist" + "toolName": "bash", + "content": "npm run test:unit", + "annotation": "The agent acts on durable knowledge without asking again." 
}, { "type": "system_event", - "content": "[TeammateManager] Team created:\n .team/config.json written\n frontend-dev [idle] - own thread, own agent loop\n backend-dev [idle] - own thread, own agent loop", - "annotation": "Each teammate runs its own while(tool_use) loop in a separate thread" - }, - { - "type": "tool_call", - "content": "send_message(\n to: \"backend-dev\",\n content: \"Build the user API: GET/POST /users\"\n)", - "toolName": "send_message", - "annotation": "Message appended to .team/inbox/backend-dev.jsonl" - }, - { - "type": "system_event", - "content": "[backend-dev] Checking inbox... 1 new message\n From: lead\n Content: \"Build the user API: GET/POST /users\"\n (inbox drained after read)", - "annotation": "Inbox is read-then-clear: prevents duplicate processing" - }, - { - "type": "system_event", - "content": "[backend-dev -> frontend-dev] Reply via inbox:\n \"API ready: GET /users, POST /users, GET /users/:id\"\n Written to .team/inbox/frontend-dev.jsonl", - "annotation": "Teammates message each other through the same JSONL mechanism" - }, - { - "type": "assistant_text", - "content": "Team is communicating via file-based inboxes. Each teammate has its own .jsonl file -- append to send, drain to read.", - "annotation": "JSONL inboxes decouple sender timing from receiver timing" + "content": "extract_memory -> wrote feedback_tabs.md", + "annotation": "After the turn, new durable facts can be extracted from the original conversation." 
} ] } diff --git a/web/src/data/scenarios/s10.json b/web/src/data/scenarios/s10.json index e219f40..41fd5fe 100644 --- a/web/src/data/scenarios/s10.json +++ b/web/src/data/scenarios/s10.json @@ -1,38 +1,27 @@ { "version": "s10", - "title": "Team Protocols", - "description": "Request-response protocols for shutdown and plan approval", + "title": "System Prompt", + "description": "The system prompt is assembled from runtime state and cached by a deterministic context key.", "steps": [ { - "type": "user_message", - "content": "Work is done. Shut down the team gracefully.", - "annotation": "Can't just kill threads -- need coordinated shutdown" - }, - { - "type": "tool_call", - "content": "shutdown_request(\n teammate: \"backend-dev\",\n request_id: \"req_abc123\"\n)", - "toolName": "shutdown_teammate", - "annotation": "request_id correlates the request with the response" + "type": "system_event", + "content": "context={workspace:\"/repo\", memories:true, skills:[\"code-review\"], tools:[\"bash\",\"read_file\"]}", + "annotation": "Prompt inputs are explicit runtime data." }, { "type": "system_event", - "content": "[Shutdown FSM] req_abc123: pending\n Lead -> backend-dev inbox:\n { type: \"shutdown_request\", request_id: \"req_abc123\" }", - "annotation": "FSM tracks state: pending -> approved | rejected" - }, - { - "type": "system_event", - "content": "[backend-dev] Received shutdown request req_abc123\n Current task: none (idle)\n Decision: approve\n -> lead inbox: { type: \"shutdown_response\", request_id: \"req_abc123\", approve: true }", - "annotation": "Teammate decides whether to approve based on its own state" - }, - { - "type": "system_event", - "content": "[Shutdown FSM] req_abc123: approved\n backend-dev thread stopped\n Status: idle -> shutdown", - "annotation": "Same request_id pattern works for plan approval too" + "content": "cache miss -> assemble_system_prompt(context)", + "annotation": "A new context key causes sections to be selected and joined." 
}, { "type": "assistant_text", - "content": "backend-dev shut down gracefully. The same request_id + FSM pattern handles plan approval: teammate submits plan, lead approves/rejects with the correlated request_id.", - "annotation": "One pattern (request_id correlation), two applications (shutdown + plan approval)" + "content": "I can use the workspace, memory index, tool guidance, and available skills for this task.", + "annotation": "The LLM sees the assembled prompt, not a hardcoded static string." + }, + { + "type": "system_event", + "content": "same context key -> reuse cached prompt", + "annotation": "Repeated turns avoid rebuilding identical prompt text." } ] } diff --git a/web/src/data/scenarios/s11.json b/web/src/data/scenarios/s11.json index e6f33ce..d7b0909 100644 --- a/web/src/data/scenarios/s11.json +++ b/web/src/data/scenarios/s11.json @@ -1,44 +1,33 @@ { "version": "s11", - "title": "Autonomous Agents", - "description": "Teammates self-govern with idle cycles and auto-claiming", + "title": "Error Recovery", + "description": "LLM calls are wrapped with targeted recovery paths for token limits, prompt overflow, and transient provider errors.", "steps": [ { - "type": "user_message", - "content": "Implement the full feature backlog", - "annotation": "Open-ended task for autonomous team execution" + "type": "system_event", + "content": "try LLM call with max_tokens=8000", + "annotation": "The normal path is still a regular model call." + }, + { + "type": "system_event", + "content": "error=max_tokens -> escalate to 64000 without appending a fake user message", + "annotation": "Token exhaustion gets a continuation-aware recovery path." + }, + { + "type": "system_event", + "content": "error=prompt_too_long -> reactive_compact(messages) -> retry once", + "annotation": "Prompt overflow triggers compaction instead of blind retry." 
+ }, + { + "type": "system_event", + "content": "error=529 -> exponential backoff with jitter; repeated 529 -> fallback model", + "annotation": "Transient provider errors use backoff and model fallback." }, { "type": "tool_call", - "content": "create_tasks([\n { title: \"User authentication\", status: \"open\" },\n { title: \"Dashboard UI\", status: \"open\" },\n { title: \"API rate limiting\", status: \"open\" },\n { title: \"Integration tests\", status: \"open\" }\n])", - "toolName": "task_manager", - "annotation": "Tasks created with no owner -- available for auto-claiming" - }, - { - "type": "tool_result", - "content": "4 tasks created on shared board", - "toolName": "task_manager", - "annotation": "Unassigned tasks visible to all teammates" - }, - { - "type": "system_event", - "content": "[frontend-dev] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> found unowned task #2 \"Dashboard UI\"\n 3. Auto-claim task #2\n 4. Status: idle -> working", - "annotation": "Idle cycle: check inbox, poll tasks, auto-claim, resume work" - }, - { - "type": "system_event", - "content": "[backend-dev] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> found unowned task #1 \"User authentication\"\n 3. Auto-claim task #1\n 4. Status: idle -> working", - "annotation": "Multiple teammates claim different tasks concurrently" - }, - { - "type": "system_event", - "content": "[tester] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> task #4 blocked by #1, #2, #3\n 3. No claimable tasks\n 4. Status: idle (will retry in 30s)", - "annotation": "Timeout-based polling prevents busy-waiting" - }, - { - "type": "assistant_text", - "content": "Team is self-organizing: frontend-dev claimed Dashboard UI, backend-dev claimed User auth. 
Tester is waiting for dependencies to clear.", - "annotation": "Polling + timeout makes teammates autonomous -- no micromanagement needed" + "toolName": "read_file", + "content": "{\"path\":\"README.md\"}", + "annotation": "After recovery succeeds, the agent continues the normal tool loop." } ] } diff --git a/web/src/data/scenarios/s12.json b/web/src/data/scenarios/s12.json index 745191e..6266ae8 100644 --- a/web/src/data/scenarios/s12.json +++ b/web/src/data/scenarios/s12.json @@ -1,51 +1,48 @@ { "version": "s12", - "title": "Worktree + Task Isolation", - "description": "Use a shared task board with optional worktree lanes for clean parallel execution", + "title": "Task System", + "description": "A file-persisted task graph tracks status, ownership, and blockedBy dependencies.", "steps": [ { "type": "user_message", - "content": "Implement auth refactor and login UI updates in parallel", - "annotation": "Two active tasks in one workspace would collide" + "content": "Break the release into tasks and block deployment until tests pass.", + "annotation": "The user asks for durable multi-step coordination." }, { "type": "tool_call", - "content": "task_create(subject: \"Auth refactor\")\ntask_create(subject: \"Login UI polish\")", - "toolName": "task_manager", - "annotation": "Shared board remains the coordination source of truth" + "toolName": "create_task", + "content": "{\"subject\":\"Run tests\"}", + "annotation": "Tasks are created as JSON files under .tasks/." 
}, { "type": "tool_call", - "content": "worktree_create(name: \"auth-refactor\", task_id: 1)\nworktree_create(name: \"ui-login\")\ntask_bind_worktree(task_id: 2, worktree: \"ui-login\")", - "toolName": "worktree_manager", - "annotation": "Lane allocation and task association are composable; task 2 binds after lane creation" - }, - { - "type": "system_event", - "content": "worktree.create.before/after emitted\n.tasks/task_1.json -> { status: \"in_progress\", worktree: \"auth-refactor\" }\n.tasks/task_2.json -> { status: \"in_progress\", worktree: \"ui-login\" }\n.worktrees/index.json updated", - "annotation": "Control-plane state remains canonical; hook-style consumers can react to lifecycle events without owning canonical state writes" + "toolName": "create_task", + "content": "{\"subject\":\"Deploy release\",\"blockedBy\":[\"task_tests\"]}", + "annotation": "blockedBy encodes dependency ordering." }, { "type": "tool_call", - "content": "worktree_run(name: \"auth-refactor\", command: \"pytest tests/auth -q\")\nworktree_run(name: \"ui-login\", command: \"npm test -- login\")", - "toolName": "worktree_run", - "annotation": "In this teaching runtime, commands route by lane-scoped cwd; other runtimes may use session-level directory switches. The invariant is explicit execution context." + "toolName": "claim_task", + "content": "{\"task_id\":\"task_deploy\",\"owner\":\"agent\"}", + "annotation": "The claim fails until dependencies are complete." + }, + { + "type": "tool_result", + "toolName": "claim_task", + "content": "Blocked by: [\"task_tests\"]", + "annotation": "The task graph prevents premature work." 
}, { "type": "tool_call", - "content": "worktree_keep(name: \"ui-login\")\nworktree_remove(name: \"auth-refactor\", complete_task: true)\nworktree_events(limit: 10)", - "toolName": "worktree_manager", - "annotation": "Closeout is explicit tool-driven state transition: mix keep/remove decisions and query lifecycle events in one pass" + "toolName": "complete_task", + "content": "{\"task_id\":\"task_tests\"}", + "annotation": "Completing a dependency can unblock downstream tasks." }, { - "type": "system_event", - "content": "worktree.keep emitted for ui-login\nworktree.remove.before/after emitted for auth-refactor\ntask.completed emitted for #1\n.worktrees/events.jsonl appended", - "annotation": "Lifecycle transitions become explicit records while task/worktree files remain source-of-truth" - }, - { - "type": "assistant_text", - "content": "Task board handles coordination, worktrees handle isolation. Parallel tracks stay clean and auditable.", - "annotation": "Coordinate in one board, isolate by lane only where needed, and run optional policy/audit side effects from lifecycle events" + "type": "tool_result", + "toolName": "complete_task", + "content": "Completed task_tests\nUnblocked: Deploy release", + "annotation": "The harness reports newly available work." } ] } diff --git a/web/src/data/scenarios/s13.json b/web/src/data/scenarios/s13.json new file mode 100644 index 0000000..f391491 --- /dev/null +++ b/web/src/data/scenarios/s13.json @@ -0,0 +1,45 @@ +{ + "version": "s13", + "title": "Background Tasks", + "description": "Long-running work moves to a thread while the agent keeps the main loop responsive.", + "steps": [ + { + "type": "user_message", + "content": "Run a long repository scan and tell me when it finishes.", + "annotation": "The user asks for work that may exceed the normal foreground loop." 
+ }, + { + "type": "assistant_text", + "content": "I'll start the scan in the background so we can keep the conversation responsive.", + "annotation": "The model chooses the background path instead of blocking on the tool result." + }, + { + "type": "tool_call", + "toolName": "bash", + "content": "rg \"TODO|FIXME\" .", + "toolInput": "{\"background\":true}", + "annotation": "The tool call is wrapped with a background flag." + }, + { + "type": "system_event", + "content": "background_tasks[task_42] = Thread(target=run_bash)", + "annotation": "The runtime starts a thread and returns immediately." + }, + { + "type": "tool_result", + "toolName": "bash", + "content": "Started background task task_42.", + "annotation": "A placeholder result is appended to the main conversation." + }, + { + "type": "system_event", + "content": "Found 12 matches.", + "annotation": "The completed thread injects a notification back into the next loop." + }, + { + "type": "assistant_text", + "content": "The scan finished with 12 matches. I'll summarize the most important ones now.", + "annotation": "The agent consumes the background result as fresh context." + } + ] +} diff --git a/web/src/data/scenarios/s14.json b/web/src/data/scenarios/s14.json new file mode 100644 index 0000000..234b32d --- /dev/null +++ b/web/src/data/scenarios/s14.json @@ -0,0 +1,44 @@ +{ + "version": "s14", + "title": "Cron Scheduler", + "description": "A daemon loop watches durable cron definitions, queues due jobs, and lets the agent process them.", + "steps": [ + { + "type": "user_message", + "content": "Every weekday morning, remind me to review open tasks.", + "annotation": "The user asks for a recurring future action." + }, + { + "type": "assistant_text", + "content": "I'll create a cron job and store it so the scheduler can pick it up later.", + "annotation": "The agent delegates recurrence to the scheduler instead of relying on current context." 
+ }, + { + "type": "tool_call", + "toolName": "schedule_cron", + "content": "{\"name\":\"review-open-tasks\",\"cron\":\"0 9 * * 1-5\",\"prompt\":\"Review open tasks and summarize blockers.\"}", + "annotation": "The new tool validates and persists a cron definition." + }, + { + "type": "tool_result", + "toolName": "schedule_cron", + "content": "Scheduled review-open-tasks with id cron_7.", + "annotation": "The job is written to the durable schedule store." + }, + { + "type": "system_event", + "content": "cron_scheduler_loop: cron_7 matched current minute -> cron_queue.put(cron_7)", + "annotation": "The independent scheduler daemon detects that the job is due." + }, + { + "type": "system_event", + "content": "queue_processor_loop: dequeued cron_7 and invoked agent_loop(prompt)", + "annotation": "A separate processor turns due jobs into normal agent work." + }, + { + "type": "assistant_text", + "content": "Scheduled. When the cron fires, the queue processor will run the reminder prompt through the agent loop.", + "annotation": "The final answer explains the durable recurring behavior." + } + ] +} diff --git a/web/src/data/scenarios/s15.json b/web/src/data/scenarios/s15.json new file mode 100644 index 0000000..3a41d84 --- /dev/null +++ b/web/src/data/scenarios/s15.json @@ -0,0 +1,51 @@ +{ + "version": "s15", + "title": "Agent Teams", + "description": "A lead agent can spawn teammate loops and coordinate through file-backed mailboxes.", + "steps": [ + { + "type": "user_message", + "content": "Split this task: one agent checks tests, another reviews docs.", + "annotation": "The request needs parallel work with separate responsibilities." + }, + { + "type": "assistant_text", + "content": "I'll spawn teammates with narrow roles and coordinate their updates through the message bus.", + "annotation": "The lead keeps orchestration authority while delegating execution." 
+ }, + { + "type": "tool_call", + "toolName": "spawn_teammate", + "content": "{\"name\":\"test-runner\",\"role\":\"Run test commands and report failures.\"}", + "annotation": "A teammate thread starts with its own simplified loop." + }, + { + "type": "tool_call", + "toolName": "send_message", + "content": "{\"to\":\"test-runner\",\"message\":\"Run the focused test suite and summarize failures.\"}", + "annotation": "The lead sends work through the file-backed mailbox." + }, + { + "type": "system_event", + "content": ".mailboxes/test-runner.jsonl <- {from:\"lead\", message:\"Run the focused test suite\"}", + "annotation": "MessageBus persists the handoff as JSONL." + }, + { + "type": "system_event", + "content": ".mailboxes/lead.jsonl <- {from:\"test-runner\", message:\"Tests passed.\"}", + "annotation": "The teammate replies without sharing the lead's whole conversation." + }, + { + "type": "tool_call", + "toolName": "check_inbox", + "content": "{\"agent\":\"lead\"}", + "annotation": "The lead polls its own inbox for teammate updates." + }, + { + "type": "tool_result", + "toolName": "check_inbox", + "content": "test-runner: Tests passed.", + "annotation": "Mailbox messages re-enter the lead loop as tool results." + } + ] +} diff --git a/web/src/data/scenarios/s16.json b/web/src/data/scenarios/s16.json new file mode 100644 index 0000000..0178846 --- /dev/null +++ b/web/src/data/scenarios/s16.json @@ -0,0 +1,45 @@ +{ + "version": "s16", + "title": "Team Protocols", + "description": "Typed requests, request ids, and response matching turn team chat into a protocol.", + "steps": [ + { + "type": "user_message", + "content": "Ask the reviewer agent for a plan before it edits anything.", + "annotation": "The lead needs a structured plan request, not free-form chat." 
+ }, + { + "type": "tool_call", + "toolName": "request_plan", + "content": "{\"to\":\"reviewer\",\"task\":\"Review the parser change before editing.\"}", + "annotation": "The protocol records a pending request with a request_id." + }, + { + "type": "system_event", + "content": "pending_requests[req_18] = {kind:\"plan\", to:\"reviewer\", status:\"pending\"}", + "annotation": "ProtocolState keeps the request open until a matching response arrives." + }, + { + "type": "system_event", + "content": "dispatch_message -> reviewer inbox: {type:\"plan_request\", request_id:\"req_18\"}", + "annotation": "The message is typed so the teammate knows how to handle it." + }, + { + "type": "tool_call", + "toolName": "submit_plan", + "content": "{\"request_id\":\"req_18\",\"plan\":[\"inspect parser\",\"run fixtures\",\"report risks\"]}", + "annotation": "The teammate responds with the same request_id." + }, + { + "type": "system_event", + "content": "match_response(req_18) -> status: ready_for_review", + "annotation": "The lead can correlate the reply with the exact request." + }, + { + "type": "tool_call", + "toolName": "review_plan", + "content": "{\"request_id\":\"req_18\",\"approved\":true}", + "annotation": "The lead explicitly approves the plan before work proceeds." + } + ] +} diff --git a/web/src/data/scenarios/s17.json b/web/src/data/scenarios/s17.json new file mode 100644 index 0000000..9bb25f5 --- /dev/null +++ b/web/src/data/scenarios/s17.json @@ -0,0 +1,46 @@ +{ + "version": "s17", + "title": "Autonomous Agents", + "description": "Idle teammates can scan the task board, claim eligible work, and return to idle after completion.", + "steps": [ + { + "type": "system_event", + "content": "teammate(worker-a): state=IDLE -> idle_poll()", + "annotation": "Autonomy starts from an idle lifecycle tick, not a direct user command." 
+ }, + { + "type": "tool_call", + "toolName": "list_tasks", + "content": "{\"status\":\"open\"}", + "annotation": "The idle agent scans the shared task board." + }, + { + "type": "tool_result", + "toolName": "list_tasks", + "content": "[{\"id\":\"task_5\",\"status\":\"open\",\"owner\":null,\"title\":\"Update README\"}]", + "annotation": "Only unclaimed work is eligible for autonomous pickup." + }, + { + "type": "tool_call", + "toolName": "claim_task", + "content": "{\"id\":\"task_5\",\"owner\":\"worker-a\"}", + "annotation": "The task manager enforces ownership before work begins." + }, + { + "type": "system_event", + "content": "worker-a: state=WORK task=task_5", + "annotation": "The lifecycle moves from IDLE to WORK." + }, + { + "type": "tool_call", + "toolName": "complete_task", + "content": "{\"id\":\"task_5\",\"result\":\"README updated with setup notes.\"}", + "annotation": "Completion writes the result back to the shared board." + }, + { + "type": "system_event", + "content": "worker-a: state=IDLE next_poll_in=5s", + "annotation": "After finishing, the agent becomes available for more work." + } + ] +} diff --git a/web/src/data/scenarios/s18.json b/web/src/data/scenarios/s18.json new file mode 100644 index 0000000..edbc04f --- /dev/null +++ b/web/src/data/scenarios/s18.json @@ -0,0 +1,45 @@ +{ + "version": "s18", + "title": "Worktree Isolation", + "description": "A task can be bound to an isolated git worktree so concurrent agents avoid stepping on each other.", + "steps": [ + { + "type": "user_message", + "content": "Let one teammate fix the docs while another changes the parser.", + "annotation": "Concurrent edits need isolated working directories." + }, + { + "type": "tool_call", + "toolName": "create_worktree", + "content": "{\"task_id\":\"task_docs\",\"name\":\"docs-fix\"}", + "annotation": "The tool validates a safe worktree name before touching git." 
+ }, + { + "type": "system_event", + "content": "git worktree add .worktrees/docs-fix -b agent/docs-fix", + "annotation": "A separate branch and checkout are created for that task." + }, + { + "type": "tool_result", + "toolName": "create_worktree", + "content": "task_docs bound to .worktrees/docs-fix", + "annotation": "The task record stores the assigned worktree path." + }, + { + "type": "system_event", + "content": ".worktrees/events.jsonl <- {event:\"created\", task:\"task_docs\", worktree:\"docs-fix\"}", + "annotation": "Lifecycle events are emitted as a side channel." + }, + { + "type": "tool_call", + "toolName": "keep_worktree", + "content": "{\"task_id\":\"task_docs\",\"reason\":\"needs human review\"}", + "annotation": "Closeout can preserve a worktree instead of deleting it immediately." + }, + { + "type": "assistant_text", + "content": "The docs task now has an isolated worktree and can be reviewed independently from parser changes.", + "annotation": "The user sees the isolation boundary, not just a raw git command." + } + ] +} diff --git a/web/src/data/scenarios/s19.json b/web/src/data/scenarios/s19.json new file mode 100644 index 0000000..193877d --- /dev/null +++ b/web/src/data/scenarios/s19.json @@ -0,0 +1,46 @@ +{ + "version": "s19", + "title": "MCP Tools", + "description": "The agent discovers external MCP tools and exposes them through a normalized tool namespace.", + "steps": [ + { + "type": "user_message", + "content": "Connect the docs server and search for deployment guidance.", + "annotation": "The user asks for a tool source outside the built-in set." + }, + { + "type": "tool_call", + "toolName": "connect_mcp", + "content": "{\"server\":\"docs\",\"command\":\"mock-docs-server\"}", + "annotation": "The runtime creates an MCP client for the named server." + }, + { + "type": "system_event", + "content": "normalize_mcp_name(\"docs\", \"search\") -> mcp__docs__search", + "annotation": "External tools are namespaced to avoid collisions." 
+ }, + { + "type": "tool_result", + "toolName": "connect_mcp", + "content": "Connected docs with tools: mcp__docs__search, mcp__docs__read", + "annotation": "Tool discovery expands the active tool pool." + }, + { + "type": "tool_call", + "toolName": "mcp__docs__search", + "content": "{\"query\":\"deployment\"}", + "annotation": "The LLM can now call the discovered tool by its normalized name." + }, + { + "type": "tool_result", + "toolName": "mcp__docs__search", + "content": "[\"Deploy with npm run build\", \"Use environment variables for tokens\"]", + "annotation": "The external result is appended like any other tool result." + }, + { + "type": "assistant_text", + "content": "The docs server says to build first and keep tokens in environment variables.", + "annotation": "The agent summarizes external tool output for the user." + } + ] +} diff --git a/web/src/data/scenarios/s20.json b/web/src/data/scenarios/s20.json new file mode 100644 index 0000000..5211bcf --- /dev/null +++ b/web/src/data/scenarios/s20.json @@ -0,0 +1,57 @@ +{ + "version": "s20", + "title": "Comprehensive Agent", + "description": "The final agent composes context assembly, tools, memory, teams, background work, cron, worktrees, and MCP.", + "steps": [ + { + "type": "user_message", + "content": "Plan a release: inspect tasks, run checks, delegate docs, and keep long scans in the background.", + "annotation": "The final lesson combines many earlier capabilities in one request." + }, + { + "type": "system_event", + "content": "assemble_prompt: memory + task graph + skills + available tools + policy", + "annotation": "The runtime builds the prompt from layered sources of context." + }, + { + "type": "assistant_text", + "content": "I'll inspect the task graph, delegate docs, start the slow scan in the background, and use the release checklist skill.", + "annotation": "The model chooses multiple mechanisms while keeping one coherent plan." 
+ }, + { + "type": "tool_call", + "toolName": "load_skill", + "content": "{\"name\":\"release-check\"}", + "annotation": "Skills contribute procedural context before execution." + }, + { + "type": "tool_call", + "toolName": "spawn_teammate", + "content": "{\"name\":\"docs\",\"role\":\"Update release notes in its worktree.\"}", + "annotation": "Team delegation handles a parallel slice of the release." + }, + { + "type": "tool_call", + "toolName": "bash", + "content": "npm run lint && npm run test", + "toolInput": "{\"background\":true}", + "annotation": "Long-running validation goes through the background task path." + }, + { + "type": "tool_call", + "toolName": "connect_mcp", + "content": "{\"server\":\"deploy\",\"command\":\"mock-deploy-server\"}", + "annotation": "External capabilities are added only when needed." + }, + { + "type": "system_event", + "content": "recover: background task done, teammate replied, MCP deploy status ready", + "annotation": "The integrated runtime folds asynchronous results back into the loop." + }, + { + "type": "assistant_text", + "content": "Release preparation is ready: checks passed, docs are updated in the teammate worktree, and deployment status is available.", + "annotation": "The comprehensive agent returns a single user-facing outcome." 
+ } + ] +} diff --git a/web/src/i18n/messages/en.json b/web/src/i18n/messages/en.json index 6dcb3ef..3ee1718 100644 --- a/web/src/i18n/messages/en.json +++ b/web/src/i18n/messages/en.json @@ -1,10 +1,10 @@ { "meta": { "title": "Learn Claude Code", "description": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time" }, "nav": { "home": "Home", "timeline": "Timeline", "compare": "Compare", "layers": "Layers", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "12 progressive sessions, from a simple loop to isolated autonomous execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. 
Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "20 progressive sessions, from a simple loop to a complete multi-agent harness", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, "version": { "loc": "lines of code", "tools": "tools", "new": "New", "prev": "Previous", "next": "Next", "view_source": "View Source", "view_diff": "View Diff", "design_decisions": "Design Decisions", "whats_new": "What's New", "tutorial": "Tutorial", "simulator": "Agent Loop Simulator", "execution_flow": "Execution Flow", "architecture": "Architecture", "concept_viz": "Concept Visualization", "alternatives": "Alternatives Considered", "tab_learn": "Learn", "tab_simulate": "Simulate", "tab_code": "Code", "tab_deep_dive": "Deep Dive" }, "sim": { "play": "Play", "pause": "Pause", "step": "Step", "reset": "Reset", "speed": "Speed", "step_of": "of" }, - "timeline": { "title": "Learning Path", "subtitle": "s01 to s12: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, + "timeline": { "title": "Learning Path", "subtitle": "s01 to s20: Progressive Agent Harness Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, "layers": { "title": "Architectural Layers", "subtitle": "Five orthogonal concerns that compose into a complete agent", @@ -40,37 +40,53 @@ }, "sessions": { "s01": "The Agent Loop", - "s02": "Tools", - "s03": "TodoWrite", - "s04": "Subagents", - "s05": "Skills", - "s06": "Compact", - "s07": "Tasks", - "s08": "Background Tasks", - "s09": "Agent Teams", - "s10": "Team Protocols", - "s11": "Autonomous Agents", - "s12": "Worktree + Task 
Isolation" + "s02": "Tool Use", + "s03": "Permission", + "s04": "Hooks", + "s05": "TodoWrite", + "s06": "Subagent", + "s07": "Skills", + "s08": "Context Compact", + "s09": "Memory", + "s10": "System Prompt", + "s11": "Error Recovery", + "s12": "Task System", + "s13": "Background Tasks", + "s14": "Cron Scheduler", + "s15": "Agent Teams", + "s16": "Team Protocols", + "s17": "Autonomous Agents", + "s18": "Worktree Isolation", + "s19": "MCP Tools", + "s20": "Comprehensive Agent Turn" }, "layer_labels": { "tools": "Tools & Execution", "planning": "Planning & Coordination", "memory": "Memory Management", "concurrency": "Concurrency", - "collaboration": "Collaboration" + "collaboration": "Multi-Agent Platform" }, "viz": { "s01": "The Agent While-Loop", "s02": "Tool Dispatch Map", - "s03": "TodoWrite Nag System", - "s04": "Subagent Context Isolation", - "s05": "On-Demand Skill Loading", - "s06": "Three-Layer Context Compression", - "s07": "Task Dependency Graph", - "s08": "Background Task Lanes", - "s09": "Agent Team Mailboxes", - "s10": "FSM Team Protocols", - "s11": "Autonomous Agent Cycle", - "s12": "Worktree Task Isolation" + "s03": "Permission Desk", + "s04": "Hook Workbench", + "s05": "TodoWrite Nag System", + "s06": "Subagent Context Isolation", + "s07": "On-Demand Skill Loading", + "s08": "Three-Layer Context Compression", + "s09": "Memory Library", + "s10": "Runtime Prompt Assembly", + "s11": "Error Recovery Paths", + "s12": "Task Board Dependencies", + "s13": "Background Task Lanes", + "s14": "Cron Scheduler", + "s15": "Team Mailbox Workbench", + "s16": "Team Protocol Cards", + "s17": "Autonomous Agent Cycle", + "s18": "Worktree Task Isolation", + "s19": "MCP Tool Bridge", + "s20": "Comprehensive Agent Turn" } } diff --git a/web/src/i18n/messages/ja.json b/web/src/i18n/messages/ja.json index 25192d2..bc56c69 100644 --- a/web/src/i18n/messages/ja.json +++ b/web/src/i18n/messages/ja.json @@ -1,10 +1,10 @@ { "meta": { "title": "Learn Claude Code", "description": "0 
から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加" }, "nav": { "home": "ホーム", "timeline": "学習パス", "compare": "バージョン比較", "layers": "アーキテクチャ層", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "12の段階的セッション、シンプルなループから分離された自律実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "20の段階的セッション、シンプルなループから完全なマルチエージェント Harness まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, "version": { "loc": "行のコード", "tools": "ツール", "new": "新規", "prev": "前のバージョン", "next": "次のバージョン", "view_source": "ソースを見る", "view_diff": "差分を見る", "design_decisions": "設計判断", "whats_new": "新機能", "tutorial": "チュートリアル", "simulator": "エージェントループシミュレーター", "execution_flow": "実行フロー", "architecture": "アーキテクチャ", "concept_viz": "コンセプト可視化", "alternatives": "検討された代替案", "tab_learn": "学習", "tab_simulate": "シミュレーション", "tab_code": "ソースコード", "tab_deep_dive": "詳細分析" }, "sim": { "play": "再生", "pause": "一時停止", "step": "ステップ", "reset": "リセット", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学習パス", "subtitle": "s01からs12へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", 
"loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, + "timeline": { "title": "学習パス", "subtitle": "s01からs20へ:段階的エージェント Harness 設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, "layers": { "title": "アーキテクチャ層", "subtitle": "5つの直交する関心事が完全なエージェントを構成", @@ -40,37 +40,53 @@ }, "sessions": { "s01": "エージェントループ", - "s02": "ツール", - "s03": "TodoWrite", - "s04": "サブエージェント", - "s05": "スキル", - "s06": "コンテキスト圧縮", - "s07": "タスクシステム", - "s08": "バックグラウンドタスク", - "s09": "エージェントチーム", - "s10": "チームプロトコル", - "s11": "自律エージェント", - "s12": "Worktree + タスク分離" + "s02": "ツール使用", + "s03": "権限", + "s04": "フック", + "s05": "TodoWrite", + "s06": "サブエージェント", + "s07": "スキル", + "s08": "コンテキスト圧縮", + "s09": "メモリ", + "s10": "システムプロンプト", + "s11": "エラー回復", + "s12": "タスクシステム", + "s13": "バックグラウンドタスク", + "s14": "Cron スケジューラー", + "s15": "エージェントチーム", + "s16": "チームプロトコル", + "s17": "自律エージェント", + "s18": "Worktree 分離", + "s19": "MCP ツール", + "s20": "Comprehensive Agent Turn" }, "layer_labels": { "tools": "ツールと実行", "planning": "計画と調整", "memory": "メモリ管理", "concurrency": "並行処理", - "collaboration": "コラボレーション" + "collaboration": "マルチエージェント基盤" }, "viz": { "s01": "エージェント Whileループ", "s02": "ツールディスパッチマップ", - "s03": "TodoWrite リマインドシステム", - "s04": "サブエージェント コンテキスト分離", - "s05": "オンデマンド スキルローディング", - "s06": "3層コンテキスト圧縮", - "s07": "タスク依存関係グラフ", - "s08": "バックグラウンドタスクレーン", - "s09": "エージェントチーム メールボックス", - "s10": "FSM チームプロトコル", - "s11": "自律エージェントサイクル", - "s12": "Worktree タスク分離" + "s03": "Permission Desk", + "s04": "Hook Workbench", + "s05": "TodoWrite リマインドシステム", + "s06": "サブエージェント コンテキスト分離", + "s07": "オンデマンド スキルローディング", + "s08": "3層コンテキスト圧縮", + "s09": "メモリライブラリ", + "s10": "実行時プロンプト組み立て", + "s11": "エラー回復経路", + "s12": "タスクボード依存関係", + "s13": "バックグラウンドタスクレーン", + "s14": "Cron スケジューラー", + "s15": "チームメールボックス作業台", + "s16": "Team Protocol Cards", + "s17": "自律エージェントサイクル", + "s18": "Worktree タスク分離", + "s19": "MCP ツールブリッジ", + "s20": "Comprehensive Agent Turn" } } diff --git a/web/src/i18n/messages/zh.json 
b/web/src/i18n/messages/zh.json index a8d9f36..536b536 100644 --- a/web/src/i18n/messages/zh.json +++ b/web/src/i18n/messages/zh.json @@ -1,10 +1,10 @@ { "meta": { "title": "Learn Claude Code", "description": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制" }, "nav": { "home": "首页", "timeline": "学习路径", "compare": "版本对比", "layers": "架构层", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "12 个渐进式课程,从简单循环到隔离化自治执行", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "20 个渐进式课程,从简单循环到完整多 Agent Harness", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" }, "version": { "loc": "行代码", "tools": "个工具", "new": "新增", "prev": "上一版", "next": "下一版", "view_source": "查看源码", "view_diff": "查看变更", "design_decisions": "设计决策", "whats_new": "新增内容", "tutorial": "教程", "simulator": "Agent 循环模拟器", "execution_flow": "执行流程", "architecture": "架构", "concept_viz": "概念可视化", "alternatives": "替代方案", "tab_learn": "学习", "tab_simulate": "模拟", "tab_code": "源码", "tab_deep_dive": "深入探索" }, "sim": { "play": "播放", "pause": "暂停", "step": "单步", "reset": "重置", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学习路径", "subtitle": "s01 到 s12:渐进式 Agent 设计", "layer_legend": "层次图例", 
"loc_growth": "代码量增长", "learn_more": "了解更多" }, + "timeline": { "title": "学习路径", "subtitle": "s01 到 s20:渐进式 Agent Harness 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" }, "layers": { "title": "架构层次", "subtitle": "五个正交关注点组合成完整的 Agent", @@ -41,36 +41,52 @@ "sessions": { "s01": "Agent Loop", "s02": "Tool Use", - "s03": "TodoWrite", - "s04": "Subagent", - "s05": "Skills", - "s06": "Context Compact", - "s07": "Task System", - "s08": "Background Tasks", - "s09": "Agent Teams", - "s10": "Team Protocols", - "s11": "Autonomous Agents", - "s12": "Worktree + Task Isolation" + "s03": "Permission", + "s04": "Hooks", + "s05": "TodoWrite", + "s06": "Subagent", + "s07": "Skills", + "s08": "Context Compact", + "s09": "Memory", + "s10": "System Prompt", + "s11": "Error Recovery", + "s12": "Task System", + "s13": "Background Tasks", + "s14": "Cron Scheduler", + "s15": "Agent Teams", + "s16": "Team Protocols", + "s17": "Autonomous Agents", + "s18": "Worktree Isolation", + "s19": "MCP Tools", + "s20": "Comprehensive Agent Turn" }, "layer_labels": { "tools": "工具与执行", "planning": "规划与协调", "memory": "记忆管理", "concurrency": "并发", - "collaboration": "协作" + "collaboration": "多 Agent 平台" }, "viz": { "s01": "Agent While-Loop", "s02": "Tool Dispatch Map", - "s03": "TodoWrite Nag System", - "s04": "Subagent Context Isolation", - "s05": "On-Demand Skill Loading", - "s06": "Three-Layer Context Compact", - "s07": "Task Dependency Graph", - "s08": "Background Task Lanes", - "s09": "Agent Team Mailboxes", - "s10": "FSM Team Protocols", - "s11": "Autonomous Agent Cycle", - "s12": "Worktree Task Isolation" + "s03": "Permission Desk", + "s04": "Hook Workbench", + "s05": "TodoWrite Nag System", + "s06": "Subagent Context Isolation", + "s07": "On-Demand Skill Loading", + "s08": "Three-Layer Context Compact", + "s09": "记忆图书馆", + "s10": "Runtime Prompt Assembly", + "s11": "Error Recovery Paths", + "s12": "任务看板依赖", + "s13": "Background Task Lanes", + "s14": "Cron Scheduler", + "s15": 
"团队邮箱工作台", + "s16": "Team Protocol Cards", + "s17": "Autonomous Agent Cycle", + "s18": "Worktree Task Isolation", + "s19": "MCP Tool Bridge", + "s20": "Comprehensive Agent Turn" } } diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts index 0f1fdf7..b77a54a 100644 --- a/web/src/lib/constants.ts +++ b/web/src/lib/constants.ts @@ -1,5 +1,26 @@ +import type { AgentLayer } from "@/types/agent-data"; + export const VERSION_ORDER = [ - "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12" + "s01", + "s02", + "s03", + "s04", + "s05", + "s06", + "s07", + "s08", + "s09", + "s10", + "s11", + "s12", + "s13", + "s14", + "s15", + "s16", + "s17", + "s18", + "s19", + "s20", ] as const; export const LEARNING_PATH = VERSION_ORDER; @@ -11,27 +32,200 @@ export const VERSION_META: Record = { - s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The minimal agent kernel is a while loop + one tool", layer: "tools", prevVersion: null }, - s02: { title: "Tools", subtitle: "One Handler Per Tool", coreAddition: "Tool dispatch map", keyInsight: "The loop stays the same; new tools register into the dispatch map", layer: "tools", prevVersion: "s01" }, - s03: { title: "TodoWrite", subtitle: "Plan Before You Act", coreAddition: "TodoManager + nag reminder", keyInsight: "An agent without a plan drifts; list the steps first, then execute", layer: "planning", prevVersion: "s02" }, - s04: { title: "Subagents", subtitle: "Clean Context Per Subtask", coreAddition: "Subagent spawn with isolated messages[]", keyInsight: "Subagents use independent messages[], keeping the main conversation clean", layer: "planning", prevVersion: "s03" }, - s05: { title: "Skills", subtitle: "Load on Demand", coreAddition: "SkillLoader + two-layer injection", keyInsight: "Inject knowledge via tool_result when needed, not upfront in the system prompt", layer: "planning", prevVersion: "s04" }, - s06: { title: "Compact", 
subtitle: "Three-Layer Compression", coreAddition: "micro-compact + auto-compact + archival", keyInsight: "Context will fill up; three-layer compression strategy enables infinite sessions", layer: "memory", prevVersion: "s05" }, - s07: { title: "Tasks", subtitle: "Task Graph + Dependencies", coreAddition: "TaskManager with file-based state + dependency graph", keyInsight: "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work", layer: "planning", prevVersion: "s06" }, - s08: { title: "Background Tasks", subtitle: "Background Threads + Notifications", coreAddition: "BackgroundManager + notification queue", keyInsight: "Run slow operations in the background; the agent keeps thinking ahead", layer: "concurrency", prevVersion: "s07" }, - s09: { title: "Agent Teams", subtitle: "Teammates + Mailboxes", coreAddition: "TeammateManager + file-based mailbox", keyInsight: "When one agent can't finish, delegate to persistent teammates via async mailboxes", layer: "collaboration", prevVersion: "s08" }, - s10: { title: "Team Protocols", subtitle: "Shared Communication Rules", coreAddition: "request_id correlation for two protocols", keyInsight: "One request-response pattern drives all team negotiation", layer: "collaboration", prevVersion: "s09" }, - s11: { title: "Autonomous Agents", subtitle: "Scan Board, Claim Tasks", coreAddition: "Task board polling + timeout-based self-governance", keyInsight: "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one", layer: "collaboration", prevVersion: "s10" }, - s12: { title: "Worktree + Task Isolation", subtitle: "Isolate by Directory", coreAddition: "Composable worktree lifecycle + event stream over a shared task board", keyInsight: "Each works in its own directory; tasks manage goals, worktrees manage directories, bound by ID", layer: "collaboration", prevVersion: "s11" }, + s01: { + title: "The Agent Loop", + subtitle: "One Loop Is 
All You Need", + coreAddition: "Minimal model/tool loop", + keyInsight: "The smallest useful agent is a loop that calls the model, runs tools, and feeds results back.", + layer: "tools", + prevVersion: null, + }, + s02: { + title: "Tool Use", + subtitle: "Add a Tool, Add Just One Line", + coreAddition: "Tool dispatch map", + keyInsight: "The loop stays stable while capabilities register into a dispatch table.", + layer: "tools", + prevVersion: "s01", + }, + s03: { + title: "Permission", + subtitle: "Check Permissions Before Execution", + coreAddition: "Permission gate", + keyInsight: "Dangerous actions need a harness decision point before the shell runs.", + layer: "tools", + prevVersion: "s02", + }, + s04: { + title: "Hooks", + subtitle: "Hang on the Loop, Don't Write into It", + coreAddition: "Lifecycle hooks", + keyInsight: "Cross-cutting behavior belongs around the loop, not tangled inside it.", + layer: "tools", + prevVersion: "s03", + }, + s05: { + title: "TodoWrite", + subtitle: "An Agent Without a Plan Drifts Off Course", + coreAddition: "Todo manager", + keyInsight: "Explicit plans keep long-running work visible and correctable.", + layer: "planning", + prevVersion: "s04", + }, + s06: { + title: "Subagent", + subtitle: "Break Large Tasks into Small Ones with Clean Context", + coreAddition: "Isolated subtask context", + keyInsight: "Subagents give each subtask a clean message history while preserving the main thread.", + layer: "planning", + prevVersion: "s05", + }, + s07: { + title: "Skill Loading", + subtitle: "Load Only When Needed", + coreAddition: "On-demand skill loader", + keyInsight: "Inject specialized knowledge only when the task actually needs it.", + layer: "planning", + prevVersion: "s06", + }, + s08: { + title: "Context Compact", + subtitle: "Context Will Fill Up", + coreAddition: "Context compaction", + keyInsight: "Compression keeps the conversation usable when the context window gets crowded.", + layer: "memory", + prevVersion: "s07", + }, 
+ s09: { + title: "Memory", + subtitle: "Keep a Layer That Doesn't Lose Details", + coreAddition: "Durable memory layer", + keyInsight: "Some facts should survive summarization and future sessions.", + layer: "memory", + prevVersion: "s08", + }, + s10: { + title: "System Prompt", + subtitle: "Assembled at Runtime, Never Hardcoded", + coreAddition: "Runtime prompt assembly", + keyInsight: "The system prompt is a generated product of policy, tools, skills, and context.", + layer: "planning", + prevVersion: "s09", + }, + s11: { + title: "Error Recovery", + subtitle: "Errors Are the Start of a Retry", + coreAddition: "Retry strategy", + keyInsight: "A robust harness classifies failures and decides what kind of retry is worthwhile.", + layer: "planning", + prevVersion: "s10", + }, + s12: { + title: "Task System", + subtitle: "Break Big Goals into Small Tasks", + coreAddition: "Task board", + keyInsight: "A task graph turns vague goals into ordered, observable work.", + layer: "collaboration", + prevVersion: "s11", + }, + s13: { + title: "Background Tasks", + subtitle: "Slow Operations Go to the Background", + coreAddition: "Background execution", + keyInsight: "The agent can keep reasoning while slow work completes elsewhere.", + layer: "concurrency", + prevVersion: "s12", + }, + s14: { + title: "Cron Scheduler", + subtitle: "Producing Work on a Schedule", + coreAddition: "Scheduled task creation", + keyInsight: "Recurring work should be created by the harness, not remembered by the model.", + layer: "concurrency", + prevVersion: "s13", + }, + s15: { + title: "Agent Teams", + subtitle: "One Agent Isn't Enough, Form a Team", + coreAddition: "Teammate mailboxes", + keyInsight: "Persistent teammates let work continue in parallel without stuffing every thought into one context.", + layer: "collaboration", + prevVersion: "s14", + }, + s16: { + title: "Team Protocols", + subtitle: "Teammates Need Agreements", + coreAddition: "Shared coordination protocols", + keyInsight: 
"Multi-agent systems need explicit message contracts, not vibes.", + layer: "collaboration", + prevVersion: "s15", + }, + s17: { + title: "Autonomous Agents", + subtitle: "Check the Board, Claim the Task", + coreAddition: "Autonomous task claiming", + keyInsight: "Teammates become useful when they can discover and claim work themselves.", + layer: "collaboration", + prevVersion: "s16", + }, + s18: { + title: "Worktree Isolation", + subtitle: "Separate Directories, No Conflicts", + coreAddition: "Worktree lifecycle", + keyInsight: "Parallel agents need isolated filesystems as much as isolated conversations.", + layer: "collaboration", + prevVersion: "s17", + }, + s19: { + title: "MCP Tools", + subtitle: "External Tools, Standard Protocol", + coreAddition: "MCP tool bridge", + keyInsight: "External services can become agent tools through a standard discovery and call protocol.", + layer: "collaboration", + prevVersion: "s18", + }, + s20: { + title: "Comprehensive Agent", + subtitle: "All Mechanisms, One Loop", + coreAddition: "Integrated harness", + keyInsight: "The final harness is still one loop, now surrounded by the systems that make it production-shaped.", + layer: "collaboration", + prevVersion: "s19", + }, }; export const LAYERS = [ - { id: "tools" as const, label: "Tools & Execution", color: "#3B82F6", versions: ["s01", "s02"] }, - { id: "planning" as const, label: "Planning & Coordination", color: "#10B981", versions: ["s03", "s04", "s05", "s07"] }, - { id: "memory" as const, label: "Memory Management", color: "#8B5CF6", versions: ["s06"] }, - { id: "concurrency" as const, label: "Concurrency", color: "#F59E0B", versions: ["s08"] }, - { id: "collaboration" as const, label: "Collaboration", color: "#EF4444", versions: ["s09", "s10", "s11", "s12"] }, + { + id: "tools" as const, + label: "Tools & Execution", + color: "#3B82F6", + versions: ["s01", "s02", "s03", "s04"], + }, + { + id: "planning" as const, + label: "Planning & Control", + color: "#10B981", + 
versions: ["s05", "s06", "s07", "s10", "s11"], + }, + { + id: "memory" as const, + label: "Memory Management", + color: "#8B5CF6", + versions: ["s08", "s09"], + }, + { + id: "concurrency" as const, + label: "Concurrency & Scheduling", + color: "#F59E0B", + versions: ["s13", "s14"], + }, + { + id: "collaboration" as const, + label: "Multi-Agent Platform", + color: "#EF4444", + versions: ["s12", "s15", "s16", "s17", "s18", "s19", "s20"], + }, ] as const; diff --git a/web/src/types/agent-data.ts b/web/src/types/agent-data.ts index 7cf01a0..b41e8c0 100644 --- a/web/src/types/agent-data.ts +++ b/web/src/types/agent-data.ts @@ -1,3 +1,15 @@ +export type AgentLayer = + | "tools" + | "planning" + | "memory" + | "concurrency" + | "collaboration"; + +export interface ChapterImage { + src: string; + alt: string; +} + export interface AgentVersion { id: string; filename: string; @@ -10,8 +22,9 @@ export interface AgentVersion { keyInsight: string; classes: { name: string; startLine: number; endLine: number }[]; functions: { name: string; signature: string; startLine: number }[]; - layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration"; + layer: AgentLayer; source: string; + images: ChapterImage[]; } export interface VersionDiff {