From aea8844bac756b354a72871533cd3dbeb60a4e1a Mon Sep 17 00:00:00 2001 From: CrazyBoyM Date: Tue, 24 Feb 2026 01:44:44 +0800 Subject: [PATCH] =?UTF-8?q?add=20worktree=20&=20up=20task=E3=80=81teammate?= =?UTF-8?q?=20etc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README-ja.md | 34 +- README-zh.md | 34 +- README.md | 32 +- agents/__init__.py | 2 +- agents/s01_agent_loop.py | 7 +- agents/s12_worktree_task_isolation.py | 775 ++++++++++++++++++ agents/s_full.py | 18 +- docs/en/s01-the-agent-loop.md | 7 +- docs/en/s02-tool-use.md | 4 +- docs/en/s03-todo-write.md | 4 +- docs/en/s04-subagent.md | 5 +- docs/en/s05-skill-loading.md | 1 - docs/en/s06-context-compact.md | 1 - docs/en/s07-task-system.md | 71 +- docs/en/s08-background-tasks.md | 1 - docs/en/s09-agent-teams.md | 4 +- docs/en/s11-autonomous-agents.md | 5 +- docs/en/s12-worktree-task-isolation.md | 250 ++++++ docs/ja/s01-the-agent-loop.md | 6 +- docs/ja/s03-todo-write.md | 2 +- docs/ja/s04-subagent.md | 5 +- docs/ja/s05-skill-loading.md | 1 - docs/ja/s06-context-compact.md | 1 - docs/ja/s07-task-system.md | 56 +- docs/ja/s08-background-tasks.md | 1 - docs/ja/s09-agent-teams.md | 6 +- docs/ja/s11-autonomous-agents.md | 2 +- docs/ja/s12-worktree-task-isolation.md | 226 +++++ docs/zh/s01-the-agent-loop.md | 6 +- docs/zh/s02-tool-use.md | 2 +- docs/zh/s03-todo-write.md | 2 +- docs/zh/s04-subagent.md | 5 +- docs/zh/s05-skill-loading.md | 3 +- docs/zh/s06-context-compact.md | 1 - docs/zh/s07-task-system.md | 33 +- docs/zh/s08-background-tasks.md | 1 - docs/zh/s09-agent-teams.md | 4 +- docs/zh/s11-autonomous-agents.md | 2 +- docs/zh/s12-worktree-task-isolation.md | 193 +++++ .../architecture/design-decisions.tsx | 2 + .../simulator/agent-loop-simulator.tsx | 1 + web/src/components/visualizations/index.tsx | 1 + .../visualizations/s04-subagent.tsx | 2 +- .../s12-worktree-task-isolation.tsx | 278 +++++++ web/src/data/annotations/s07.json | 58 +- 
web/src/data/annotations/s12.json | 103 +++ web/src/data/execution-flows.ts | 37 + web/src/data/generated/docs.json | 74 +- web/src/data/generated/versions.json | 147 +++- web/src/data/scenarios/s12.json | 51 ++ web/src/i18n/messages/en.json | 12 +- web/src/i18n/messages/ja.json | 12 +- web/src/i18n/messages/zh.json | 12 +- web/src/lib/constants.ts | 11 +- 54 files changed, 2404 insertions(+), 210 deletions(-) create mode 100644 agents/s12_worktree_task_isolation.py create mode 100644 docs/en/s12-worktree-task-isolation.md create mode 100644 docs/ja/s12-worktree-task-isolation.md create mode 100644 docs/zh/s12-worktree-task-isolation.md create mode 100644 web/src/components/visualizations/s12-worktree-task-isolation.tsx create mode 100644 web/src/data/annotations/s12.json create mode 100644 web/src/data/scenarios/s12.json diff --git a/README-ja.md b/README-ja.md index cfc2950..9e5e953 100644 --- a/README-ja.md +++ b/README-ja.md @@ -1,4 +1,4 @@ -# Learn Claude Code -- AI Agent をゼロから構築する +# Learn Claude Code -- 0 から 1 へ構築する nano Claude Code-like agent [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) @@ -17,16 +17,16 @@ loop back -----------------> messages[] - これだけだ。すべての AI コーディングエージェントはこのループ。 - 他はすべて改良に過ぎない。 + これは最小ループだ。すべての AI コーディングエージェントに必要な土台になる。 + 本番のエージェントには、ポリシー・権限・ライフサイクル層が追加される。 ``` -**11 の段階的セッション、シンプルなループから完全な自律チームまで。** +**12 の段階的セッション、シンプルなループから分離された自律実行まで。** **各セッションは1つのメカニズムを追加する。各メカニズムには1つのモットーがある。** > **s01**   *"Bash があれば十分"* — 1つのツール + 1つのループ = エージェント > -> **s02**   *"ループは変わらない"* — ツール追加はハンドラー追加であり、ロジック追加ではない +> **s02**   *"ループは変わらない"* — ツール追加はハンドラー追加であり、ループの作り直しではない > > **s03**   *"行動する前に計画せよ"* — 可視化された計画がタスク完了率を向上させる > @@ -45,6 +45,8 @@ > **s10**   *"同じ request_id、2つのプロトコル"* — 1つの FSM パターンでシャットダウン + プラン承認 > > **s11**   *"ポーリング、クレーム、作業、繰り返し"* — コーディネーター不要、エージェントが自己組織化 +> +> **s12**   *"ディレクトリで分離し、タスクIDで調整する"* — タスクボード + 必要時の worktree レーン --- @@ -77,6 +79,19 @@ def agent_loop(messages): 各セッションはこのループの上に1つのメカニズムを重ねる -- 
ループ自体は変わらない。 +## スコープ (重要) + +このリポジトリは、nano Claude Code-like agent を 0->1 で構築・学習するための教材プロジェクトです。 +学習を優先するため、以下の本番メカニズムは意図的に簡略化または省略しています。 + +- 完全なイベント / Hook バス (例: PreToolUse, SessionStart/End, ConfigChange)。 + s12 では教材用に最小の追記型ライフサイクルイベントのみ実装している。 +- ルールベースの権限ガバナンスと信頼フロー +- セッションライフサイクル制御 (resume/fork) と高度な worktree ライフサイクル制御 +- MCP ランタイムの詳細 (transport/OAuth/リソース購読/ポーリング) + +このリポジトリの JSONL メールボックス方式は教材用の実装であり、特定の本番内部実装を主張するものではありません。 + ## クイックスタート ```sh @@ -87,6 +102,7 @@ cp .env.example .env # .env を編集して ANTHROPIC_API_KEY を入力 python agents/s01_agent_loop.py # ここから開始 python agents/s11_autonomous_agents.py # 完全自律チーム +python agents/s12_worktree_task_isolation.py # Task 対応の worktree 分離 ``` ### Web プラットフォーム @@ -124,6 +140,9 @@ s08 バックグラウンドタスク [6] s10 チームプロトコル | s11 自律エージェント [14] アイドルサイクル + 自動クレーム + | + s12 Worktree 分離 [16] + タスク調整 + 必要時の分離実行レーン [N] = ツール数 ``` @@ -133,7 +152,7 @@ s08 バックグラウンドタスク [6] s10 チームプロトコル ``` learn-claude-code/ | -|-- agents/ # Python リファレンス実装 (s01-s11 + 完全版) +|-- agents/ # Python リファレンス実装 (s01-s12 + 完全版) |-- docs/{en,zh,ja}/ # メンタルモデル優先のドキュメント (3言語) |-- web/ # インタラクティブ学習プラットフォーム (Next.js) |-- skills/ # s05 の Skill ファイル @@ -158,6 +177,7 @@ learn-claude-code/ | [s09](./docs/ja/s09-agent-teams.md) | エージェントチーム | *追記で送信、排出で読取* | | [s10](./docs/ja/s10-team-protocols.md) | チームプロトコル | *同じ request_id、2つのプロトコル* | | [s11](./docs/ja/s11-autonomous-agents.md) | 自律エージェント | *ポーリング、クレーム、作業、繰り返し* | +| [s12](./docs/ja/s12-worktree-task-isolation.md) | Worktree + タスク分離 | *ディレクトリで分離し、タスクIDで調整する* | ## ライセンス @@ -165,4 +185,4 @@ MIT --- -**モデルがエージェントだ。私たちの仕事はツールを渡して、邪魔をしないこと。** +**モデルがエージェントだ。私たちの仕事はツールを与えて邪魔しないこと。** diff --git a/README-zh.md b/README-zh.md index 3a47b98..b05a300 100644 --- a/README-zh.md +++ b/README-zh.md @@ -1,4 +1,4 @@ -# Learn Claude Code -- 从零构建 AI Agent +# Learn Claude Code -- 从 0 到 1 构建 nano Claude Code-like agent [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) @@ -17,16 +17,16 @@ loop back -----------------> messages[] 
- 就这些。每个 AI 编程 Agent 都是这个循环。 - 其他一切都是优化。 + 这是最小循环。每个 AI 编程 Agent 都需要这个循环。 + 生产级 Agent 还会叠加策略、权限与生命周期层。 ``` -**11 个递进式课程, 从简单循环到完整的自治团队。** +**12 个递进式课程, 从简单循环到隔离化的自治执行。** **每个课程添加一个机制。每个机制有一句格言。** > **s01**   *"Bash 就够了"* — 一个工具 + 一个循环 = 一个智能体 > -> **s02**   *"循环没有变"* — 加工具就是加 handler, 不是加逻辑 +> **s02**   *"循环没有变"* — 加工具就是加 handler, 不是重写循环 > > **s03**   *"先计划再行动"* — 可见的计划提升任务完成率 > @@ -45,6 +45,8 @@ > **s10**   *"同一个 request_id, 两个协议"* — 一个 FSM 模式驱动关机 + 计划审批 > > **s11**   *"轮询, 认领, 工作, 重复"* — 无需协调者, 智能体自组织 +> +> **s12**   *"目录隔离, 任务 ID 协调"* — 任务板协调 + 按需 worktree 隔离通道 --- @@ -77,6 +79,19 @@ def agent_loop(messages): 每个课程在这个循环之上叠加一个机制 -- 循环本身始终不变。 +## 范围说明 (重要) + +本仓库是一个 0->1 的学习型项目,用于从零构建 nano Claude Code-like agent。 +为保证学习路径清晰,仓库有意简化或省略了部分生产机制: + +- 完整事件 / Hook 总线 (例如 PreToolUse、SessionStart/End、ConfigChange)。 + s12 仅提供教学用途的最小 append-only 生命周期事件流。 +- 基于规则的权限治理与信任流程 +- 会话生命周期控制 (resume/fork) 与更完整的 worktree 生命周期控制 +- 完整 MCP 运行时细节 (transport/OAuth/资源订阅/轮询) + +仓库中的团队 JSONL 邮箱协议是教学实现,不是对任何特定生产内部实现的声明。 + ## 快速开始 ```sh @@ -87,6 +102,7 @@ cp .env.example .env # 编辑 .env 填入你的 ANTHROPIC_API_KEY python agents/s01_agent_loop.py # 从这里开始 python agents/s11_autonomous_agents.py # 完整自治团队 +python agents/s12_worktree_task_isolation.py # Task 感知的 worktree 隔离 ``` ### Web 平台 @@ -124,6 +140,9 @@ s08 后台任务 [6] s10 团队协议 [12] | s11 自治智能体 [14] 空闲轮询 + 自动认领 + | + s12 Worktree 隔离 [16] + 任务协调 + 按需隔离执行通道 [N] = 工具数量 ``` @@ -133,7 +152,7 @@ s08 后台任务 [6] s10 团队协议 [12] ``` learn-claude-code/ | -|-- agents/ # Python 参考实现 (s01-s11 + 完整版) +|-- agents/ # Python 参考实现 (s01-s12 + 完整版) |-- docs/{en,zh,ja}/ # 心智模型优先的文档 (3 种语言) |-- web/ # 交互式学习平台 (Next.js) |-- skills/ # s05 的 Skill 文件 @@ -158,6 +177,7 @@ learn-claude-code/ | [s09](./docs/zh/s09-agent-teams.md) | 智能体团队 | *追加即发送, 排空即读取* | | [s10](./docs/zh/s10-team-protocols.md) | 团队协议 | *同一个 request_id, 两个协议* | | [s11](./docs/zh/s11-autonomous-agents.md) | 自治智能体 | *轮询, 认领, 工作, 重复* | +| [s12](./docs/zh/s12-worktree-task-isolation.md) | Worktree + 任务隔离 | *目录隔离, 任务 ID 
协调* | ## 许可证 @@ -165,4 +185,4 @@ MIT --- -**模型就是智能体。我们的工作是给它工具, 然后让开。** +**模型就是智能体。我们的工作就是给它工具, 然后让开。** diff --git a/README.md b/README.md index c0b58c8..689d7d7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Learn Claude Code -- Build an AI Agent From Scratch +# Learn Claude Code -- A nano Claude Code-like agent, built from 0 to 1 [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) @@ -17,16 +17,16 @@ loop back -----------------> messages[] - That's it. Every AI coding agent is this loop. - Everything else is refinement. + That's the minimal loop. Every AI coding agent needs this loop. + Production agents add policy, permissions, and lifecycle layers. ``` -**11 progressive sessions, from a simple loop to full autonomous teams.** +**12 progressive sessions, from a simple loop to isolated autonomous execution.** **Each session adds one mechanism. Each mechanism has one motto.** > **s01**   *"Bash is all you need"* — one tool + one loop = an agent > -> **s02**   *"The loop didn't change"* — adding tools means adding handlers, not logic +> **s02**   *"The loop didn't change"* — adding tools means adding handlers, not rewriting the loop > > **s03**   *"Plan before you act"* — visible plans improve task completion > @@ -45,6 +45,8 @@ > **s10**   *"Same request_id, two protocols"* — one FSM pattern powers shutdown + plan approval > > **s11**   *"Poll, claim, work, repeat"* — no coordinator needed, agents self-organize +> +> **s12**   *"Isolate by directory, coordinate by task ID"* — task board + optional worktree lanes --- @@ -77,6 +79,19 @@ def agent_loop(messages): Every session layers one mechanism on top of this loop -- without changing the loop itself. +## Scope (Important) + +This repository is a 0->1 learning project for building a nano Claude Code-like agent. +It intentionally simplifies or omits several production mechanisms: + +- Full event/hook buses (for example PreToolUse, SessionStart/End, ConfigChange). 
+ s12 includes only a minimal append-only lifecycle event stream for teaching. +- Rule-based permission governance and trust workflows +- Session lifecycle controls (resume/fork) and advanced worktree lifecycle controls +- Full MCP runtime details (transport/OAuth/resource subscribe/polling) + +Treat the team JSONL mailbox protocol in this repo as a teaching implementation, not a claim about any specific production internals. + ## Quick Start ```sh @@ -87,6 +102,7 @@ cp .env.example .env # Edit .env with your ANTHROPIC_API_KEY python agents/s01_agent_loop.py # Start here python agents/s11_autonomous_agents.py # Full autonomous team +python agents/s12_worktree_task_isolation.py # Task-aware worktree isolation ``` ### Web Platform @@ -124,6 +140,9 @@ s08 Background Tasks [6] s10 Team Protocols [12] | s11 Autonomous Agents [14] idle cycle + auto-claim + | + s12 Worktree Isolation [16] + task coordination + optional isolated execution lanes [N] = number of tools ``` @@ -133,7 +152,7 @@ s08 Background Tasks [6] s10 Team Protocols [12] ``` learn-claude-code/ | -|-- agents/ # Python reference implementations (s01-s11 + full) +|-- agents/ # Python reference implementations (s01-s12 + full) |-- docs/{en,zh,ja}/ # Mental-model-first documentation (3 languages) |-- web/ # Interactive learning platform (Next.js) |-- skills/ # Skill files for s05 @@ -158,6 +177,7 @@ Available in [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/j | [s09](./docs/en/s09-agent-teams.md) | Agent Teams | *Append to send, drain to read* | | [s10](./docs/en/s10-team-protocols.md) | Team Protocols | *Same request_id, two protocols* | | [s11](./docs/en/s11-autonomous-agents.md) | Autonomous Agents | *Poll, claim, work, repeat* | +| [s12](./docs/en/s12-worktree-task-isolation.md) | Worktree + Task Isolation | *Isolate by directory, coordinate by task ID* | ## License diff --git a/agents/__init__.py b/agents/__init__.py index dc13667..e101455 100644 --- a/agents/__init__.py +++ b/agents/__init__.py 
@@ -1,2 +1,2 @@ -# agents/ - Python teaching agents (s01-s11) + reference agent (s_full) +# agents/ - Python teaching agents (s01-s12) + reference agent (s_full) # Each file is self-contained and runnable: python agents/s01_agent_loop.py diff --git a/agents/s01_agent_loop.py b/agents/s01_agent_loop.py index 1be83f8..3e6c913 100644 --- a/agents/s01_agent_loop.py +++ b/agents/s01_agent_loop.py @@ -2,7 +2,7 @@ """ s01_agent_loop.py - The Agent Loop -The entire secret of coding agents in one pattern: +The entire secret of an AI coding agent in one pattern: while stop_reason == "tool_use": response = LLM(messages, tools) @@ -18,8 +18,9 @@ The entire secret of coding agents in one pattern: +---------------+ (loop continues) -That's it. The ENTIRE agent is a while loop that feeds tool -results back to the model until the model decides to stop. +This is the core loop: feed tool results back to the model +until the model decides to stop. Production agents layer +policy, hooks, and lifecycle controls on top. """ import os diff --git a/agents/s12_worktree_task_isolation.py b/agents/s12_worktree_task_isolation.py new file mode 100644 index 0000000..ad7fbad --- /dev/null +++ b/agents/s12_worktree_task_isolation.py @@ -0,0 +1,775 @@ +#!/usr/bin/env python3 +""" +s12_worktree_task_isolation.py - Worktree + Task Isolation + +Directory-level isolation for parallel task execution. +Tasks are the control plane and worktrees are the execution plane. + + .tasks/task_12.json + { + "id": 12, + "subject": "Implement auth refactor", + "status": "in_progress", + "worktree": "auth-refactor" + } + + .worktrees/index.json + { + "worktrees": [ + { + "name": "auth-refactor", + "path": ".../.worktrees/auth-refactor", + "branch": "wt/auth-refactor", + "task_id": 12, + "status": "active" + } + ] + } + +Key insight: "Isolate by directory, coordinate by task ID." 
+""" + +import json +import os +import re +import subprocess +import time +from pathlib import Path + +from anthropic import Anthropic +from dotenv import load_dotenv + +load_dotenv(override=True) + +if os.getenv("ANTHROPIC_BASE_URL"): + os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) + +WORKDIR = Path.cwd() +client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) +MODEL = os.environ["MODEL_ID"] + + +def detect_repo_root(cwd: Path) -> Path | None: + """Return git repo root if cwd is inside a repo, else None.""" + try: + r = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + cwd=cwd, + capture_output=True, + text=True, + timeout=10, + ) + if r.returncode != 0: + return None + root = Path(r.stdout.strip()) + return root if root.exists() else None + except Exception: + return None + + +REPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR + +SYSTEM = ( + f"You are a coding agent at {WORKDIR}. " + "Use task + worktree tools for multi-task work. " + "For parallel or risky changes: create tasks, allocate worktree lanes, " + "run commands in those lanes, then choose keep/remove for closeout. " + "Use worktree_events when you need lifecycle visibility." 
+) + + +# -- EventBus: append-only lifecycle events for observability -- +class EventBus: + def __init__(self, event_log_path: Path): + self.path = event_log_path + self.path.parent.mkdir(parents=True, exist_ok=True) + if not self.path.exists(): + self.path.write_text("") + + def emit( + self, + event: str, + task: dict | None = None, + worktree: dict | None = None, + error: str | None = None, + ): + payload = { + "event": event, + "ts": time.time(), + "task": task or {}, + "worktree": worktree or {}, + } + if error: + payload["error"] = error + with self.path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload) + "\n") + + def list_recent(self, limit: int = 20) -> str: + n = max(1, min(int(limit or 20), 200)) + lines = self.path.read_text(encoding="utf-8").splitlines() + recent = lines[-n:] + items = [] + for line in recent: + try: + items.append(json.loads(line)) + except Exception: + items.append({"event": "parse_error", "raw": line}) + return json.dumps(items, indent=2) + + +# -- TaskManager: persistent task board with optional worktree binding -- +class TaskManager: + def __init__(self, tasks_dir: Path): + self.dir = tasks_dir + self.dir.mkdir(parents=True, exist_ok=True) + self._next_id = self._max_id() + 1 + + def _max_id(self) -> int: + ids = [] + for f in self.dir.glob("task_*.json"): + try: + ids.append(int(f.stem.split("_")[1])) + except Exception: + pass + return max(ids) if ids else 0 + + def _path(self, task_id: int) -> Path: + return self.dir / f"task_{task_id}.json" + + def _load(self, task_id: int) -> dict: + path = self._path(task_id) + if not path.exists(): + raise ValueError(f"Task {task_id} not found") + return json.loads(path.read_text()) + + def _save(self, task: dict): + self._path(task["id"]).write_text(json.dumps(task, indent=2)) + + def create(self, subject: str, description: str = "") -> str: + task = { + "id": self._next_id, + "subject": subject, + "description": description, + "status": "pending", + "owner": "", + 
"worktree": "", + "blockedBy": [], + "created_at": time.time(), + "updated_at": time.time(), + } + self._save(task) + self._next_id += 1 + return json.dumps(task, indent=2) + + def get(self, task_id: int) -> str: + return json.dumps(self._load(task_id), indent=2) + + def exists(self, task_id: int) -> bool: + return self._path(task_id).exists() + + def update(self, task_id: int, status: str = None, owner: str = None) -> str: + task = self._load(task_id) + if status: + if status not in ("pending", "in_progress", "completed"): + raise ValueError(f"Invalid status: {status}") + task["status"] = status + if owner is not None: + task["owner"] = owner + task["updated_at"] = time.time() + self._save(task) + return json.dumps(task, indent=2) + + def bind_worktree(self, task_id: int, worktree: str, owner: str = "") -> str: + task = self._load(task_id) + task["worktree"] = worktree + if owner: + task["owner"] = owner + if task["status"] == "pending": + task["status"] = "in_progress" + task["updated_at"] = time.time() + self._save(task) + return json.dumps(task, indent=2) + + def unbind_worktree(self, task_id: int) -> str: + task = self._load(task_id) + task["worktree"] = "" + task["updated_at"] = time.time() + self._save(task) + return json.dumps(task, indent=2) + + def list_all(self) -> str: + tasks = [] + for f in sorted(self.dir.glob("task_*.json")): + tasks.append(json.loads(f.read_text())) + if not tasks: + return "No tasks." 
+ lines = [] + for t in tasks: + marker = { + "pending": "[ ]", + "in_progress": "[>]", + "completed": "[x]", + }.get(t["status"], "[?]") + owner = f" owner={t['owner']}" if t.get("owner") else "" + wt = f" wt={t['worktree']}" if t.get("worktree") else "" + lines.append(f"{marker} #{t['id']}: {t['subject']}{owner}{wt}") + return "\n".join(lines) + + +TASKS = TaskManager(REPO_ROOT / ".tasks") +EVENTS = EventBus(REPO_ROOT / ".worktrees" / "events.jsonl") + + +# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index -- +class WorktreeManager: + def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus): + self.repo_root = repo_root + self.tasks = tasks + self.events = events + self.dir = repo_root / ".worktrees" + self.dir.mkdir(parents=True, exist_ok=True) + self.index_path = self.dir / "index.json" + if not self.index_path.exists(): + self.index_path.write_text(json.dumps({"worktrees": []}, indent=2)) + self.git_available = self._is_git_repo() + + def _is_git_repo(self) -> bool: + try: + r = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + cwd=self.repo_root, + capture_output=True, + text=True, + timeout=10, + ) + return r.returncode == 0 + except Exception: + return False + + def _run_git(self, args: list[str]) -> str: + if not self.git_available: + raise RuntimeError("Not in a git repository. 
worktree tools require git.") + r = subprocess.run( + ["git", *args], + cwd=self.repo_root, + capture_output=True, + text=True, + timeout=120, + ) + if r.returncode != 0: + msg = (r.stdout + r.stderr).strip() + raise RuntimeError(msg or f"git {' '.join(args)} failed") + return (r.stdout + r.stderr).strip() or "(no output)" + + def _load_index(self) -> dict: + return json.loads(self.index_path.read_text()) + + def _save_index(self, data: dict): + self.index_path.write_text(json.dumps(data, indent=2)) + + def _find(self, name: str) -> dict | None: + idx = self._load_index() + for wt in idx.get("worktrees", []): + if wt.get("name") == name: + return wt + return None + + def _validate_name(self, name: str): + if not re.fullmatch(r"[A-Za-z0-9._-]{1,40}", name or ""): + raise ValueError( + "Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -" + ) + + def create(self, name: str, task_id: int = None, base_ref: str = "HEAD") -> str: + self._validate_name(name) + if self._find(name): + raise ValueError(f"Worktree '{name}' already exists in index") + if task_id is not None and not self.tasks.exists(task_id): + raise ValueError(f"Task {task_id} not found") + + path = self.dir / name + branch = f"wt/{name}" + self.events.emit( + "worktree.create.before", + task={"id": task_id} if task_id is not None else {}, + worktree={"name": name, "base_ref": base_ref}, + ) + try: + self._run_git(["worktree", "add", "-b", branch, str(path), base_ref]) + + entry = { + "name": name, + "path": str(path), + "branch": branch, + "task_id": task_id, + "status": "active", + "created_at": time.time(), + } + + idx = self._load_index() + idx["worktrees"].append(entry) + self._save_index(idx) + + if task_id is not None: + self.tasks.bind_worktree(task_id, name) + + self.events.emit( + "worktree.create.after", + task={"id": task_id} if task_id is not None else {}, + worktree={ + "name": name, + "path": str(path), + "branch": branch, + "status": "active", + }, + ) + return json.dumps(entry, 
indent=2) + except Exception as e: + self.events.emit( + "worktree.create.failed", + task={"id": task_id} if task_id is not None else {}, + worktree={"name": name, "base_ref": base_ref}, + error=str(e), + ) + raise + + def list_all(self) -> str: + idx = self._load_index() + wts = idx.get("worktrees", []) + if not wts: + return "No worktrees in index." + lines = [] + for wt in wts: + suffix = f" task={wt['task_id']}" if wt.get("task_id") else "" + lines.append( + f"[{wt.get('status', 'unknown')}] {wt['name']} -> " + f"{wt['path']} ({wt.get('branch', '-')}){suffix}" + ) + return "\n".join(lines) + + def status(self, name: str) -> str: + wt = self._find(name) + if not wt: + return f"Error: Unknown worktree '{name}'" + path = Path(wt["path"]) + if not path.exists(): + return f"Error: Worktree path missing: {path}" + r = subprocess.run( + ["git", "status", "--short", "--branch"], + cwd=path, + capture_output=True, + text=True, + timeout=60, + ) + text = (r.stdout + r.stderr).strip() + return text or "Clean worktree" + + def run(self, name: str, command: str) -> str: + dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] + if any(d in command for d in dangerous): + return "Error: Dangerous command blocked" + + wt = self._find(name) + if not wt: + return f"Error: Unknown worktree '{name}'" + path = Path(wt["path"]) + if not path.exists(): + return f"Error: Worktree path missing: {path}" + + try: + r = subprocess.run( + command, + shell=True, + cwd=path, + capture_output=True, + text=True, + timeout=300, + ) + out = (r.stdout + r.stderr).strip() + return out[:50000] if out else "(no output)" + except subprocess.TimeoutExpired: + return "Error: Timeout (300s)" + + def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str: + wt = self._find(name) + if not wt: + return f"Error: Unknown worktree '{name}'" + + self.events.emit( + "worktree.remove.before", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + 
worktree={"name": name, "path": wt.get("path")}, + ) + try: + args = ["worktree", "remove"] + if force: + args.append("--force") + args.append(wt["path"]) + self._run_git(args) + + if complete_task and wt.get("task_id") is not None: + task_id = wt["task_id"] + before = json.loads(self.tasks.get(task_id)) + self.tasks.update(task_id, status="completed") + self.tasks.unbind_worktree(task_id) + self.events.emit( + "task.completed", + task={ + "id": task_id, + "subject": before.get("subject", ""), + "status": "completed", + }, + worktree={"name": name}, + ) + + idx = self._load_index() + for item in idx.get("worktrees", []): + if item.get("name") == name: + item["status"] = "removed" + item["removed_at"] = time.time() + self._save_index(idx) + + self.events.emit( + "worktree.remove.after", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + worktree={"name": name, "path": wt.get("path"), "status": "removed"}, + ) + return f"Removed worktree '{name}'" + except Exception as e: + self.events.emit( + "worktree.remove.failed", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + worktree={"name": name, "path": wt.get("path")}, + error=str(e), + ) + raise + + def keep(self, name: str) -> str: + wt = self._find(name) + if not wt: + return f"Error: Unknown worktree '{name}'" + + idx = self._load_index() + kept = None + for item in idx.get("worktrees", []): + if item.get("name") == name: + item["status"] = "kept" + item["kept_at"] = time.time() + kept = item + self._save_index(idx) + + self.events.emit( + "worktree.keep", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + worktree={ + "name": name, + "path": wt.get("path"), + "status": "kept", + }, + ) + return json.dumps(kept, indent=2) if kept else f"Error: Unknown worktree '{name}'" + + +WORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS) + + +# -- Base tools (kept minimal, same style as previous sessions) -- +def safe_path(p: str) -> Path: + path = 
(WORKDIR / p).resolve() + if not path.is_relative_to(WORKDIR): + raise ValueError(f"Path escapes workspace: {p}") + return path + + +def run_bash(command: str) -> str: + dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] + if any(d in command for d in dangerous): + return "Error: Dangerous command blocked" + try: + r = subprocess.run( + command, + shell=True, + cwd=WORKDIR, + capture_output=True, + text=True, + timeout=120, + ) + out = (r.stdout + r.stderr).strip() + return out[:50000] if out else "(no output)" + except subprocess.TimeoutExpired: + return "Error: Timeout (120s)" + + +def run_read(path: str, limit: int = None) -> str: + try: + lines = safe_path(path).read_text().splitlines() + if limit and limit < len(lines): + lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] + return "\n".join(lines)[:50000] + except Exception as e: + return f"Error: {e}" + + +def run_write(path: str, content: str) -> str: + try: + fp = safe_path(path) + fp.parent.mkdir(parents=True, exist_ok=True) + fp.write_text(content) + return f"Wrote {len(content)} bytes" + except Exception as e: + return f"Error: {e}" + + +def run_edit(path: str, old_text: str, new_text: str) -> str: + try: + fp = safe_path(path) + c = fp.read_text() + if old_text not in c: + return f"Error: Text not found in {path}" + fp.write_text(c.replace(old_text, new_text, 1)) + return f"Edited {path}" + except Exception as e: + return f"Error: {e}" + + +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), + "task_create": lambda **kw: TASKS.create(kw["subject"], kw.get("description", "")), + "task_list": lambda **kw: TASKS.list_all(), + "task_get": lambda **kw: TASKS.get(kw["task_id"]), + "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status"), 
kw.get("owner")), + "task_bind_worktree": lambda **kw: TASKS.bind_worktree(kw["task_id"], kw["worktree"], kw.get("owner", "")), + "worktree_create": lambda **kw: WORKTREES.create(kw["name"], kw.get("task_id"), kw.get("base_ref", "HEAD")), + "worktree_list": lambda **kw: WORKTREES.list_all(), + "worktree_status": lambda **kw: WORKTREES.status(kw["name"]), + "worktree_run": lambda **kw: WORKTREES.run(kw["name"], kw["command"]), + "worktree_keep": lambda **kw: WORKTREES.keep(kw["name"]), + "worktree_remove": lambda **kw: WORKTREES.remove(kw["name"], kw.get("force", False), kw.get("complete_task", False)), + "worktree_events": lambda **kw: EVENTS.list_recent(kw.get("limit", 20)), +} + +TOOLS = [ + { + "name": "bash", + "description": "Run a shell command in the current workspace (blocking).", + "input_schema": { + "type": "object", + "properties": {"command": {"type": "string"}}, + "required": ["command"], + }, + }, + { + "name": "read_file", + "description": "Read file contents.", + "input_schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "limit": {"type": "integer"}, + }, + "required": ["path"], + }, + }, + { + "name": "write_file", + "description": "Write content to file.", + "input_schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + "required": ["path", "content"], + }, + }, + { + "name": "edit_file", + "description": "Replace exact text in file.", + "input_schema": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "old_text": {"type": "string"}, + "new_text": {"type": "string"}, + }, + "required": ["path", "old_text", "new_text"], + }, + }, + { + "name": "task_create", + "description": "Create a new task on the shared task board.", + "input_schema": { + "type": "object", + "properties": { + "subject": {"type": "string"}, + "description": {"type": "string"}, + }, + "required": ["subject"], + }, + }, + { + "name": "task_list", + "description": 
"List all tasks with status, owner, and worktree binding.", + "input_schema": {"type": "object", "properties": {}}, + }, + { + "name": "task_get", + "description": "Get task details by ID.", + "input_schema": { + "type": "object", + "properties": {"task_id": {"type": "integer"}}, + "required": ["task_id"], + }, + }, + { + "name": "task_update", + "description": "Update task status or owner.", + "input_schema": { + "type": "object", + "properties": { + "task_id": {"type": "integer"}, + "status": { + "type": "string", + "enum": ["pending", "in_progress", "completed"], + }, + "owner": {"type": "string"}, + }, + "required": ["task_id"], + }, + }, + { + "name": "task_bind_worktree", + "description": "Bind a task to a worktree name.", + "input_schema": { + "type": "object", + "properties": { + "task_id": {"type": "integer"}, + "worktree": {"type": "string"}, + "owner": {"type": "string"}, + }, + "required": ["task_id", "worktree"], + }, + }, + { + "name": "worktree_create", + "description": "Create a git worktree and optionally bind it to a task.", + "input_schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "task_id": {"type": "integer"}, + "base_ref": {"type": "string"}, + }, + "required": ["name"], + }, + }, + { + "name": "worktree_list", + "description": "List worktrees tracked in .worktrees/index.json.", + "input_schema": {"type": "object", "properties": {}}, + }, + { + "name": "worktree_status", + "description": "Show git status for one worktree.", + "input_schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + }, + { + "name": "worktree_run", + "description": "Run a shell command in a named worktree directory.", + "input_schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "command": {"type": "string"}, + }, + "required": ["name", "command"], + }, + }, + { + "name": "worktree_remove", + "description": "Remove a worktree and optionally mark its bound task 
completed.", + "input_schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "force": {"type": "boolean"}, + "complete_task": {"type": "boolean"}, + }, + "required": ["name"], + }, + }, + { + "name": "worktree_keep", + "description": "Mark a worktree as kept in lifecycle state without removing it.", + "input_schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + }, + { + "name": "worktree_events", + "description": "List recent worktree/task lifecycle events from .worktrees/events.jsonl.", + "input_schema": { + "type": "object", + "properties": {"limit": {"type": "integer"}}, + }, + }, +] + + +def agent_loop(messages: list): + while True: + response = client.messages.create( + model=MODEL, + system=SYSTEM, + messages=messages, + tools=TOOLS, + max_tokens=8000, + ) + messages.append({"role": "assistant", "content": response.content}) + if response.stop_reason != "tool_use": + return + + results = [] + for block in response.content: + if block.type == "tool_use": + handler = TOOL_HANDLERS.get(block.name) + try: + output = handler(**block.input) if handler else f"Unknown tool: {block.name}" + except Exception as e: + output = f"Error: {e}" + print(f"> {block.name}: {str(output)[:200]}") + results.append( + { + "type": "tool_result", + "tool_use_id": block.id, + "content": str(output), + } + ) + messages.append({"role": "user", "content": results}) + + +if __name__ == "__main__": + print(f"Repo root for s12: {REPO_ROOT}") + if not WORKTREES.git_available: + print("Note: Not in a git repo. 
worktree_* tools will return errors.") + + history = [] + while True: + try: + query = input("\033[36ms12 >> \033[0m") + except (EOFError, KeyboardInterrupt): + break + if query.strip().lower() in ("q", "exit", ""): + break + history.append({"role": "user", "content": query}) + agent_loop(history) + print() diff --git a/agents/s_full.py b/agents/s_full.py index 3239d8c..7a3f1fd 100644 --- a/agents/s_full.py +++ b/agents/s_full.py @@ -3,12 +3,13 @@ s_full.py - Full Reference Agent Capstone implementation combining every mechanism from s01-s11. +Session s12 (task-aware worktree isolation) is taught separately. NOT a teaching session -- this is the "put it all together" reference. +------------------------------------------------------------------+ | FULL AGENT | | | - | System prompt (s05 skills, s03 todo nag) | + | System prompt (s05 skills, task-first + optional todo nag) | | | | Before each LLM call: | | +--------------------+ +------------------+ +--------------+ | @@ -150,6 +151,9 @@ class TodoManager: lines.append(f"\n({done}/{len(self.items)} completed)") return "\n".join(lines) + def has_open_items(self) -> bool: + return any(item.get("status") != "completed" for item in self.items) + # === SECTION: subagent (s04) === def run_subagent(prompt: str, agent_type: str = "Explore") -> str: @@ -545,12 +549,10 @@ BUS = MessageBus() TEAM = TeammateManager(BUS, TASK_MGR) # === SECTION: system_prompt === -SYSTEM = f"""You are a coding agent at {WORKDIR}. -Use tools to solve tasks. Use TodoWrite for multi-step work. +SYSTEM = f"""You are a coding agent at {WORKDIR}. Use tools to solve tasks. +Prefer task_create/task_update/task_list for multi-step work. Use TodoWrite for short checklists. Use task for subagent delegation. Use load_skill for specialized knowledge. 
- -Skills available: -{SKILLS.descriptions()}""" +Skills: {SKILLS.descriptions()}""" # === SECTION: shutdown_protocol (s10) === @@ -692,9 +694,9 @@ def agent_loop(messages: list): results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) if block.name == "TodoWrite": used_todo = True - # s03: nag reminder + # s03: nag reminder (only when todo workflow is active) rounds_without_todo = 0 if used_todo else rounds_without_todo + 1 - if rounds_without_todo >= 3: + if TODO.has_open_items() and rounds_without_todo >= 3: results.insert(0, {"type": "text", "text": "Update your todos."}) messages.append({"role": "user", "content": results}) # s06: manual compress diff --git a/docs/en/s01-the-agent-loop.md b/docs/en/s01-the-agent-loop.md index ecdc77e..54fca90 100644 --- a/docs/en/s01-the-agent-loop.md +++ b/docs/en/s01-the-agent-loop.md @@ -1,6 +1,6 @@ # s01: The Agent Loop -> The entire secret of AI coding agents is a while loop that feeds tool results back to the model until the model decides to stop. +> The core of a coding agent is a while loop that feeds tool results back to the model until the model decides to stop. ## The Problem @@ -59,7 +59,8 @@ messages.append({"role": "assistant", "content": response.content}) ``` 4. We check the stop reason. If the model did not call a tool, the loop - ends. This is the only exit condition. + ends. In this minimal lesson implementation, this is the only loop exit + condition. ```python if response.stop_reason != "tool_use": @@ -126,7 +127,7 @@ This is session 1 -- the starting point. There is no prior session. ## Design Rationale -This loop is the universal foundation of all LLM-based agents. Production implementations add error handling, token counting, streaming, and retry logic, but the fundamental structure is unchanged. The simplicity is the point: one exit condition (`stop_reason != "tool_use"`) controls the entire flow. 
Everything else in this course -- tools, planning, compression, teams -- layers on top of this loop without modifying it. Understanding this loop means understanding every agent. +This loop is the foundation of LLM-based agents. Production implementations add error handling, token counting, streaming, retry logic, permission policy, and lifecycle orchestration, but the core interaction pattern still starts here. The simplicity is the point for this session: in this minimal implementation, one exit condition (`stop_reason != "tool_use"`) controls the flow we need to learn first. Everything else in this course layers on top of this loop. Understanding this loop gives you the base model, not the full production architecture. ## Try It diff --git a/docs/en/s02-tool-use.md b/docs/en/s02-tool-use.md index b04545d..b630a09 100644 --- a/docs/en/s02-tool-use.md +++ b/docs/en/s02-tool-use.md @@ -1,6 +1,6 @@ # s02: Tools -> A dispatch map routes tool calls to handler functions -- the loop itself does not change at all. +> A dispatch map routes tool calls to handler functions. The loop stays identical. ## The Problem @@ -133,7 +133,7 @@ def agent_loop(messages: list): ## Design Rationale -The dispatch map pattern scales linearly -- adding a tool means adding one handler and one schema entry. The loop never changes. This separation of concerns (loop vs handlers) is why agent frameworks can support dozens of tools without increasing control flow complexity. The pattern also enables independent testing of each handler in isolation, since handlers are pure functions with no coupling to the loop. Any agent that outgrows a dispatch map has a design problem, not a scaling problem. +The dispatch map scales linearly: add a tool, add a handler, add a schema entry. The loop never changes. Handlers are plain functions with no coupling to the loop, so they test in isolation. Any agent that outgrows a dispatch map has a design problem, not a scaling problem.
## Try It diff --git a/docs/en/s03-todo-write.md b/docs/en/s03-todo-write.md index d090b88..3f3ab2d 100644 --- a/docs/en/s03-todo-write.md +++ b/docs/en/s03-todo-write.md @@ -19,9 +19,7 @@ explicitly. The model creates a plan, marks items in_progress as it works, and marks them completed when done. A nag reminder injects a nudge if the model goes 3+ rounds without updating its todos. -Teaching simplification: the nag threshold of 3 rounds is set low for -teaching visibility. Production agents typically use a higher threshold -around 10 to avoid excessive prompting. +Note: the nag threshold of 3 rounds is low for visibility. Production systems tune higher. From s07, this course switches to the Task board for durable multi-step work; TodoWrite remains available for quick checklists. ## The Solution diff --git a/docs/en/s04-subagent.md b/docs/en/s04-subagent.md index e4a60cc..9971e89 100644 --- a/docs/en/s04-subagent.md +++ b/docs/en/s04-subagent.md @@ -14,7 +14,7 @@ does this project use?" might require reading 5 files, but the parent agent does not need all 5 file contents in its history -- it just needs the answer: "pytest with conftest.py configuration." -The solution is process isolation: spawn a child agent with `messages=[]`. +In this course, a practical solution is fresh-context isolation: spawn a child agent with `messages=[]`. The child explores, reads files, runs commands. When it finishes, only its final text response returns to the parent. The child's entire message history is discarded. @@ -137,11 +137,10 @@ def run_subagent(prompt: str) -> str: | Context | Single shared | Parent + child isolation | | Subagent | None | `run_subagent()` function | | Return value | N/A | Summary text only | -| Todo system | TodoManager | Removed (not needed here) | ## Design Rationale -Process isolation gives context isolation for free. A fresh `messages[]` means the subagent cannot be confused by the parent's conversation history. 
The tradeoff is communication overhead -- results must be compressed back to the parent, losing detail. This is the same tradeoff as OS process isolation: safety and cleanliness in exchange for serialization cost. Limiting subagent depth (no recursive spawning) prevents unbounded resource consumption, and a max iteration count ensures runaway children terminate. +Fresh-context isolation is a practical way to approximate context isolation in this session. A fresh `messages[]` means the subagent starts without the parent's conversation history. The tradeoff is communication overhead -- results must be compressed back to the parent, losing detail. This is a message-history isolation strategy, not OS process isolation. Limiting subagent depth (no recursive spawning) prevents unbounded resource consumption, and a max iteration count ensures runaway children terminate. ## Try It diff --git a/docs/en/s05-skill-loading.md b/docs/en/s05-skill-loading.md index cb603cb..f0d1a33 100644 --- a/docs/en/s05-skill-loading.md +++ b/docs/en/s05-skill-loading.md @@ -144,7 +144,6 @@ class SkillLoader: | System prompt | Static string | + skill descriptions | | Knowledge | None | .skills/*.md files | | Injection | None | Two-layer (system + result)| -| Subagent | `run_subagent()` | Removed (different focus) | ## Design Rationale diff --git a/docs/en/s06-context-compact.md b/docs/en/s06-context-compact.md index 26a1f41..74f0551 100644 --- a/docs/en/s06-context-compact.md +++ b/docs/en/s06-context-compact.md @@ -162,7 +162,6 @@ def agent_loop(messages): | Auto-compact | None | Token threshold trigger | | Manual compact | None | `compact` tool | | Transcripts | None | Saved to .transcripts/ | -| Skills | load_skill | Removed (different focus) | ## Design Rationale diff --git a/docs/en/s07-task-system.md b/docs/en/s07-task-system.md index eb30290..3c2c244 100644 --- a/docs/en/s07-task-system.md +++ b/docs/en/s07-task-system.md @@ -1,28 +1,31 @@ # s07: Tasks -> Tasks persist as JSON files on 
the filesystem with a dependency graph, so they survive context compression and can be shared across agents. +> Tasks are persisted as JSON files with a dependency graph, so state survives context compression and can be shared across agents. -## The Problem +## Problem -In-memory state like TodoManager (s03) is lost when the context is -compressed (s06). After auto_compact replaces messages with a summary, -the todo list is gone. The agent has to reconstruct it from the summary -text, which is lossy and error-prone. +In-memory state (for example the TodoManager from s03) is fragile under compression (s06). Once earlier turns are compacted into summaries, in-memory todo state is gone. -This is the critical s06-to-s07 bridge: TodoManager items die with -compression; file-based tasks don't. Moving state to the filesystem -makes it compression-proof. +s06 -> s07 is the key transition: -More fundamentally, in-memory state is invisible to other agents. -When we eventually build teams (s09+), teammates need a shared task -board. In-memory data structures are process-local. +1. Todo list state in memory is conversational and lossy. +2. Task board state on disk is durable and recoverable. -The solution is to persist tasks as JSON files in `.tasks/`. Each task -is a separate file with an ID, subject, status, and dependency graph. -Completing task 1 automatically unblocks task 2 if task 2 has -`blockedBy: [1]`. The file system becomes the source of truth. +A second issue is visibility: in-memory structures are process-local, so teammates cannot reliably share that state. -## The Solution +## When to Use Task vs Todo + +From s07 onward, Task is the default. Todo remains for short linear checklists. 
+ +## Quick Decision Matrix + +| Situation | Prefer | Why | +|---|---|---| +| Short, single-session checklist | Todo | Lowest ceremony, fastest capture | +| Cross-session work, dependencies, or teammates | Task | Durable state, dependency graph, shared visibility | +| Unsure which one to use | Task | Easier to simplify later than migrate mid-run | + +## Solution ``` .tasks/ @@ -42,7 +45,7 @@ Dependency resolution: ## How It Works -1. The TaskManager provides CRUD operations. Each task is a JSON file. +1. TaskManager provides CRUD with one JSON file per task. ```python class TaskManager: @@ -61,8 +64,7 @@ class TaskManager: return json.dumps(task, indent=2) ``` -2. When a task is marked completed, `_clear_dependency` removes its ID - from all other tasks' `blockedBy` lists. +2. Completing a task clears that dependency from other tasks. ```python def _clear_dependency(self, completed_id: int): @@ -73,8 +75,7 @@ def _clear_dependency(self, completed_id: int): self._save(task) ``` -3. The `update` method handles status changes and bidirectional dependency - wiring. +3. `update` handles status transitions and dependency wiring. ```python def update(self, task_id, status=None, @@ -94,7 +95,7 @@ def update(self, task_id, status=None, self._save(task) ``` -4. Four task tools are added to the dispatch map. +4. Task tools are added to the dispatch map. 
```python TOOL_HANDLERS = { @@ -109,8 +110,7 @@ TOOL_HANDLERS = { ## Key Code -The TaskManager with dependency graph (from `agents/s07_task_system.py`, -lines 46-123): +TaskManager with dependency graph (from `agents/s07_task_system.py`, lines 46-123): ```python class TaskManager: @@ -145,17 +145,20 @@ class TaskManager: ## What Changed From s06 -| Component | Before (s06) | After (s07) | -|----------------|------------------|----------------------------| -| Tools | 5 | 8 (+task_create/update/list/get)| -| State storage | In-memory only | JSON files in .tasks/ | -| Dependencies | None | blockedBy + blocks graph | -| Compression | Three-layer | Removed (different focus) | -| Persistence | Lost on compact | Survives compression | +| Component | Before (s06) | After (s07) | +|---|---|---| +| Tools | 5 | 8 (`task_create/update/list/get`) | +| State storage | In-memory only | JSON files in `.tasks/` | +| Dependencies | None | `blockedBy + blocks` graph | +| Persistence | Lost on compact | Survives compression | ## Design Rationale -File-based state survives context compression. When the agent's conversation is compacted, in-memory state is lost, but tasks written to disk persist. The dependency graph ensures correct execution order even after context loss. This is the bridge between ephemeral conversation and persistent work -- the agent can forget conversation details but always has the task board to remind it what needs doing. The filesystem as source of truth also enables future multi-agent sharing, since any process can read the same JSON files. +File-based state survives compaction and process restarts. The dependency graph preserves execution order even when conversation details are forgotten. This turns transient chat context into durable work state. + +Durability still needs a write discipline: reload task JSON before each write, validate expected `status/blockedBy`, then persist atomically. Otherwise concurrent writers can overwrite each other. 
+ +Course-level implication: s07+ defaults to Task because it better matches long-running and collaborative engineering workflows. ## Try It @@ -164,7 +167,7 @@ cd learn-claude-code python agents/s07_task_system.py ``` -Example prompts to try: +Suggested prompts: 1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.` 2. `List all tasks and show the dependency graph` diff --git a/docs/en/s08-background-tasks.md b/docs/en/s08-background-tasks.md index ef837ce..79d56f0 100644 --- a/docs/en/s08-background-tasks.md +++ b/docs/en/s08-background-tasks.md @@ -168,7 +168,6 @@ class BackgroundManager: | Execution | Blocking only | Blocking + background threads| | Notification | None | Queue drained per loop | | Concurrency | None | Daemon threads | -| Task system | File-based CRUD | Removed (different focus) | ## Design Rationale diff --git a/docs/en/s09-agent-teams.md b/docs/en/s09-agent-teams.md index 897968f..73a275b 100644 --- a/docs/en/s09-agent-teams.md +++ b/docs/en/s09-agent-teams.md @@ -1,6 +1,6 @@ # s09: Agent Teams -> Persistent teammates with JSONL inboxes turn isolated agents into a communicating team -- spawn, message, broadcast, and drain. +> Persistent teammates with JSONL inboxes are one teaching protocol for turning isolated agents into a communicating team -- spawn, message, broadcast, and drain. ## The Problem @@ -215,7 +215,7 @@ pattern used here is safe for the teaching scenario. ## Design Rationale -File-based mailboxes (append-only JSONL) provide concurrency-safe inter-agent communication. Append is atomic on most filesystems, avoiding lock contention. The "drain on read" pattern (read all, truncate) gives batch delivery. This is simpler and more robust than shared memory or socket-based IPC for agent coordination. The tradeoff is latency -- messages are only seen at the next poll -- but for LLM-driven agents where each turn takes seconds, polling latency is negligible compared to inference time. 
+File-based mailboxes (append-only JSONL) are easy to inspect and reason about in a teaching codebase. The "drain on read" pattern (read all, truncate) gives batch delivery with very little machinery. The tradeoff is latency -- messages are only seen at the next poll -- but for LLM-driven agents where each turn takes seconds, polling latency is acceptable for this course. ## Try It diff --git a/docs/en/s11-autonomous-agents.md b/docs/en/s11-autonomous-agents.md index 4225b2f..82a6f8a 100644 --- a/docs/en/s11-autonomous-agents.md +++ b/docs/en/s11-autonomous-agents.md @@ -20,10 +20,7 @@ original system prompt identity ("you are alice, role: coder") fades. Identity re-injection solves this by inserting an identity block at the start of compressed contexts. -Teaching simplification: the token estimation used here is rough -(characters / 4). Production systems use proper tokenizer libraries. -The nag threshold of 3 rounds (from s03) is set low for teaching -visibility; production agents typically use a higher threshold around 10. +Note: token estimation here uses characters/4 (rough). The nag threshold of 3 rounds is low for teaching visibility. ## The Solution diff --git a/docs/en/s12-worktree-task-isolation.md b/docs/en/s12-worktree-task-isolation.md new file mode 100644 index 0000000..83ca38e --- /dev/null +++ b/docs/en/s12-worktree-task-isolation.md @@ -0,0 +1,250 @@ +# s12: Worktree + Task Isolation + +> Isolate by directory, coordinate by task ID -- tasks are the control plane, worktrees are the execution plane, and an event stream makes every lifecycle step observable. + +## The Problem + +By s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Ask two agents to refactor different modules at the same time and you hit three failure modes: + +Agent A edits `auth.py`. Agent B edits `auth.py`. Neither knows the other touched it. 
Unstaged changes collide, task status says "in_progress" but the directory is a mess, and when something breaks there is no way to roll back one agent's work without destroying the other's. The task board tracks _what to do_ but has no opinion about _where to do it_. + +The fix is to separate the two concerns. Tasks manage goals. Worktrees manage execution context. Bind them by task ID, and each agent gets its own directory, its own branch, and a clean teardown path. + +## The Solution + +``` +Control Plane (.tasks/) Execution Plane (.worktrees/) ++---------------------------+ +---------------------------+ +| task_1.json | | index.json | +| id: 1 | | name: "auth-refactor" | +| subject: "Auth refactor"| bind | path: ".worktrees/..." | +| status: "in_progress" | <----> | branch: "wt/auth-..." | +| worktree: "auth-refactor"| | task_id: 1 | ++---------------------------+ | status: "active" | + +---------------------------+ +| task_2.json | | | +| id: 2 | bind | name: "ui-login" | +| subject: "Login page" | <----> | task_id: 2 | +| worktree: "ui-login" | | status: "active" | ++---------------------------+ +---------------------------+ + | + +---------------------------+ + | events.jsonl (append-only)| + | worktree.create.before | + | worktree.create.after | + | worktree.remove.after | + | task.completed | + +---------------------------+ +``` + +Three state layers make this work: + +1. **Control plane** (`.tasks/task_*.json`) -- what is assigned, in progress, or done. Key fields: `id`, `subject`, `status`, `owner`, `worktree`. +2. **Execution plane** (`.worktrees/index.json`) -- where commands run and whether the workspace is still valid. Key fields: `name`, `path`, `branch`, `task_id`, `status`. +3. **Runtime state** (in-memory) -- per-turn execution continuity: `current_task`, `current_worktree`, `tool_result`, `error`. + +## How It Works + +The lifecycle has five steps. Each step is a tool call. + +1. **Create a task.** Persist the goal first. 
The task starts as `pending` with an empty `worktree` field. + +```python +task = { + "id": self._next_id, + "subject": subject, + "status": "pending", + "owner": "", + "worktree": "", +} +self._save(task) +``` + +2. **Create a worktree.** Allocate an isolated directory and branch. If you pass `task_id`, the task auto-advances to `in_progress` and the binding is written to both sides. + +```python +self._run_git(["worktree", "add", "-b", branch, str(path), base_ref]) + +entry = { + "name": name, + "path": str(path), + "branch": branch, + "task_id": task_id, + "status": "active", +} +idx["worktrees"].append(entry) +self._save_index(idx) + +if task_id is not None: + self.tasks.bind_worktree(task_id, name) +``` + +3. **Run commands in the worktree.** `worktree_run` sets `cwd` to the worktree path. Edits happen in the isolated directory, not the shared workspace. + +```python +r = subprocess.run( + command, + shell=True, + cwd=path, + capture_output=True, + text=True, + timeout=300, +) +``` + +4. **Observe.** `worktree_status` shows git state inside the isolated context. `worktree_events` queries the append-only event stream. + +5. **Close out.** Two choices: + - `worktree_keep(name)` -- preserve the directory, mark lifecycle as `kept`. + - `worktree_remove(name, complete_task=True)` -- remove the directory, complete the bound task, unbind, and emit `task.completed`. This is the closeout pattern: one call handles teardown and task completion together. 
+ +## State Machines + +``` +Task: pending -------> in_progress -------> completed + (worktree_create (worktree_remove + with task_id) with complete_task=true) + +Worktree: absent --------> active -----------> removed | kept + (worktree_create) (worktree_remove | worktree_keep) +``` + +## Key Code + +The closeout pattern -- teardown + task completion in one operation (from `agents/s12_worktree_task_isolation.py`): + +```python +def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str: + wt = self._find(name) + if not wt: + return f"Error: Unknown worktree '{name}'" + + self.events.emit( + "worktree.remove.before", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + worktree={"name": name, "path": wt.get("path")}, + ) + try: + args = ["worktree", "remove"] + if force: + args.append("--force") + args.append(wt["path"]) + self._run_git(args) + + if complete_task and wt.get("task_id") is not None: + task_id = wt["task_id"] + self.tasks.update(task_id, status="completed") + self.tasks.unbind_worktree(task_id) + self.events.emit("task.completed", task={ + "id": task_id, "status": "completed", + }, worktree={"name": name}) + + idx = self._load_index() + for item in idx.get("worktrees", []): + if item.get("name") == name: + item["status"] = "removed" + item["removed_at"] = time.time() + self._save_index(idx) + + self.events.emit( + "worktree.remove.after", + task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, + worktree={"name": name, "path": wt.get("path"), "status": "removed"}, + ) + return f"Removed worktree '{name}'" + except Exception as e: + self.events.emit( + "worktree.remove.failed", + worktree={"name": name}, + error=str(e), + ) + raise +``` + +The task-side binding (from `agents/s12_worktree_task_isolation.py`): + +```python +def bind_worktree(self, task_id: int, worktree: str, owner: str = "") -> str: + task = self._load(task_id) + task["worktree"] = worktree + if task["status"] == 
"pending": + task["status"] = "in_progress" + task["updated_at"] = time.time() + self._save(task) +``` + +The dispatch map wiring all tools together: + +```python +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), + "task_create": lambda **kw: TASKS.create(kw["subject"], kw.get("description", "")), + "task_list": lambda **kw: TASKS.list_all(), + "task_get": lambda **kw: TASKS.get(kw["task_id"]), + "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status"), kw.get("owner")), + "task_bind_worktree": lambda **kw: TASKS.bind_worktree(kw["task_id"], kw["worktree"]), + "worktree_create": lambda **kw: WORKTREES.create(kw["name"], kw.get("task_id")), + "worktree_list": lambda **kw: WORKTREES.list_all(), + "worktree_status": lambda **kw: WORKTREES.status(kw["name"]), + "worktree_run": lambda **kw: WORKTREES.run(kw["name"], kw["command"]), + "worktree_keep": lambda **kw: WORKTREES.keep(kw["name"]), + "worktree_remove": lambda **kw: WORKTREES.remove(kw["name"], kw.get("force", False), kw.get("complete_task", False)), + "worktree_events": lambda **kw: EVENTS.list_recent(kw.get("limit", 20)), +} +``` + +## Event Stream + +Worktree creation and removal each emit a `before` event followed by `after` or `failed` to `.worktrees/events.jsonl`; `worktree.keep` and `task.completed` are single events. This is an append-only log, not a replacement for task/worktree state files.
+ +Events emitted: + +- `worktree.create.before` / `worktree.create.after` / `worktree.create.failed` +- `worktree.remove.before` / `worktree.remove.after` / `worktree.remove.failed` +- `worktree.keep` +- `task.completed` (when `complete_task=true` succeeds) + +Payload shape: + +```json +{ + "event": "worktree.remove.after", + "task": {"id": 7, "status": "completed"}, + "worktree": {"name": "auth-refactor", "path": "...", "status": "removed"}, + "ts": 1730000000 +} +``` + +This gives you three things: policy decoupling (audit and notifications stay outside the core flow), failure compensation (`*.failed` records mark partial transitions), and queryability (`worktree_events` tool reads the log directly). + +## What Changed From s11 + +| Component | Before (s11) | After (s12) | +|--------------------|----------------------------|----------------------------------------------| +| Coordination state | Task board (`owner/status`) | Task board + explicit `worktree` binding | +| Execution scope | Shared directory | Task-scoped isolated directory | +| Recoverability | Task status only | Task status + worktree index | +| Teardown semantics | Task completion | Task completion + explicit keep/remove | +| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` | + +## Design Rationale + +Separating control plane from execution plane means you can reason about _what to do_ and _where to do it_ independently. A task can exist without a worktree (planning phase). A worktree can exist without a task (ad-hoc exploration). Binding them is an explicit action that writes state to both sides. This composability is the point -- it keeps the system recoverable after crashes. After an interruption, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Volatile in-memory session state downgrades into explicit, durable file state. 
The event stream adds observability without coupling side effects into the critical path: auditing, notifications, and quota checks consume events rather than intercepting state writes. + +## Try It + +```sh +cd learn-claude-code +python agents/s12_worktree_task_isolation.py +``` + +Example prompts to try: + +1. `Create tasks for backend auth and frontend login page, then list tasks.` +2. `Create worktree "auth-refactor" for task 1, create worktree "ui-login", then bind task 2 to "ui-login".` +3. `Run "git status --short" in worktree "auth-refactor".` +4. `Keep worktree "ui-login", then list worktrees and inspect worktree events.` +5. `Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` diff --git a/docs/ja/s01-the-agent-loop.md b/docs/ja/s01-the-agent-loop.md index 4bd4cf0..27076d8 100644 --- a/docs/ja/s01-the-agent-loop.md +++ b/docs/ja/s01-the-agent-loop.md @@ -1,6 +1,6 @@ # s01: The Agent Loop -> AIコーディングエージェントの秘密はすべて、モデルが「終了」と判断するまでツール結果をモデルにフィードバックし続けるwhileループにある。 +> AIコーディングエージェントの中核は、モデルが「終了」と判断するまでツール結果をモデルにフィードバックし続ける while ループにある。 ## 問題 @@ -49,7 +49,7 @@ response = client.messages.create( messages.append({"role": "assistant", "content": response.content}) ``` -4. stop reasonを確認する。モデルがツールを呼び出さなかった場合、ループは終了する。これが唯一の終了条件だ。 +4. 
stop reasonを確認する。モデルがツールを呼び出さなかった場合、ループは終了する。この最小実装では、これが唯一のループ終了条件だ。 ```python if response.stop_reason != "tool_use": @@ -115,7 +115,7 @@ def agent_loop(messages: list): ## 設計原理 -このループはすべてのLLMベースエージェントの普遍的な基盤だ。本番実装ではエラーハンドリング、トークンカウント、ストリーミング、リトライロジックが追加されるが、根本的な構造は変わらない。シンプルさこそがポイントだ: 1つの終了条件(`stop_reason != "tool_use"`)がフロー全体を制御する。本コースの他のすべて -- ツール、計画、圧縮、チーム -- はこのループの上に積み重なるが、ループ自体は変更しない。このループを理解することは、すべてのエージェントを理解することだ。 +このループは LLM ベースエージェントの土台だ。本番実装ではエラーハンドリング、トークン計測、ストリーミング、リトライに加え、権限ポリシーやライフサイクル編成が追加されるが、コアの相互作用パターンはここから始まる。シンプルさこそこの章の狙いであり、この最小実装では 1 つの終了条件(`stop_reason != "tool_use"`)で学習に必要な制御を示す。本コースの他の要素はこのループに積み重なる。つまり、このループの理解は基礎であって、本番アーキテクチャ全体そのものではない。 ## 試してみる diff --git a/docs/ja/s03-todo-write.md b/docs/ja/s03-todo-write.md index 398a048..ba926f4 100644 --- a/docs/ja/s03-todo-write.md +++ b/docs/ja/s03-todo-write.md @@ -10,7 +10,7 @@ 解決策は構造化された状態管理だ: モデルが明示的に書き込むTodoManager。モデルは計画を作成し、作業中のアイテムをin_progressとしてマークし、完了時にcompletedとマークする。nagリマインダーは、モデルが3ラウンド以上todoを更新しなかった場合にナッジを注入する。 -教育上の簡略化: nagの閾値3ラウンドは教育目的の可視化のために低く設定されている。本番のエージェントでは過剰なプロンプトを避けるため閾値は約10に設定されている。 +注: nag 閾値 3 ラウンドは可視化のために低く設定。本番ではより高い値に調整される。s07 以降は永続的なマルチステップ作業に Task ボードを使用。TodoWrite は軽量チェックリストとして引き続き利用可能。 ## 解決策 diff --git a/docs/ja/s04-subagent.md b/docs/ja/s04-subagent.md index 9141346..c68f2d8 100644 --- a/docs/ja/s04-subagent.md +++ b/docs/ja/s04-subagent.md @@ -8,7 +8,7 @@ これは探索的タスクで特に深刻だ。「このプロジェクトはどのテストフレームワークを使っているか」という質問には5つのファイルを読む必要があるかもしれないが、親エージェントには5つのファイルの内容すべては不要だ -- 「pytest with conftest.py configuration」という回答だけが必要なのだ。 -解決策はプロセスの分離だ: `messages=[]`で子エージェントを生成する。子は探索し、ファイルを読み、コマンドを実行する。終了時には最終的なテキストレスポンスだけが親に返される。子のメッセージ履歴全体は破棄される。 +このコースでの実用的な解決策は fresh `messages[]` 分離だ: `messages=[]`で子エージェントを生成する。子は探索し、ファイルを読み、コマンドを実行する。終了時には最終的なテキストレスポンスだけが親に返される。子のメッセージ履歴全体は破棄される。 ## 解決策 @@ -124,11 +124,10 @@ def run_subagent(prompt: str) -> str: | Context | Single shared | Parent + child isolation | | Subagent | None | `run_subagent()` function | | Return value | N/A | Summary 
text only | -| Todo system | TodoManager | Removed (not needed here) | ## 設計原理 -プロセス分離はコンテキスト分離を無料で提供する。新しい`messages[]`は、サブエージェントが親の会話履歴に混乱させられないことを意味する。トレードオフは通信オーバーヘッドだ -- 結果は親に圧縮して返す必要があり、詳細が失われる。これはOSのプロセス分離と同じトレードオフだ: シリアライゼーションコストと引き換えに安全性とクリーンさを得る。サブエージェントの深さ制限(再帰的なスポーンは不可)は無制限のリソース消費を防ぎ、最大反復回数は暴走した子プロセスの終了を保証する。 +このセッションでは、fresh `messages[]` 分離はコンテキスト分離を近似する実用手段だ。新しい`messages[]`により、サブエージェントは親の会話履歴を持たずに開始する。トレードオフは通信オーバーヘッドで、結果を親へ圧縮して返すため詳細が失われる。これはメッセージ履歴の分離戦略であり、OSのプロセス分離そのものではない。サブエージェントの深さ制限(再帰スポーン不可)は無制限のリソース消費を防ぎ、最大反復回数は暴走した子処理の終了を保証する。 ## 試してみる diff --git a/docs/ja/s05-skill-loading.md b/docs/ja/s05-skill-loading.md index 1f34cfa..a472269 100644 --- a/docs/ja/s05-skill-loading.md +++ b/docs/ja/s05-skill-loading.md @@ -132,7 +132,6 @@ class SkillLoader: | System prompt | Static string | + skill descriptions | | Knowledge | None | .skills/*.md files | | Injection | None | Two-layer (system + result)| -| Subagent | `run_subagent()` | Removed (different focus) | ## 設計原理 diff --git a/docs/ja/s06-context-compact.md b/docs/ja/s06-context-compact.md index 9049453..f11bc8b 100644 --- a/docs/ja/s06-context-compact.md +++ b/docs/ja/s06-context-compact.md @@ -149,7 +149,6 @@ def agent_loop(messages): | Auto-compact | None | Token threshold trigger | | Manual compact | None | `compact` tool | | Transcripts | None | Saved to .transcripts/ | -| Skills | load_skill | Removed (different focus) | ## 設計原理 diff --git a/docs/ja/s07-task-system.md b/docs/ja/s07-task-system.md index ebe52d6..745366f 100644 --- a/docs/ja/s07-task-system.md +++ b/docs/ja/s07-task-system.md @@ -1,16 +1,29 @@ # s07: Tasks -> タスクはファイルシステム上にJSON形式で依存グラフ付きで永続化され、コンテキスト圧縮後も生き残り、複数エージェント間で共有できる。 +> タスクを依存グラフ付き JSON として永続化し、コンテキスト圧縮後も状態を保持し、複数エージェントで共有できるようにする。 ## 問題 -インメモリの状態であるTodoManager(s03)は、コンテキストが圧縮(s06)されると失われる。auto_compactがメッセージを要約で置換した後、todoリストは消える。エージェントは要約テキストからそれを再構成しなければならないが、これは不正確でエラーが起きやすい。 +インメモリ状態(s03 の TodoManager など)は、s06 の圧縮後に失われやすい。古いターンが要約化されると、Todo 状態は会話の外に残らない。 
-これがs06からs07への重要な橋渡しだ: TodoManagerのアイテムは圧縮と共に死ぬが、ファイルベースのタスクは死なない。状態をファイルシステムに移すことで、圧縮に対する耐性が得られる。 +s06 -> s07 の本質は次の切替: -さらに根本的な問題として、インメモリの状態は他のエージェントからは見えない。最終的にチーム(s09以降)を構築する際、チームメイトには共有のタスクボードが必要だ。インメモリのデータ構造はプロセスローカルだ。 +1. メモリ上 Todo は会話依存で失われやすい。 +2. ディスク上 Task は永続で復元しやすい。 -解決策はタスクを`.tasks/`にJSON形式で永続化すること。各タスクはID、件名、ステータス、依存グラフを持つ個別のファイルだ。タスク1を完了すると、タスク2が`blockedBy: [1]`を持つ場合、自動的にタスク2のブロックが解除される。ファイルシステムが信頼できる情報源となる。 +さらに可視性の問題がある。インメモリ構造はプロセスローカルであり、チームメイト間の共有が不安定になる。 + +## Task vs Todo: 使い分け + +s07 以降は Task がデフォルト。Todo は短い直線的チェックリスト用に残る。 + +## クイック判定マトリクス + +| 状況 | 優先 | 理由 | +|---|---|---| +| 短時間・単一セッション・直線的チェック | Todo | 儀式が最小で記録が速い | +| セッション跨ぎ・依存関係・複数担当 | Task | 永続性、依存表現、協調可視性が必要 | +| 迷う場合 | Task | 後で簡略化する方が、途中移行より低コスト | ## 解決策 @@ -32,7 +45,7 @@ Dependency resolution: ## 仕組み -1. TaskManagerがCRUD操作を提供する。各タスクは1つのJSONファイル。 +1. TaskManager はタスクごとに1 JSON ファイルで CRUD を提供する。 ```python class TaskManager: @@ -51,7 +64,7 @@ class TaskManager: return json.dumps(task, indent=2) ``` -2. タスクが完了とマークされると、`_clear_dependency`がそのIDを他のすべてのタスクの`blockedBy`リストから除去する。 +2. タスク完了時、他タスクの依存を解除する。 ```python def _clear_dependency(self, completed_id: int): @@ -62,7 +75,7 @@ def _clear_dependency(self, completed_id: int): self._save(task) ``` -3. `update`メソッドがステータス変更と双方向の依存関係の結線を処理する。 +3. `update` が状態遷移と依存配線を担う。 ```python def update(self, task_id, status=None, @@ -82,7 +95,7 @@ def update(self, task_id, status=None, self._save(task) ``` -4. 4つのタスクツールがディスパッチマップに追加される。 +4. 
タスクツール群をディスパッチへ追加する。 ```python TOOL_HANDLERS = { @@ -97,7 +110,7 @@ TOOL_HANDLERS = { ## 主要コード -依存グラフ付きTaskManager(`agents/s07_task_system.py` 46-123行目): +依存グラフ付き TaskManager(`agents/s07_task_system.py` 46-123行): ```python class TaskManager: @@ -130,19 +143,22 @@ class TaskManager: self._save(task) ``` -## s06からの変更点 +## s06 からの変更 -| Component | Before (s06) | After (s07) | -|----------------|------------------|----------------------------| -| Tools | 5 | 8 (+task_create/update/list/get)| -| State storage | In-memory only | JSON files in .tasks/ | -| Dependencies | None | blockedBy + blocks graph | -| Compression | Three-layer | Removed (different focus) | -| Persistence | Lost on compact | Survives compression | +| 項目 | Before (s06) | After (s07) | +|---|---|---| +| Tools | 5 | 8 (`task_create/update/list/get`) | +| 状態保存 | メモリのみ | `.tasks/` の JSON | +| 依存関係 | なし | `blockedBy + blocks` グラフ | +| 永続性 | compact で消失 | compact 後も維持 | ## 設計原理 -ファイルベースの状態はコンテキスト圧縮を生き延びる。エージェントの会話が圧縮されるとメモリ内の状態は失われるが、ディスクに書き込まれたタスクは永続する。依存グラフにより、コンテキストが失われた後でも正しい順序で実行される。これは一時的な会話と永続的な作業の橋渡しだ -- エージェントは会話の詳細を忘れても、タスクボードが常に何をすべきかを思い出させてくれる。ファイルシステムを信頼できる情報源とすることで、将来のマルチエージェント共有も可能になる。任意のプロセスが同じJSONファイルを読み取れるからだ。 +ファイルベース状態は compaction や再起動に強い。依存グラフにより、会話詳細を忘れても実行順序を保てる。これにより、会話中心の状態を作業中心の永続状態へ移せる。 + +ただし耐久性には運用前提がある。書き込みのたびに task JSON を再読込し、`status/blockedBy` が期待通りか確認してから原子的に保存しないと、並行更新で状態を上書きしやすい。 + +コース設計上、s07 以降で Task を主線に置くのは、長時間・協調開発の実態に近いから。 ## 試してみる @@ -151,7 +167,7 @@ cd learn-claude-code python agents/s07_task_system.py ``` -試せるプロンプト例: +例: 1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.` 2. 
`List all tasks and show the dependency graph` diff --git a/docs/ja/s08-background-tasks.md b/docs/ja/s08-background-tasks.md index c3b3040..2476db6 100644 --- a/docs/ja/s08-background-tasks.md +++ b/docs/ja/s08-background-tasks.md @@ -157,7 +157,6 @@ class BackgroundManager: | Execution | Blocking only | Blocking + background threads| | Notification | None | Queue drained per loop | | Concurrency | None | Daemon threads | -| Task system | File-based CRUD | Removed (different focus) | ## 設計原理 diff --git a/docs/ja/s09-agent-teams.md b/docs/ja/s09-agent-teams.md index 473e0d3..994d911 100644 --- a/docs/ja/s09-agent-teams.md +++ b/docs/ja/s09-agent-teams.md @@ -1,6 +1,6 @@ # s09: Agent Teams -> JSONL形式のインボックスを持つ永続的なチームメイトが、孤立したエージェントをコミュニケーションするチームに変える -- spawn、message、broadcast、drain。 +> JSONL 形式のインボックスを持つ永続的なチームメイトは、孤立したエージェントを連携可能なチームへ変えるための教材プロトコルの一つだ -- spawn、message、broadcast、drain。 ## 問題 @@ -8,7 +8,7 @@ 本物のチームワークには3つのものが必要だ: (1)単一のプロンプトを超えて存続する永続的なエージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネル。メッセージングがなければ、永続的なチームメイトでさえ聾唖だ -- 並列に作業できるが協調することはない。 -解決策は、名前付きの永続的エージェントを生成するTeammateManagerと、JONSLインボックスファイルを使うMessageBusの組み合わせだ。各チームメイトは自身のagent loopをスレッドで実行し、各LLM呼び出しの前にインボックスを確認し、他のチームメイトやリーダーにメッセージを送れる。 +解決策は、名前付きの永続的エージェントを生成するTeammateManagerと、JSONL インボックスファイルを使うMessageBusの組み合わせだ。各チームメイトは自身のagent loopをスレッドで実行し、各LLM呼び出しの前にインボックスを確認し、他のチームメイトやリーダーにメッセージを送れる。 s06からs07への橋渡しについての注記: s03のTodoManagerアイテムは圧縮(s06)と共に死ぬ。ファイルベースのタスク(s07)はディスク上に存在するため圧縮後も生き残る。チームも同じ原則の上に構築されている -- config.jsonとインボックスファイルはコンテキストウィンドウの外に永続化される。 @@ -194,7 +194,7 @@ class MessageBus: ## 設計原理 -ファイルベースのメールボックス(追記専用JSONL)は並行性安全なエージェント間通信を提供する。追記はほとんどのファイルシステムでアトミックであり、ロック競合を回避する。「読み取り時にドレイン」パターン(全読み取り、切り詰め)はバッチ配信を提供する。これは共有メモリやソケットベースのIPCよりもシンプルで堅牢だ。トレードオフはレイテンシだ -- メッセージは次のポーリングまで見えない -- しかし各ターンに数秒の推論時間がかかるLLM駆動エージェントにとって、ポーリングレイテンシは推論時間に比べて無視できる。 +ファイルベースのメールボックス(追記専用 JSONL)は、教材コードとして観察しやすく理解しやすい。「読み取り時にドレイン」パターン(全読み取り、切り詰め)は、少ない仕組みでバッチ配信を実現できる。トレードオフはレイテンシで、メッセージは次のポーリングまで見えない。ただし本コースでは、各ターンに数秒かかる 
LLM 推論を前提にすると、この遅延は許容範囲である。 ## 試してみる diff --git a/docs/ja/s11-autonomous-agents.md b/docs/ja/s11-autonomous-agents.md index 7d4a882..a72b6e5 100644 --- a/docs/ja/s11-autonomous-agents.md +++ b/docs/ja/s11-autonomous-agents.md @@ -10,7 +10,7 @@ s09-s10では、チームメイトは明示的に指示された時のみ作業 しかし自律エージェントには微妙な問題がある: コンテキスト圧縮後に、エージェントが自分が誰かを忘れる可能性がある。メッセージが要約されると、元のシステムプロンプトのアイデンティティ(「あなたはalice、役割はcoder」)が薄れる。アイデンティティの再注入は、圧縮されたコンテキストの先頭にアイデンティティブロックを挿入することでこれを解決する。 -教育上の簡略化: ここで使用するトークン推定は大まかなもの(文字数 / 4)だ。本番システムでは適切なトークナイザーライブラリを使用する。nagの閾値3ラウンド(s03から)は教育目的の可視化のために低く設定されている。本番のエージェントでは閾値は約10。 +注: トークン推定は文字数/4(大まか)。nag 閾値 3 ラウンドは可視化のために低く設定。 ## 解決策 diff --git a/docs/ja/s12-worktree-task-isolation.md b/docs/ja/s12-worktree-task-isolation.md new file mode 100644 index 0000000..fbdfd1e --- /dev/null +++ b/docs/ja/s12-worktree-task-isolation.md @@ -0,0 +1,226 @@ +# s12: Worktree + Task Isolation + +> ディレクトリで分離し、タスクIDで調整する -- タスクボード(制御面)と worktree(実行面)の組み合わせで、並行編集を衝突しやすい状態から追跡可能・復元可能・後片付け可能な状態に変える。 + +## 問題 + +s11 でエージェントはタスクを自律的に処理できるようになった。だが全タスクが同じ作業ディレクトリで走ると、3つの障害が現れる。 + +あるエージェントが認証リファクタリングに取り組みながら、別のエージェントがログインページを作っている。両者が `src/auth.py` を編集する。未コミットの変更が混ざり合い、`git diff` は2つのタスクの差分が入り混じった結果を返す。どちらのエージェントの変更かを後から特定するのは困難になり、片方のタスクを巻き戻すと他方の編集も消える。 + +1. 変更汚染: 未コミット変更が相互に干渉する。 +2. 責務の曖昧化: タスク状態とファイル変更がずれる。 +3. 
終了処理の難化: 実行コンテキストを残すか削除するかの判断が曖昧になる。 + +解決の核は「何をやるか」と「どこでやるか」の分離だ。 + +## 解決策 + +``` +Control Plane (.tasks/) Execution Plane (.worktrees/) ++---------------------+ +------------------------+ +| task_1.json | | auth-refactor/ | +| status: in_progress| bind | branch: wt/auth-ref | +| worktree: auth-ref|-------->| cwd for commands | ++---------------------+ +------------------------+ +| task_2.json | | ui-login/ | +| status: pending | bind | branch: wt/ui-login | +| worktree: ui-login|-------->| cwd for commands | ++---------------------+ +------------------------+ + | | + v v + "what to do" "where to execute" + +Events (.worktrees/events.jsonl) + worktree.create.before -> worktree.create.after + worktree.remove.before -> worktree.remove.after + task.completed +``` + +## 仕組み + +1. 状態は3つの層に分かれる。制御面はタスクの目標と担当を管理し、実行面は worktree のパスとブランチを管理し、実行時状態はメモリ上の1ターン情報を保持する。 + +```text +制御面 (.tasks/task_*.json) -> id/subject/status/owner/worktree +実行面 (.worktrees/index.json) -> name/path/branch/task_id/status +実行時状態 (メモリ) -> current_task/current_worktree/error +``` + +2. Task と worktree はそれぞれ独立した状態機械を持つ。 + +```text +Task: pending -> in_progress -> completed +Worktree: absent -> active -> removed | kept +``` + +3. `task_create` でまず目標を永続化する。worktree はまだ不要だ。 + +```python +task = { + "id": self._next_id, + "subject": subject, + "status": "pending", + "owner": "", + "worktree": "", + "created_at": time.time(), + "updated_at": time.time(), +} +self._save(task) +``` + +4. `worktree_create(name, task_id?)` で分離ディレクトリとブランチを作る。`task_id` を渡すと、タスクが `pending` なら自動的に `in_progress` に遷移する。 + +```python +entry = { + "name": name, + "path": str(path), + "branch": branch, + "task_id": task_id, + "status": "active", + "created_at": time.time(), +} +idx["worktrees"].append(entry) +self._save_index(idx) + +if task_id is not None: + self.tasks.bind_worktree(task_id, name) +``` + +5. 
`worktree_run(name, command)` で分離ディレクトリ内のコマンドを実行する。`cwd=worktree_path` が実質的な「enter」だ。 + +```python +r = subprocess.run( + command, + shell=True, + cwd=path, + capture_output=True, + text=True, + timeout=300, +) +``` + +6. 終了処理では `keep` か `remove` を明示的に選ぶ。`worktree_remove(name, complete_task=true)` はディレクトリ削除とタスク完了を一度に行う。 + +```python +def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str: + self._run_git(["worktree", "remove", wt["path"]]) + if complete_task and wt.get("task_id") is not None: + self.tasks.update(wt["task_id"], status="completed") + self.tasks.unbind_worktree(wt["task_id"]) + self.events.emit("task.completed", ...) +``` + +7. `.worktrees/events.jsonl` にライフサイクルイベントが append-only で記録される。重要な遷移には `before / after / failed` の三段イベントが出力される。 + +```json +{ + "event": "worktree.remove.after", + "task": {"id": 7, "status": "completed"}, + "worktree": {"name": "auth-refactor", "path": "...", "status": "removed"}, + "ts": 1730000000 +} +``` + +イベントは可観測性のサイドチャネルであり、task/worktree の主状態機械の書き込みを置き換えるものではない。監査・通知・ポリシーチェックはイベント購読側で処理する。 + +## 主要コード + +タスクの worktree バインドと状態遷移(`agents/s12_worktree_task_isolation.py` 182-191行目): + +```python +def bind_worktree(self, task_id: int, worktree: str, owner: str = "") -> str: + task = self._load(task_id) + task["worktree"] = worktree + if owner: + task["owner"] = owner + if task["status"] == "pending": + task["status"] = "in_progress" + task["updated_at"] = time.time() + self._save(task) + return json.dumps(task, indent=2) +``` + +Worktree の作成とイベント発火(`agents/s12_worktree_task_isolation.py` 283-334行目): + +```python +def create(self, name: str, task_id: int = None, base_ref: str = "HEAD") -> str: + self._validate_name(name) + if self._find(name): + raise ValueError(f"Worktree '{name}' already exists in index") + + path = self.dir / name + branch = f"wt/{name}" + self.events.emit("worktree.create.before", + task={"id": task_id} if task_id is not None else {}, + worktree={"name": name, "base_ref": 
base_ref}) + try: + self._run_git(["worktree", "add", "-b", branch, str(path), base_ref]) + entry = { + "name": name, "path": str(path), "branch": branch, + "task_id": task_id, "status": "active", + "created_at": time.time(), + } + idx = self._load_index() + idx["worktrees"].append(entry) + self._save_index(idx) + if task_id is not None: + self.tasks.bind_worktree(task_id, name) + self.events.emit("worktree.create.after", ...) + return json.dumps(entry, indent=2) + except Exception as e: + self.events.emit("worktree.create.failed", ..., error=str(e)) + raise +``` + +ツールディスパッチマップ(`agents/s12_worktree_task_isolation.py` 535-552行目): + +```python +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), + "task_create": lambda **kw: TASKS.create(kw["subject"], kw.get("description", "")), + "task_list": lambda **kw: TASKS.list_all(), + "task_get": lambda **kw: TASKS.get(kw["task_id"]), + "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status"), kw.get("owner")), + "task_bind_worktree": lambda **kw: TASKS.bind_worktree(kw["task_id"], kw["worktree"], kw.get("owner", "")), + "worktree_create": lambda **kw: WORKTREES.create(kw["name"], kw.get("task_id"), kw.get("base_ref", "HEAD")), + "worktree_list": lambda **kw: WORKTREES.list_all(), + "worktree_status": lambda **kw: WORKTREES.status(kw["name"]), + "worktree_run": lambda **kw: WORKTREES.run(kw["name"], kw["command"]), + "worktree_keep": lambda **kw: WORKTREES.keep(kw["name"]), + "worktree_remove": lambda **kw: WORKTREES.remove(kw["name"], kw.get("force", False), kw.get("complete_task", False)), + "worktree_events": lambda **kw: EVENTS.list_recent(kw.get("limit", 20)), +} +``` + +## s11 からの変更 + +| 観点 | s11 | s12 | +|---|---|---| +| 調整状態 | Task board (`owner/status`) | Task board + 
`worktree` 明示バインド | +| 実行スコープ | 共有ディレクトリ | タスク単位の分離ディレクトリ | +| 復元性 | タスク状態のみ | タスク状態 + worktree index | +| 終了意味論 | タスク完了のみ | タスク完了 + 明示的 keep/remove 判断 | +| ライフサイクル可視性 | 暗黙的なログ | `.worktrees/events.jsonl` の明示イベント | + +## 設計原理 + +制御面と実行面の分離が中核だ。タスクは「何をやるか」を記述し、worktree は「どこでやるか」を提供する。両者は組み合わせ可能だが、強結合ではない。状態遷移は暗黙の自動掃除ではなく、`worktree_keep` / `worktree_remove` という明示的なツール操作として表現する。イベントストリームは `before / after / failed` の三段構造で重要な遷移を記録し、監査や通知をコアロジックから分離する。中断後でも `.tasks/` + `.worktrees/index.json` から状態を再構築できる。揮発的な会話状態を明示的なディスク状態に落とすことが、復元可能性の鍵だ。 + +## 試してみる + +```sh +cd learn-claude-code +python agents/s12_worktree_task_isolation.py +``` + +試せるプロンプト例: + +1. `Create tasks for backend auth and frontend login page, then list tasks.` +2. `Create worktree "auth-refactor" for task 1, create worktree "ui-login", then bind task 2 to "ui-login".` +3. `Run "git status --short" in worktree "auth-refactor".` +4. `Keep worktree "ui-login", then list worktrees and inspect worktree events.` +5. `Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` diff --git a/docs/zh/s01-the-agent-loop.md b/docs/zh/s01-the-agent-loop.md index 4cbfa25..b32bb12 100644 --- a/docs/zh/s01-the-agent-loop.md +++ b/docs/zh/s01-the-agent-loop.md @@ -1,6 +1,6 @@ # s01: Agent Loop (智能体循环) -> AI 编程智能体的全部秘密就是一个 while 循环 -- 把工具执行结果反馈给模型, 直到模型决定停止。 +> AI 编程智能体的核心是一个 while 循环 -- 把工具执行结果反馈给模型, 直到模型决定停止。 ## 问题 @@ -49,7 +49,7 @@ response = client.messages.create( messages.append({"role": "assistant", "content": response.content}) ``` -4. 检查 stop_reason。如果模型没有调用工具, 循环结束。这是唯一的退出条件。 +4. 
检查 stop_reason。如果模型没有调用工具, 循环结束。在本节最小实现里, 这是唯一的循环退出条件。 ```python if response.stop_reason != "tool_use": @@ -115,7 +115,7 @@ def agent_loop(messages: list): ## 设计原理 -这个循环是所有基于 LLM 的智能体的通用基础。生产实现会增加错误处理、token 计数、流式输出和重试逻辑, 但基本结构不变。简洁性就是重点: 一个退出条件 (`stop_reason != "tool_use"`) 控制整个流程。本课程中的所有其他内容 -- 工具、规划、压缩、团队 -- 都是在这个循环之上叠加, 而不修改它。理解这个循环就是理解所有智能体。 +这个循环是所有基于 LLM 的智能体的基础。生产实现还会增加错误处理、token 计数、流式输出、重试、权限策略与生命周期编排, 但核心交互模式仍从这里开始。本节强调简洁性: 在本节最小实现里, 一个退出条件 (`stop_reason != "tool_use"`) 就能支撑我们先学会主流程。本课程中的其他内容都在这个循环上叠加。理解这个循环是为了建立基础心智模型, 它本身并不等于完整的生产架构。 ## 试一试 diff --git a/docs/zh/s02-tool-use.md b/docs/zh/s02-tool-use.md index e38aa68..5ddd352 100644 --- a/docs/zh/s02-tool-use.md +++ b/docs/zh/s02-tool-use.md @@ -6,7 +6,7 @@ 只有 `bash` 时, 智能体所有操作都通过 shell: 读文件、写文件、编辑文件。这能用但很脆弱。`cat` 的输出会被不可预测地截断。`sed` 替换遇到特殊字符就会失败。模型浪费大量 token 构造 shell 管道, 而一个直接的函数调用会简单得多。 -更重要的是, bash 是一个安全攻击面。每次 bash 调用都能做 shell 能做的一切。有了专用工具如 `read_file` 和 `write_file`, 你可以在工具层面强制路径沙箱化, 阻止危险模式, 而不是寄希望于模型自觉回避。 +更重要的是, bash 存在安全风险。每次 bash 调用都能做 shell 能做的一切。有了专用工具如 `read_file` 和 `write_file`, 你可以在工具层面强制路径沙箱化, 阻止危险模式, 而不是寄希望于模型自觉回避。 关键洞察: 添加工具不需要修改循环。s01 的循环保持不变。你只需在工具数组中添加条目, 编写处理函数, 然后通过 dispatch map 把它们关联起来。 diff --git a/docs/zh/s03-todo-write.md b/docs/zh/s03-todo-write.md index 70697ef..7b705df 100644 --- a/docs/zh/s03-todo-write.md +++ b/docs/zh/s03-todo-write.md @@ -10,7 +10,7 @@ 解决方案是结构化状态: 一个模型显式写入的 TodoManager。模型创建计划, 工作时将项目标记为 in_progress, 完成后标记为 completed。nag reminder 机制在模型连续 3 轮以上不更新待办时注入提醒。 -教学简化说明: 这里 nag 阈值设为 3 轮是为了教学可见性。生产环境的智能体通常使用约 10 轮的阈值以避免过度提醒。 +注: nag 阈值 3 轮是为教学可见性设的低值, 生产环境通常更高。从 s07 起, 课程转向 Task 看板处理持久化多步工作; TodoWrite 仍可用于轻量清单。 ## 解决方案 diff --git a/docs/zh/s04-subagent.md b/docs/zh/s04-subagent.md index 4f18d4c..2c157b6 100644 --- a/docs/zh/s04-subagent.md +++ b/docs/zh/s04-subagent.md @@ -8,7 +8,7 @@ 这对探索性任务尤其糟糕。"这个项目用了什么测试框架?"
可能需要读取 5 个文件, 但父智能体的历史中并不需要这 5 个文件的全部内容 -- 它只需要答案: "pytest, 使用 conftest.py 配置。" -解决方案是进程隔离: 以 `messages=[]` 启动一个子智能体。子智能体进行探索、读取文件、运行命令。完成后, 只有最终的文本响应返回给父智能体。子智能体的全部消息历史被丢弃。 +在本课程里, 一个实用解法是 fresh `messages[]` 隔离: 以 `messages=[]` 启动一个子智能体。子智能体进行探索、读取文件、运行命令。完成后, 只有最终的文本响应返回给父智能体。子智能体的全部消息历史被丢弃。 ## 解决方案 @@ -124,11 +124,10 @@ def run_subagent(prompt: str) -> str: | 上下文 | 单一共享 | 父 + 子隔离 | | Subagent | 无 | `run_subagent()` 函数 | | 返回值 | 不适用 | 仅摘要文本 | -| Todo 系统 | TodoManager | 已移除 (非本节重点) | ## 设计原理 -进程隔离免费提供了上下文隔离。全新的 `messages[]` 意味着子智能体不会被父级的对话历史干扰。代价是通信开销 -- 结果必须压缩回父级, 丢失细节。这与操作系统进程隔离的权衡相同: 用序列化成本换取安全性和整洁性。限制子智能体深度 (不允许递归生成) 防止无限资源消耗, 最大迭代次数确保失控的子进程能终止。 +在本节中, fresh `messages[]` 隔离是一个近似实现上下文隔离的实用办法。全新的 `messages[]` 意味着子智能体从不携带父级历史开始。代价是通信开销 -- 结果必须压缩回父级, 丢失细节。这是消息历史隔离策略, 不是操作系统进程隔离本身。限制子智能体深度 (不允许递归生成) 防止无限资源消耗, 最大迭代次数确保失控的子任务能终止。 ## 试一试 diff --git a/docs/zh/s05-skill-loading.md b/docs/zh/s05-skill-loading.md index aa8033d..ad4f0c3 100644 --- a/docs/zh/s05-skill-loading.md +++ b/docs/zh/s05-skill-loading.md @@ -4,7 +4,7 @@ ## 问题 -你希望智能体针对不同领域遵循特定的工作流: git 约定、测试模式、代码审查清单。简单粗暴的做法是把所有内容都塞进系统提示。但系统提示的有效注意力是有限的 -- 文本太多, 模型就会开始忽略其中一部分。 +智能体需要针对不同领域遵循特定的工作流: git 约定、测试模式、代码审查清单。简单粗暴的做法是把所有内容都塞进系统提示。但系统提示的有效注意力是有限的 -- 文本太多, 模型就会开始忽略其中一部分。 如果你有 10 个技能, 每个 2000 token, 那就是 20,000 token 的系统提示。模型关注开头和结尾, 但会略过中间部分。更糟糕的是, 这些技能中大部分与当前任务无关。文件编辑任务不需要 git 工作流说明。 @@ -132,7 +132,6 @@ class SkillLoader: | 系统提示 | 静态字符串 | + 技能描述列表 | | 知识库 | 无 | .skills/*.md 文件 | | 注入方式 | 无 | 两层 (系统提示 + result) | -| Subagent | `run_subagent()` | 已移除 (非本节重点) | ## 设计原理 diff --git a/docs/zh/s06-context-compact.md b/docs/zh/s06-context-compact.md index 91d41e8..a01f13c 100644 --- a/docs/zh/s06-context-compact.md +++ b/docs/zh/s06-context-compact.md @@ -149,7 +149,6 @@ def agent_loop(messages): | Auto-compact | 无 | token 阈值触发 | | Manual compact | 无 | `compact` 工具 | | Transcripts | 无 | 保存到 .transcripts/ | -| Skills | load_skill | 已移除 (非本节重点) | ## 设计原理 diff --git a/docs/zh/s07-task-system.md 
b/docs/zh/s07-task-system.md index 239a177..1b17e22 100644 --- a/docs/zh/s07-task-system.md +++ b/docs/zh/s07-task-system.md @@ -10,7 +10,19 @@ 更根本地说, 内存中的状态对其他智能体不可见。当我们最终构建团队 (s09+) 时, 队友需要一个共享的任务看板。内存中的数据结构是进程局部的。 -解决方案是将任务作为 JSON 文件持久化在 `.tasks/` 目录中。每个任务是一个单独的文件, 包含 ID、主题、状态和依赖图。完成任务 1 会自动解除任务 2 的阻塞 (如果任务 2 有 `blockedBy: [1]`)。文件系统成为唯一的真实来源。 +解决方案是将任务作为 JSON 文件持久化在 `.tasks/` 目录中。每个任务是一个单独的文件, 包含 ID、主题、状态和依赖图。完成任务 1 会自动解除任务 2 的阻塞 (如果任务 2 有 `blockedBy: [1]`)。在本教学实现里, 文件系统是任务状态的真实来源。 + +## Task vs Todo: 何时用哪个 + +从 s07 起, Task 是默认主线。Todo 仍可用于短期线性清单。 + +## 快速判定矩阵 + +| 场景 | 优先选择 | 原因 | +|---|---|---| +| 短时、单会话、线性清单 | Todo | 心智负担最低,记录最快 | +| 跨会话、存在依赖、多人协作 | Task | 状态可持久、依赖可表达、协作可见 | +| 一时拿不准 | Task | 先用 Task、事后再降级为 Todo,比中途从 Todo 迁移到 Task 成本更低 | ## 解决方案 @@ -132,17 +144,20 @@ class TaskManager: ## 相对 s06 的变更 -| 组件 | 之前 (s06) | 之后 (s07) | -|----------------|------------------|----------------------------------| -| Tools | 5 | 8 (+task_create/update/list/get) | -| 状态存储 | 仅内存 | .tasks/ 中的 JSON 文件 | -| 依赖关系 | 无 | blockedBy + blocks 图 | -| 压缩机制 | 三层 | 已移除 (非本节重点) | -| 持久化 | 压缩后丢失 | 压缩后存活 | +| 组件 | 之前 (s06) | 之后 (s07) | +|---|---|---| +| Tools | 5 | 8 (`task_create/update/list/get`) | +| 状态存储 | 仅内存 | `.tasks/` 中的 JSON 文件 | +| 依赖关系 | 无 | `blockedBy + blocks` 图 | +| 持久化 | 压缩后丢失 | 压缩后存活 | ## 设计原理 -基于文件的状态能在上下文压缩中存活。当智能体的对话被压缩时, 内存中的状态会丢失, 但写入磁盘的任务会持久保存。依赖图确保即使在上下文丢失后也能按正确顺序执行。这是临时对话与持久工作之间的桥梁 -- 智能体可以忘记对话细节, 但始终有任务看板来提醒它还需要做什么。文件系统作为唯一真实来源也为未来的多智能体共享提供了基础, 因为任何进程都可以读取相同的 JSON 文件。 +基于文件的状态能在上下文压缩中存活。当智能体的对话被压缩时, 内存中的状态会丢失, 但写入磁盘的任务会持久保存。依赖图确保即使在上下文丢失后也能按正确顺序执行。这是临时对话与持久工作之间的桥梁 -- 智能体可以忘记对话细节, 但始终有任务看板来提醒它还需要做什么。在本教学实现里, 文件系统作为任务状态的真实来源, 也为未来的多智能体共享提供了基础, 因为任何进程都可以读取相同的 JSON 文件。 + +但“持久化”成立有前提:每次写入前都要重新读取任务文件,确认 `status/blockedBy` 与预期一致,再原子写回。否则并发写入很容易互相覆盖状态。 + +从课程设计上看, 这也是为什么 s07 之后我们默认采用 Task 而不是 Todo: 它更接近真实工程中的长期执行与协作需求。 ## 试一试 diff --git a/docs/zh/s08-background-tasks.md b/docs/zh/s08-background-tasks.md index 19c865b..f89fc7a 100644 --- a/docs/zh/s08-background-tasks.md +++
b/docs/zh/s08-background-tasks.md @@ -157,7 +157,6 @@ class BackgroundManager: | 执行方式 | 仅阻塞 | 阻塞 + 后台线程 | | 通知机制 | 无 | 每轮排空的队列 | | 并发 | 无 | 守护线程 | -| 任务系统 | 基于文件的 CRUD | 已移除 (非本节重点) | ## 设计原理 diff --git a/docs/zh/s09-agent-teams.md b/docs/zh/s09-agent-teams.md index 1df0c93..c0c1a8c 100644 --- a/docs/zh/s09-agent-teams.md +++ b/docs/zh/s09-agent-teams.md @@ -1,6 +1,6 @@ # s09: Agent Teams (智能体团队) -> 持久化的队友通过 JSONL 收件箱将孤立的智能体转变为可通信的团队 -- spawn、message、broadcast 和 drain。 +> 持久化的队友通过 JSONL 收件箱提供了一种教学协议, 将孤立的智能体转变为可通信的团队 -- spawn、message、broadcast 和 drain。 ## 问题 @@ -194,7 +194,7 @@ class MessageBus: ## 设计原理 -基于文件的邮箱 (追加式 JSONL) 提供了并发安全的智能体间通信。追加操作在大多数文件系统上是原子的, 避免了锁竞争。"读取时排空" 模式 (读取全部, 截断) 提供批量传递。这比共享内存或基于 socket 的 IPC 更简单、更健壮。代价是延迟 -- 消息只在下一次轮询时才被看到 -- 但对于每轮需要数秒推理时间的 LLM 驱动智能体来说, 轮询延迟相比推理时间可以忽略不计。 +基于文件的邮箱 (追加式 JSONL) 在教学代码中具有可观察、易理解的优势。"读取时排空" 模式 (读取全部, 截断) 用很少的机制就能实现批量传递。代价是延迟 -- 消息只在下一次轮询时才被看到 -- 但对于每轮需要数秒推理时间的 LLM 驱动智能体来说, 本课程中该延迟是可接受的。 ## 试一试 diff --git a/docs/zh/s11-autonomous-agents.md b/docs/zh/s11-autonomous-agents.md index ac21243..d580e3b 100644 --- a/docs/zh/s11-autonomous-agents.md +++ b/docs/zh/s11-autonomous-agents.md @@ -10,7 +10,7 @@ 但自治智能体面临一个微妙问题: 上下文压缩后, 智能体可能忘记自己是谁。如果消息被摘要化, 原始系统提示中的身份 ("你是 alice, 角色: coder") 就会淡化。身份重注入通过在压缩后的上下文开头插入身份块来解决这个问题。 -教学简化说明: 这里的 token 估算比较粗糙 (字符数 / 4)。生产系统使用专业的 tokenizer 库。s03 中的 nag 阈值 3 轮是为教学可见性设的低值; 生产环境的智能体通常使用约 10 轮的阈值。 +注: token 估算使用字符数/4 (粗略)。nag 阈值 3 轮是为教学可见性设的低值。 ## 解决方案 diff --git a/docs/zh/s12-worktree-task-isolation.md b/docs/zh/s12-worktree-task-isolation.md new file mode 100644 index 0000000..2c485db --- /dev/null +++ b/docs/zh/s12-worktree-task-isolation.md @@ -0,0 +1,193 @@ +# s12: Worktree + 任务隔离 + +> 目录隔离, 任务 ID 协调 -- 用"任务板 (控制面) + worktree (执行面)"把并行改动从互相污染变成可追踪、可恢复、可收尾。 + +## 问题 + +s11 时, agent 已经能认领任务并协同推进。但所有任务共享同一个工作目录。两个 agent 同时改同一棵文件树时, 未提交的变更互相干扰, 任务状态和实际改动对不上, 收尾时也无法判断该保留还是清理哪些文件。 + +考虑一个具体场景: agent A 在做 auth 重构, agent B 在做登录页。两者都修改了 `config.py`。A 的半成品改动被 B 的 `git status` 看到, B 以为是自己的遗留, 尝试提交 
-- 结果两个任务都坏了。 + +根因是"做什么"和"在哪里做"没有分开。任务板管目标, 但执行上下文是共享的。解决方案: 给每个任务分配独立的 git worktree 目录, 用任务 ID 把两边关联起来。 + +## 解决方案 + +``` +控制面 (.tasks/) 执行面 (.worktrees/) ++------------------+ +------------------------+ +| task_1.json | | auth-refactor/ | +| status: in_progress <----> branch: wt/auth-refactor +| worktree: "auth-refactor" | task_id: 1 | ++------------------+ +------------------------+ +| task_2.json | | ui-login/ | +| status: pending <----> branch: wt/ui-login +| worktree: "ui-login" | task_id: 2 | ++------------------+ +------------------------+ + | + index.json (worktree registry) + events.jsonl (lifecycle log) +``` + +三层状态: +1. 控制面 (What): `.tasks/task_*.json` -- 任务目标、责任归属、完成状态 +2. 执行面 (Where): `.worktrees/index.json` -- 隔离目录路径、分支、存活状态 +3. 运行态 (Now): 单轮内存上下文 -- 当前任务、当前 worktree、工具结果 + +状态机: +```text +Task: pending -> in_progress -> completed +Worktree: absent -> active -> removed | kept +``` + +## 工作原理 + +1. 创建任务, 把目标写入任务板。 + +```python +TASKS.create("Implement auth refactor") +# -> .tasks/task_1.json status=pending worktree="" +``` + +2. 创建 worktree 并绑定任务。传入 `task_id` 时自动把任务推进到 `in_progress`。 + +```python +WORKTREES.create("auth-refactor", task_id=1) +# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD +# -> index.json 追加 entry, task_1.json 绑定 worktree="auth-refactor" +``` + +3. 在隔离目录中执行命令。`cwd` 指向 worktree 路径, 主目录不受影响。 + +```python +WORKTREES.run("auth-refactor", "git status --short") +# -> subprocess.run(command, cwd=".worktrees/auth-refactor", ...) +``` + +4. 观测和回写。`worktree_status` 查看 git 状态, `task_update` 维护进度。 + +```python +WORKTREES.status("auth-refactor") # git status inside worktree +TASKS.update(1, owner="agent-A") # update task metadata +``` + +5. 
收尾: 选择 keep 或 remove。`remove` 配合 `complete_task=true` 会同时完成任务并解绑 worktree。 + +```python +WORKTREES.remove("auth-refactor", complete_task=True) +# -> git worktree remove +# -> task_1.json status=completed, worktree="" +# -> index.json status=removed +# -> events.jsonl 写入 task.completed + worktree.remove.after +``` + +6. 进程中断后, 从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的, 磁盘状态是持久的。 + +## 核心代码 + +事件流 -- append-only 生命周期日志 (来自 `agents/s12_worktree_task_isolation.py`): + +```python +class EventBus: + def emit(self, event, task=None, worktree=None, error=None): + payload = { + "event": event, + "ts": time.time(), + "task": task or {}, + "worktree": worktree or {}, + } + if error: + payload["error"] = error + with self.path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload) + "\n") +``` + +事件流写入 `.worktrees/events.jsonl`, 每个关键操作发出三段式事件: +- `worktree.create.before / after / failed` +- `worktree.remove.before / after / failed` +- `task.completed` (当 `complete_task=true` 成功时) + +事件负载形状: + +```json +{ + "event": "worktree.remove.after", + "task": {"id": 7, "status": "completed"}, + "worktree": {"name": "auth-refactor", "path": "...", "status": "removed"}, + "ts": 1730000000 +} +``` + +任务绑定 -- Task 侧持有 worktree 名称: + +```python +def bind_worktree(self, task_id: int, worktree: str, owner: str = "") -> str: + task = self._load(task_id) + task["worktree"] = worktree + if task["status"] == "pending": + task["status"] = "in_progress" + self._save(task) +``` + +隔离执行 -- cwd 路由到 worktree 目录: + +```python +r = subprocess.run( + command, + shell=True, + cwd=path, + capture_output=True, + text=True, + timeout=300, +) +``` + +收尾联动 -- remove 同时完成任务: + +```python +def remove(self, name, force=False, complete_task=False): + self._run_git(["worktree", "remove", wt["path"]]) + if complete_task and wt.get("task_id") is not None: + self.tasks.update(wt["task_id"], status="completed") + self.tasks.unbind_worktree(wt["task_id"]) + self.events.emit("task.completed", ...) 
+``` + +生命周期工具注册: + +```python +"worktree_keep": lambda **kw: WORKTREES.keep(kw["name"]), +"worktree_events": lambda **kw: EVENTS.list_recent(kw.get("limit", 20)), +``` + +## 相对 s11 的变更 + +| 组件 | 之前 (s11) | 之后 (s12) | +|----------------|----------------------------|-----------------------------------------| +| 协调状态 | 任务板 (owner/status) | 任务板 + `worktree` 显式绑定 | +| 执行上下文 | 共享目录 | 每个任务可分配独立 worktree 目录 | +| 可恢复性 | 依赖任务状态 | 任务状态 + worktree 索引双重恢复 | +| 收尾语义 | 任务完成 | 任务完成 + worktree 显式 keep/remove | +| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 | + +## 设计原理 + +控制面/执行面分离是这一章的核心模式。Task 管"做什么", worktree 管"在哪做", 两者通过 task ID 关联但不强耦合。这意味着一个任务可以先不绑定 worktree (纯规划阶段), 也可以在多个 worktree 之间迁移。 + +显式状态机让每次迁移都可审计、可恢复。进程崩溃后, 从 `.tasks/` 和 `.worktrees/index.json` 两个文件就能重建全部现场, 不依赖会话内存。 + +事件流是旁路可观测层, 不替代主状态机写入。审计、通知、配额控制等副作用放在事件消费者中处理, 核心流程保持最小。`keep/remove` 作为显式收尾动作存在, 而不是隐式清理 -- agent 必须做出决策, 这个决策本身被记录。 + +## 试一试 + +```sh +cd learn-claude-code +python agents/s12_worktree_task_isolation.py +``` + +可以尝试的提示: + +1. `Create tasks for backend auth and frontend login page, then list tasks.` +2. `Create worktree "auth-refactor" for task 1, create worktree "ui-login", then bind task 2 to "ui-login".` +3. `Run "git status --short" in worktree "auth-refactor".` +4. `Keep worktree "ui-login", then list worktrees and inspect worktree events.` +5. 
`Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` diff --git a/web/src/components/architecture/design-decisions.tsx b/web/src/components/architecture/design-decisions.tsx index 9d63536..5fa47fa 100644 --- a/web/src/components/architecture/design-decisions.tsx +++ b/web/src/components/architecture/design-decisions.tsx @@ -17,6 +17,7 @@ import s08Annotations from "@/data/annotations/s08.json"; import s09Annotations from "@/data/annotations/s09.json"; import s10Annotations from "@/data/annotations/s10.json"; import s11Annotations from "@/data/annotations/s11.json"; +import s12Annotations from "@/data/annotations/s12.json"; interface Decision { id: string; @@ -44,6 +45,7 @@ const ANNOTATIONS: Record = { s09: s09Annotations as AnnotationFile, s10: s10Annotations as AnnotationFile, s11: s11Annotations as AnnotationFile, + s12: s12Annotations as AnnotationFile, }; interface DesignDecisionsProps { diff --git a/web/src/components/simulator/agent-loop-simulator.tsx b/web/src/components/simulator/agent-loop-simulator.tsx index 6457b79..8de470c 100644 --- a/web/src/components/simulator/agent-loop-simulator.tsx +++ b/web/src/components/simulator/agent-loop-simulator.tsx @@ -20,6 +20,7 @@ const scenarioModules: Record Promise<{ default: Scenario }>> = { s09: () => import("@/data/scenarios/s09.json") as Promise<{ default: Scenario }>, s10: () => import("@/data/scenarios/s10.json") as Promise<{ default: Scenario }>, s11: () => import("@/data/scenarios/s11.json") as Promise<{ default: Scenario }>, + s12: () => import("@/data/scenarios/s12.json") as Promise<{ default: Scenario }>, }; interface AgentLoopSimulatorProps { diff --git a/web/src/components/visualizations/index.tsx b/web/src/components/visualizations/index.tsx index 9a94859..5fc6222 100644 --- a/web/src/components/visualizations/index.tsx +++ b/web/src/components/visualizations/index.tsx @@ -18,6 +18,7 @@ const visualizations: Record< s09: lazy(() => import("./s09-agent-teams")), 
s10: lazy(() => import("./s10-team-protocols")), s11: lazy(() => import("./s11-autonomous-agents")), + s12: lazy(() => import("./s12-worktree-task-isolation")), }; export function SessionVisualization({ version }: { version: string }) { diff --git a/web/src/components/visualizations/s04-subagent.tsx b/web/src/components/visualizations/s04-subagent.tsx index 82a129a..d4b05b7 100644 --- a/web/src/components/visualizations/s04-subagent.tsx +++ b/web/src/components/visualizations/s04-subagent.tsx @@ -62,7 +62,7 @@ const STEPS = [ { title: "Clean Context", description: - "The parent gets a clean summary without context bloat. This is process isolation for LLMs.", + "The parent gets a clean summary without context bloat. This is fresh-context isolation via messages[].", }, ]; diff --git a/web/src/components/visualizations/s12-worktree-task-isolation.tsx b/web/src/components/visualizations/s12-worktree-task-isolation.tsx new file mode 100644 index 0000000..1251c56 --- /dev/null +++ b/web/src/components/visualizations/s12-worktree-task-isolation.tsx @@ -0,0 +1,278 @@ +"use client"; + +import { motion } from "framer-motion"; +import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; +import { StepControls } from "@/components/visualizations/shared/step-controls"; + +type TaskStatus = "pending" | "in_progress" | "completed"; + +interface TaskRow { + id: number; + subject: string; + status: TaskStatus; + worktree: string; +} + +interface WorktreeRow { + name: string; + branch: string; + task: string; + state: "none" | "active" | "kept" | "removed"; +} + +interface Lane { + name: string; + files: string[]; + highlight?: boolean; +} + +interface StepState { + title: string; + desc: string; + tasks: TaskRow[]; + worktrees: WorktreeRow[]; + lanes: Lane[]; + op: string; +} + +const STEPS: StepState[] = [ + { + title: "Single Workspace Pain", + desc: "Two tasks are active, but both edits would hit one directory and collide.", + op: "task_create x2", + tasks: [ + { 
id: 1, subject: "Auth refactor", status: "in_progress", worktree: "" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "" }, + ], + worktrees: [], + lanes: [ + { name: "main", files: ["auth/service.py", "ui/Login.tsx"], highlight: true }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: [] }, + ], + }, + { + title: "Allocate Lane for Task 1", + desc: "Create a worktree lane and associate it with task 1 for clear ownership.", + op: "worktree_create(name='auth-refactor', task_id=1)", + tasks: [ + { id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }, + ], + lanes: [ + { name: "main", files: ["ui/Login.tsx"] }, + { name: "wt/auth-refactor", files: ["auth/service.py"], highlight: true }, + { name: "wt/ui-login", files: [] }, + ], + }, + { + title: "Allocate Lane for Task 2", + desc: "Lane creation and task association can be separate. 
Here task 2 binds after lane creation.", + op: "worktree_create(name='ui-login')\ntask_bind_worktree(task_id=2, worktree='ui-login')", + tasks: [ + { id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: ["auth/service.py"] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, + { + title: "Run Commands in Isolated Lanes", + desc: "Each command routes by selected lane directory, not by the shared root.", + op: "worktree_run('auth-refactor', 'pytest tests/auth -q')", + tasks: [ + { id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: ["auth/service.py", "tests/auth/test_login.py"], highlight: true }, + { name: "wt/ui-login", files: ["ui/Login.tsx", "ui/Login.css"] }, + ], + }, + { + title: "Keep One Lane, Close Another", + desc: "Closeout can mix decisions: keep ui-login active for follow-up, remove auth-refactor and complete task 1.", + op: "worktree_keep('ui-login')\nworktree_remove('auth-refactor', complete_task=true)\nworktree_events(limit=10)", + tasks: [ + { id: 1, subject: "Auth refactor", status: "completed", worktree: "" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: 
"wt/auth-refactor", task: "#1", state: "removed" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, + { + title: "Isolation + Coordination + Events", + desc: "The board tracks shared truth, worktree lanes isolate execution, and events provide auditable side-channel traces.", + op: "task_list + worktree_list + worktree_events", + tasks: [ + { id: 1, subject: "Auth refactor", status: "completed", worktree: "" }, + { id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "removed" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, +]; + +function statusClass(status: TaskStatus): string { + if (status === "completed") return "bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-300"; + if (status === "in_progress") return "bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-300"; + return "bg-zinc-100 text-zinc-700 dark:bg-zinc-800 dark:text-zinc-300"; +} + +function worktreeClass(state: WorktreeRow["state"]): string { + if (state === "active") return "border-emerald-300 bg-emerald-50 dark:border-emerald-800 dark:bg-emerald-900/20"; + if (state === "kept") return "border-sky-300 bg-sky-50 dark:border-sky-800 dark:bg-sky-900/20"; + if (state === "removed") return "border-zinc-200 bg-zinc-100 opacity-70 dark:border-zinc-700 dark:bg-zinc-800"; + return "border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-900"; +} + +export default function WorktreeTaskIsolation({ title }: { title?: string }) { + const vis = 
useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2600 }); + const step = STEPS[vis.currentStep]; + + return ( +
+

+ {title || "Worktree Task Isolation"} +

+ +
+
+ {step.op} +
+ +
+
+
+ Task Board (.tasks) +
+
+ {step.tasks.map((task) => ( + +
+ #{task.id} + + {task.status} + +
+
{task.subject}
+
+ worktree: {task.worktree || "-"} +
+
+ ))} +
+
+ +
+
+ Worktree Index (.worktrees/index.json) +
+
+ {step.worktrees.length === 0 && ( +
+ no worktrees yet +
+ )} + {step.worktrees.map((wt) => ( + +
{wt.name}
+
{wt.branch}
+
task: {wt.task}
+
+ ))} +
+
+ +
+
+ Execution Lanes +
+
+ {step.lanes.map((lane) => ( + +
{lane.name}
+
+ {lane.files.length === 0 ? ( +
(no changes)
+ ) : ( + lane.files.map((f) =>
{f}
) + )} +
+
+ ))} +
+
+
+ +
+
{step.title}
+
{step.desc}
+
+
+ + +
+ ); +} diff --git a/web/src/data/annotations/s07.json b/web/src/data/annotations/s07.json index 2f22f27..b062451 100644 --- a/web/src/data/annotations/s07.json +++ b/web/src/data/annotations/s07.json @@ -30,17 +30,59 @@ } }, { - "id": "task-replaces-todo", - "title": "TaskManager Replaces TodoWrite", - "description": "TaskManager is the multi-agent evolution of TodoWrite. Same core concept (a list of items with statuses) but with critical additions: file persistence (survives crashes), dependency tracking (blocks/blockedBy), ownership (which agent is working on what), and multi-process safety. TodoWrite was designed for a single agent tracking its own work in memory. TaskManager is designed for a team of agents coordinating through the filesystem. The API is intentionally similar so the conceptual upgrade path is clear.", - "alternatives": "Keeping TodoWrite for single-agent use and adding TaskManager only for multi-agent scenarios would avoid breaking the single-agent experience. But maintaining two systems with overlapping functionality increases complexity. TaskManager is a strict superset of TodoWrite -- a single agent using TaskManager just ignores the multi-agent features.", + "id": "task-default-todo-coexistence", + "title": "Task as Course Default, Todo Still Useful", + "description": "TaskManager extends the Todo mental model and becomes the default workflow from s07 onward in this course. This 'default' is a course sequencing choice, not a universal runtime default claim. Both track work items with statuses, but TaskManager adds file persistence (survives crashes), dependency tracking (blocks/blockedBy), ownership fields, and multi-process coordination. Todo remains useful for short, linear, one-shot tracking where heavyweight coordination is unnecessary.", + "alternatives": "Using only Todo keeps the model minimal but weak for long-running or collaborative work. Using only Task everywhere maximizes consistency but can feel heavy for tiny one-off tasks. 
Reminder signals are omission-reduction hints, not implicit mode switches; Task/Todo choice should still come from workflow intent and available tools.", "zh": { - "title": "TaskManager 取代 TodoWrite", - "description": "TaskManager 是 TodoWrite 的多代理进化版。核心概念相同(带状态的项目列表),但增加了关键能力:文件持久化(崩溃后存活)、依赖追踪(blocks/blockedBy)、所有权(哪个 agent 在处理什么)、以及多进程安全。TodoWrite 为单 agent 在内存中追踪自身工作而设计。TaskManager 为代理团队通过文件系统协调而设计。API 刻意保持相似,使概念升级路径清晰。" + "title": "Task 为课程主线,Todo 仍有适用场景", + "description": "TaskManager 延续了 Todo 的心智模型,并在本课程 s07 之后成为默认主线。两者都管理带状态的任务项,但 TaskManager 增加了文件持久化(崩溃后可恢复)、依赖追踪(blocks/blockedBy)、owner 字段与多进程协作能力。Todo 仍适合短、线性、一次性的轻量跟踪。" }, "ja": { - "title": "TaskManager が TodoWrite を置き換え", - "description": "TaskManager は TodoWrite のマルチエージェント進化版です。コア概念は同じ(ステータス付きの項目リスト)ですが、重要な追加があります:ファイル永続化(クラッシュ後も存続)、依存関係追跡(blocks/blockedBy)、所有権(どのエージェントが何を担当しているか)、マルチプロセス安全性。TodoWrite は単一エージェントがメモリ内で自身の作業を追跡するために設計されました。TaskManager はエージェントチームがファイルシステムを通じて連携するために設計されています。API は意図的に類似させ、概念的なアップグレードパスを明確にしています。" + "title": "Task を主線にしつつ Todo も併存", + "description": "TaskManager は Todo のメンタルモデルを拡張し、本コースでは s07 以降のデフォルトになる。どちらもステータス付き作業項目を扱うが、TaskManager にはファイル永続化(クラッシュ耐性)、依存関係追跡(blocks/blockedBy)、owner、マルチプロセス協調がある。Todo は短く直線的な単発作業では引き続き有効。" + } + }, + { + "id": "task-write-discipline", + "title": "Durability Needs Write Discipline", + "description": "File persistence reduces context loss, but it does not remove concurrent-write risks by itself. Before writing task state, reload the JSON, validate expected status/dependency fields, and then save atomically. This prevents one agent from silently overwriting another agent's transition.", + "alternatives": "Blind overwrite writes are simpler but can corrupt coordination state under parallel execution. 
A database with optimistic locking would enforce stronger safety, but the course keeps file-based state for zero-dependency teaching.", + "zh": { + "title": "持久化仍需要写入纪律", + "description": "文件持久化能降低上下文丢失,但不会自动消除并发写入风险。写任务状态前应先重读 JSON、校验 `status/blockedBy` 是否符合预期,再原子写回,避免不同 agent 悄悄覆盖彼此状态。" + }, + "ja": { + "title": "耐久性には書き込み規律が必要", + "description": "ファイル永続化だけでは並行書き込み競合は防げない。更新前に JSON を再読込し、`status/blockedBy` を検証して原子的に保存することで、他エージェントの遷移上書きを防ぐ。" + } + }, + { + "id": "reminder-advisory-not-switch", + "title": "Reminder Is Advisory, Not a Mode Switch", + "description": "Reminder signals should be treated as omission-reduction hints, not as control-plane switches. Choosing Task vs Todo should come from workflow intent and currently available tools, while reminders only nudge usage when tracking appears stale.", + "alternatives": "Treating reminders as implicit mode selectors looks convenient, but it hides decision boundaries and makes behavior harder to reason about during long sessions.", + "zh": { + "title": "Reminder 是提示,不是模式开关", + "description": "Reminder 信号用于降低遗漏,不应当被当作控制面的模式切换器。Task/Todo 的选择应由工作流意图与可用工具决定,提醒只在追踪滞后时提供轻量提示。" + }, + "ja": { + "title": "Reminder は助言でありモード切替ではない", + "description": "Reminder は取りこぼしを減らすための助言であり、制御面のモード切替として扱わない。Task/Todo の選択はワークフロー意図と利用可能ツールで決め、Reminder は追跡が滞ったときに軽く促す。" + } + }, + { + "id": "todo-task-fast-matrix", + "title": "Todo/Task Fast Decision Matrix", + "description": "Use Todo for short one-session linear checklists. Use Task for cross-session work, dependencies, or teammate coordination. If uncertain, start with Task because downscoping is cheaper than migrating state mid-run.", + "alternatives": "Always using Todo keeps the model minimal but breaks durability and collaboration. 
Always using Task maximizes consistency but may feel heavy for tiny one-shot notes.", + "zh": { + "title": "Todo/Task 快速判定矩阵", + "description": "短时单会话线性清单用 Todo;跨会话、依赖、多人协作用 Task;拿不准时先用 Task,因为后续降级比半途迁移状态更便宜。" + }, + "ja": { + "title": "Todo/Task クイック判定マトリクス", + "description": "短い単一セッションの直線タスクは Todo、セッション跨ぎや依存・協調がある作業は Task。迷うなら Task 開始が安全で、後で簡略化する方が途中移行より低コスト。" } } ] diff --git a/web/src/data/annotations/s12.json b/web/src/data/annotations/s12.json new file mode 100644 index 0000000..f040ad2 --- /dev/null +++ b/web/src/data/annotations/s12.json @@ -0,0 +1,103 @@ +{ + "version": "s12", + "decisions": [ + { + "id": "shared-board-isolated-lanes", + "title": "Shared Task Board + Isolated Execution Lanes", + "description": "The task board remains shared and centralized in `.tasks/`, while file edits happen in per-task worktree directories. This separation preserves global visibility (who owns what, what is done) without forcing everyone to edit inside one mutable directory. Coordination stays simple because there is one board, and execution stays safe because each lane is isolated.", + "alternatives": "A single shared workspace is simpler but causes edit collisions and mixed git state. Fully independent task stores per lane avoid collisions but lose team-level visibility and make planning harder.", + "zh": { + "title": "共享任务板 + 隔离执行通道", + "description": "任务板继续集中在 `.tasks/`,而文件改动发生在按任务划分的 worktree 目录中。这样既保留了全局可见性(谁在做什么、完成到哪),又避免所有人同时写同一目录导致冲突。协调层简单(一个任务板),执行层安全(多条隔离通道)。" + }, + "ja": { + "title": "共有タスクボード + 分離実行レーン", + "description": "タスクボードは `.tasks/` に集約しつつ、実際の編集はタスクごとの worktree ディレクトリで行う。これにより全体の可視性(担当と進捗)を維持しながら、単一ディレクトリでの衝突を回避できる。調整は1つのボードで単純化され、実行はレーン分離で安全になる。" + } + }, + { + "id": "index-file-lifecycle", + "title": "Explicit Worktree Lifecycle Index", + "description": "`.worktrees/index.json` records each worktree's name, path, branch, task_id, and status. This makes lifecycle state inspectable and recoverable even after context compression or process restarts. 
The index also provides a deterministic source for list/status/remove operations.", + "alternatives": "Relying only on `git worktree list` removes local bookkeeping but loses task binding metadata and custom lifecycle states. Keeping all state only in memory is simpler in code but breaks recoverability.", + "zh": { + "title": "显式 worktree 生命周期索引", + "description": "`.worktrees/index.json` 记录每个 worktree 的名称、路径、分支、task_id 与状态。即使上下文压缩或进程重启,这些生命周期状态仍可检查和恢复。它也为 list/status/remove 提供了确定性的本地数据源。" + }, + "ja": { + "title": "明示的な worktree ライフサイクル索引", + "description": "`.worktrees/index.json` に name/path/branch/task_id/status を記録することで、コンテキスト圧縮やプロセス再起動後も状態を追跡できる。list/status/remove の挙動もこの索引を基準に決定できる。" + } + }, + { + "id": "lane-cwd-routing-and-reentry-guard", + "title": "Lane-Scoped CWD Routing + Re-entry Guard", + "description": "This course runtime uses lane-scoped cwd routing (`worktree_run(name, command)`). Other runtimes may choose session-level cwd switches. The design goal is predictable lane context with a re-entry guard when already inside an active worktree context.", + "alternatives": "Global cwd mutation is easy to implement but can leak context across parallel work. Allowing silent re-entry makes lifecycle ownership ambiguous and complicates teardown behavior.", + "zh": { + "title": "按通道 cwd 路由 + 禁止重入", + "description": "本课程运行时采用按通道 `cwd` 路由(`worktree_run(name, command)`)。其他运行时也可能选择会话级 cwd 切换。设计目标是让并行通道可预测,并在已处于 active worktree 上下文时通过重入保护避免二次进入。" + }, + "ja": { + "title": "レーン単位 cwd ルーティング + 再入防止", + "description": "本コース実装では `worktree_run(name, command)` によるレーン単位 cwd ルーティングを採用する。実装によってはセッション単位で cwd を切り替える場合もある。狙いは並列レーンの予測可能性を保ち、active な worktree 文脈での再入を防ぐこと。" + } + }, + { + "id": "event-stream-observability", + "title": "Append-Only Lifecycle Event Stream", + "description": "Lifecycle events are appended to `.worktrees/events.jsonl` (`worktree.create.*`, `worktree.remove.*`, `task.completed`). 
This turns hidden transitions into queryable records and makes failures explicit (`*.failed`) instead of silent.", + "alternatives": "Relying only on console logs is lighter but fragile during long sessions and hard to audit. A full event bus infrastructure is powerful but heavier than needed for this teaching baseline.", + "zh": { + "title": "追加式生命周期事件流", + "description": "生命周期事件写入 `.worktrees/events.jsonl`(如 `worktree.create.*`、`worktree.remove.*`、`task.completed`)。这样状态迁移可查询、可追踪,失败也会以 `*.failed` 显式暴露,而不是静默丢失。" + }, + "ja": { + "title": "追記型ライフサイクルイベント", + "description": "ライフサイクルイベントを `.worktrees/events.jsonl` に追記する(`worktree.create.*`、`worktree.remove.*`、`task.completed` など)。遷移が可観測になり、失敗も `*.failed` として明示できる。" + } + }, + { + "id": "hook-style-extension", + "title": "Hook-Style Extensions via Event Triplets", + "description": "Treat `before/after/failed` lifecycle emissions as extension points. Keep source-of-truth state writes in task/worktree files, and run side effects (audit, notification, policy checks) in event consumers.", + "alternatives": "Embedding every side effect directly in create/remove logic couples concerns tightly and makes failure handling harder. Moving source-of-truth to event replay is also risky without strict idempotency/repair semantics.", + "zh": { + "title": "通过三段事件实现 Hook 风格扩展", + "description": "把 `before/after/failed` 生命周期事件当作扩展插槽。真实状态写入仍留在 task/worktree 文件,审计、通知、策略检查等副作用交给事件消费者。" + }, + "ja": { + "title": "三段イベントによる Hook 風拡張", + "description": "`before/after/failed` ライフサイクルイベントを拡張ポイントとして使う。正準状態は task/worktree ファイルに残し、副作用(監査・通知・ポリシーチェック)はイベント購読側で処理する。" + } + }, + { + "id": "task-worktree-closeout", + "title": "Close Task and Workspace Together", + "description": "`worktree_remove(..., complete_task=true)` allows a single closeout step: remove the isolated directory and mark the bound task completed. 
In this course model, closeout remains an explicit tool-driven transition (`worktree_keep` / `worktree_remove`) rather than hidden automatic cleanup. This reduces dangling state where a task says done but its temporary lane remains active (or the reverse).", + "alternatives": "Keeping closeout fully manual gives flexibility but increases operational drift. Fully automatic removal on every completion risks deleting a workspace before final review.", + "zh": { + "title": "任务与工作区一起收尾", + "description": "`worktree_remove(..., complete_task=true)` 允许在一个动作里完成收尾:删除隔离目录并把绑定任务标记为 completed。在本课程模型里,收尾保持为显式工具驱动迁移(`worktree_keep` / `worktree_remove`),而不是隐藏的自动清理。这样可减少状态悬挂(任务已完成但临时工作区仍活跃,或反过来)。" + }, + "ja": { + "title": "タスクとワークスペースを同時にクローズ", + "description": "`worktree_remove(..., complete_task=true)` により、分離ディレクトリ削除とタスク完了更新を1ステップで実行できる。本コースのモデルでは、クローズ処理は `worktree_keep` / `worktree_remove` の明示ツール遷移として扱い、暗黙の自動清掃にはしない。完了済みタスクに未回収レーンが残る、といったズレを減らせる。" + } + }, + { + "id": "event-stream-side-channel", + "title": "Event Stream Is Observability Side-Channel", + "description": "Lifecycle events improve auditability, but the source of truth remains task/worktree state files. 
Events should be read as transition traces, not as a replacement state machine.", + "alternatives": "Using logs alone hides structured transitions; using events as the only state source risks drift when replay/repair semantics are undefined.", + "zh": { + "title": "事件流是观测旁路,不是状态机替身", + "description": "生命周期事件提升可审计性,但真实状态源仍是任务/工作区状态文件。事件更适合做迁移轨迹,而不是替代主状态机。" + }, + "ja": { + "title": "イベントは観測サイドチャネルであり状態機械の代替ではない", + "description": "ライフサイクルイベントは監査性を高めるが、真の状態源は task/worktree 状態ファイルのまま。イベントは遷移トレースとして扱い、主状態機械の代替にしない。" + } + } + ] +} diff --git a/web/src/data/execution-flows.ts b/web/src/data/execution-flows.ts index 084e0f6..72ce54d 100644 --- a/web/src/data/execution-flows.ts +++ b/web/src/data/execution-flows.ts @@ -271,6 +271,43 @@ export const EXECUTION_FLOWS: Record = { { from: "poll", to: "inbox" }, ], }, + s12: { + nodes: [ + { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 }, + { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 }, + { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 }, + { id: "is_wt", label: "worktree tool?", type: "decision", x: COL_LEFT, y: 280 }, + { id: "task", label: "Task Board\\n(.tasks)", type: "process", x: 60, y: 360 }, + { id: "wt_create", label: "Allocate / Enter\\nWorktree", type: "subprocess", x: 60, y: 440 }, + { id: "wt_run", label: "Run in\\nIsolated Dir", type: "subprocess", x: COL_LEFT + 80, y: 360 }, + { id: "wt_close", label: "Closeout:\\nworktree_keep / remove", type: "process", x: COL_LEFT + 80, y: 440 }, + { id: "events", label: "Emit Lifecycle Events\\n(side-channel)", type: "process", x: COL_RIGHT, y: 420 }, + { id: "events_read", label: "Optional Read\\nworktree_events", type: "subprocess", x: COL_RIGHT, y: 520 }, + { id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 }, + { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 }, + ], + edges: [ + { from: "start", to: "llm" }, + { from: "llm", to: "tool_check" }, 
+ { from: "tool_check", to: "is_wt", label: "yes" }, + { from: "tool_check", to: "end", label: "no" }, + { from: "is_wt", to: "task", label: "task ops" }, + { from: "is_wt", to: "wt_create", label: "create/bind" }, + { from: "is_wt", to: "wt_run", label: "run/status" }, + { from: "task", to: "wt_create", label: "allocate lane" }, + { from: "wt_create", to: "wt_run" }, + { from: "task", to: "append", label: "task result" }, + { from: "wt_create", to: "events", label: "emit create" }, + { from: "wt_create", to: "append", label: "create result" }, + { from: "wt_run", to: "wt_close" }, + { from: "wt_run", to: "append", label: "run/status result" }, + { from: "wt_close", to: "events", label: "emit closeout" }, + { from: "wt_close", to: "append", label: "closeout result" }, + { from: "events", to: "events_read", label: "optional query" }, + { from: "events_read", to: "append", label: "events result" }, + { from: "append", to: "llm" }, + ], + }, }; export function getFlowForVersion(version: string): FlowDefinition | null { diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json index cbc4093..0cd83a4 100644 --- a/web/src/data/generated/docs.json +++ b/web/src/data/generated/docs.json @@ -3,7 +3,7 @@ "version": "s01", "locale": "en", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n> The entire secret of AI coding agents is a while loop that feeds tool results back to the model until the model decides to stop.\n\n## The Problem\n\nWhy can't a language model just answer a coding question? Because coding\nrequires _interaction with the real world_. The model needs to read files,\nrun tests, check errors, and iterate. A single prompt-response pair cannot\ndo this.\n\nWithout the agent loop, you would have to copy-paste outputs back into the\nmodel yourself. The user becomes the loop. 
The agent loop automates this:\ncall the model, execute whatever tools it asks for, feed the results back,\nrepeat until the model says \"I'm done.\"\n\nConsider a simple task: \"Create a Python file that prints hello.\" The model\nneeds to (1) decide to write a file, (2) write it, (3) verify it works.\nThat is three tool calls minimum. Without a loop, each one requires manual\nhuman intervention.\n\n## The Solution\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## How It Works\n\n1. The user provides a prompt. It becomes the first message.\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. The messages array is sent to the LLM along with the tool definitions.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. The assistant response is appended to messages.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. We check the stop reason. If the model did not call a tool, the loop\n ends. This is the only exit condition.\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. For each tool_use block in the response, execute the tool (bash in this\n session) and collect results.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. 
The results are appended as a user message, and the loop continues.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## Key Code\n\nThe minimum viable agent -- the entire pattern in under 30 lines\n(from `agents/s01_agent_loop.py`, lines 66-86):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## What Changed\n\nThis is session 1 -- the starting point. There is no prior session.\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Design Rationale\n\nThis loop is the universal foundation of all LLM-based agents. Production implementations add error handling, token counting, streaming, and retry logic, but the fundamental structure is unchanged. The simplicity is the point: one exit condition (`stop_reason != \"tool_use\"`) controls the entire flow. Everything else in this course -- tools, planning, compression, teams -- layers on top of this loop without modifying it. Understanding this loop means understanding every agent.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\nExample prompts to try:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. 
`List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\n\n> The core of a coding agent is a while loop that feeds tool results back to the model until the model decides to stop.\n\n## The Problem\n\nWhy can't a language model just answer a coding question? Because coding\nrequires _interaction with the real world_. The model needs to read files,\nrun tests, check errors, and iterate. A single prompt-response pair cannot\ndo this.\n\nWithout the agent loop, you would have to copy-paste outputs back into the\nmodel yourself. The user becomes the loop. The agent loop automates this:\ncall the model, execute whatever tools it asks for, feed the results back,\nrepeat until the model says \"I'm done.\"\n\nConsider a simple task: \"Create a Python file that prints hello.\" The model\nneeds to (1) decide to write a file, (2) write it, (3) verify it works.\nThat is three tool calls minimum. Without a loop, each one requires manual\nhuman intervention.\n\n## The Solution\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## How It Works\n\n1. The user provides a prompt. It becomes the first message.\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. The messages array is sent to the LLM along with the tool definitions.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. The assistant response is appended to messages.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. We check the stop reason. 
If the model did not call a tool, the loop\n ends. In this minimal lesson implementation, this is the only loop exit\n condition.\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. For each tool_use block in the response, execute the tool (bash in this\n session) and collect results.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. The results are appended as a user message, and the loop continues.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## Key Code\n\nThe minimum viable agent -- the entire pattern in under 30 lines\n(from `agents/s01_agent_loop.py`, lines 66-86):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## What Changed\n\nThis is session 1 -- the starting point. There is no prior session.\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Design Rationale\n\nThis loop is the foundation of LLM-based agents. 
Production implementations add error handling, token counting, streaming, retry logic, permission policy, and lifecycle orchestration, but the core interaction pattern still starts here. The simplicity is the point for this session: in this minimal implementation, one exit condition (`stop_reason != \"tool_use\"`) controls the flow we need to learn first. Everything else in this course layers on top of this loop. Understanding this loop gives you the base model, not the full production architecture.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\nExample prompts to try:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", @@ -15,43 +15,43 @@ "version": "s03", "locale": "en", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n> A TodoManager lets the agent track its own progress, and a nag reminder injection forces it to keep updating when it forgets.\n\n## The Problem\n\nWhen an agent works on a multi-step task, it often loses track of what it\nhas done and what remains. Without explicit planning, the model might repeat\nwork, skip steps, or wander off on tangents. The user has no visibility\ninto the agent's internal plan.\n\nThis is worse than it sounds. Long conversations cause the model to \"drift\"\n-- the system prompt fades in influence as the context window fills with\ntool results. A 10-step refactoring task might complete steps 1-3, then\nthe model starts improvising because it forgot steps 4-10 existed.\n\nThe solution is structured state: a TodoManager that the model writes to\nexplicitly. The model creates a plan, marks items in_progress as it works,\nand marks them completed when done. 
A nag reminder injects a nudge if the\nmodel goes 3+ rounds without updating its todos.\n\nTeaching simplification: the nag threshold of 3 rounds is set low for\nteaching visibility. Production agents typically use a higher threshold\naround 10 to avoid excessive prompting.\n\n## The Solution\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. The TodoManager validates and stores a list of items with statuses.\n Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool is added to the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. 
The nag reminder injects a `` tag into the tool_result\n messages when the model goes 3+ rounds without calling `todo`.\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. The system prompt instructs the model to use todos for planning.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## Key Code\n\nThe TodoManager and nag injection (from `agents/s03_todo_write.py`,\nlines 51-85 and 158-187):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses|\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Design Rationale\n\nVisible plans improve task completion because the model can self-monitor progress. 
The nag mechanism creates accountability -- without it, the model may abandon plans mid-execution as conversation context grows and earlier instructions fade. The \"one in_progress at a time\" constraint enforces sequential focus, preventing context-switching overhead that degrades output quality. This pattern works because it externalizes the model's working memory into structured state that survives attention drift.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\nExample prompts to try:\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\n\n> A TodoManager lets the agent track its own progress, and a nag reminder injection forces it to keep updating when it forgets.\n\n## The Problem\n\nWhen an agent works on a multi-step task, it often loses track of what it\nhas done and what remains. Without explicit planning, the model might repeat\nwork, skip steps, or wander off on tangents. The user has no visibility\ninto the agent's internal plan.\n\nThis is worse than it sounds. Long conversations cause the model to \"drift\"\n-- the system prompt fades in influence as the context window fills with\ntool results. A 10-step refactoring task might complete steps 1-3, then\nthe model starts improvising because it forgot steps 4-10 existed.\n\nThe solution is structured state: a TodoManager that the model writes to\nexplicitly. The model creates a plan, marks items in_progress as it works,\nand marks them completed when done. A nag reminder injects a nudge if the\nmodel goes 3+ rounds without updating its todos.\n\nNote: the nag threshold of 3 rounds is low for visibility. Production systems tune higher. 
From s07, this course switches to the Task board for durable multi-step work; TodoWrite remains available for quick checklists.\n\n## The Solution\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. The TodoManager validates and stores a list of items with statuses.\n Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool is added to the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. The nag reminder injects a `` tag into the tool_result\n messages when the model goes 3+ rounds without calling `todo`.\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. 
The system prompt instructs the model to use todos for planning.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## Key Code\n\nThe TodoManager and nag injection (from `agents/s03_todo_write.py`,\nlines 51-85 and 158-187):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses|\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Design Rationale\n\nVisible plans improve task completion because the model can self-monitor progress. The nag mechanism creates accountability -- without it, the model may abandon plans mid-execution as conversation context grows and earlier instructions fade. The \"one in_progress at a time\" constraint enforces sequential focus, preventing context-switching overhead that degrades output quality. This pattern works because it externalizes the model's working memory into structured state that survives attention drift.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\nExample prompts to try:\n\n1. 
`Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "en", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n> A subagent runs with a fresh messages list, shares the filesystem with the parent, and returns only a summary -- keeping the parent context clean.\n\n## The Problem\n\nAs the agent works, its messages array grows. Every tool call, every file\nread, every bash output accumulates. After 20-30 tool calls, the context\nwindow is crowded with irrelevant history. Reading a 500-line file to\nanswer a quick question permanently adds 500 lines to the context.\n\nThis is particularly bad for exploratory tasks. \"What testing framework\ndoes this project use?\" might require reading 5 files, but the parent\nagent does not need all 5 file contents in its history -- it just needs\nthe answer: \"pytest with conftest.py configuration.\"\n\nThe solution is process isolation: spawn a child agent with `messages=[]`.\nThe child explores, reads files, runs commands. When it finishes, only its\nfinal text response returns to the parent. The child's entire message\nhistory is discarded.\n\n## The Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## How It Works\n\n1. 
The parent agent gets a `task` tool that triggers subagent spawning.\n The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with a fresh messages list containing only\n the delegated prompt. It shares the same filesystem.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. Only the final text returns to the parent. The child's 30+ tool\n call history is discarded.\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 
The parent receives this summary as a normal tool_result.\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## Key Code\n\nThe subagent function (from `agents/s04_subagent.py`,\nlines 110-128):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n| Todo system | TodoManager | Removed (not needed here) |\n\n## Design Rationale\n\nProcess isolation gives context isolation for free. A fresh `messages[]` means the subagent cannot be confused by the parent's conversation history. The tradeoff is communication overhead -- results must be compressed back to the parent, losing detail. This is the same tradeoff as OS process isolation: safety and cleanliness in exchange for serialization cost. 
Limiting subagent depth (no recursive spawning) prevents unbounded resource consumption, and a max iteration count ensures runaway children terminate.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\nExample prompts to try:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\n\n> A subagent runs with a fresh messages list, shares the filesystem with the parent, and returns only a summary -- keeping the parent context clean.\n\n## The Problem\n\nAs the agent works, its messages array grows. Every tool call, every file\nread, every bash output accumulates. After 20-30 tool calls, the context\nwindow is crowded with irrelevant history. Reading a 500-line file to\nanswer a quick question permanently adds 500 lines to the context.\n\nThis is particularly bad for exploratory tasks. \"What testing framework\ndoes this project use?\" might require reading 5 files, but the parent\nagent does not need all 5 file contents in its history -- it just needs\nthe answer: \"pytest with conftest.py configuration.\"\n\nIn this course, a practical solution is fresh-context isolation: spawn a child agent with `messages=[]`.\nThe child explores, reads files, runs commands. When it finishes, only its\nfinal text response returns to the parent. The child's entire message\nhistory is discarded.\n\n## The Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## How It Works\n\n1. 
The parent agent gets a `task` tool that triggers subagent spawning.\n The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with a fresh messages list containing only\n the delegated prompt. It shares the same filesystem.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. Only the final text returns to the parent. The child's 30+ tool\n call history is discarded.\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 
The parent receives this summary as a normal tool_result.\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## Key Code\n\nThe subagent function (from `agents/s04_subagent.py`,\nlines 110-128):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Design Rationale\n\nFresh-context isolation is a practical way to approximate context isolation in this session. A fresh `messages[]` means the subagent starts without the parent's conversation history. The tradeoff is communication overhead -- results must be compressed back to the parent, losing detail. This is a message-history isolation strategy, not OS process isolation. 
Limiting subagent depth (no recursive spawning) prevents unbounded resource consumption, and a max iteration count ensures runaway children terminate.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\nExample prompts to try:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "en", "title": "s05: Skills", - "content": "# s05: Skills\n\n> Two-layer skill injection avoids system prompt bloat by putting skill names in the system prompt (cheap) and full skill bodies in tool_result (on demand).\n\n## The Problem\n\nYou want the agent to follow specific workflows for different domains:\ngit conventions, testing patterns, code review checklists. The naive\napproach is to put everything in the system prompt. But the system prompt\nhas limited effective attention -- too much text and the model starts\nignoring parts of it.\n\nIf you have 10 skills at 2000 tokens each, that is 20,000 tokens of system\nprompt. The model pays attention to the beginning and end but skims the\nmiddle. Worse, most of those skills are irrelevant to any given task. A\nfile editing task does not need the git workflow instructions.\n\nThe two-layer approach solves this: Layer 1 puts short skill descriptions\nin the system prompt (~100 tokens per skill). Layer 2 loads the full skill\nbody into a tool_result only when the model calls `load_skill`. The model\nlearns what skills exist (cheap) and loads them on demand (only when\nrelevant).\n\n## The Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. 
|\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## How It Works\n\n1. Skill files live in `.skills/` as Markdown with YAML frontmatter.\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. The SkillLoader parses frontmatter and separates metadata from body.\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. Layer 1: `get_descriptions()` returns short lines for the system prompt.\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. Layer 2: `get_content()` returns the full body wrapped in `` tags.\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n5. 
The `load_skill` tool is just another entry in the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## Key Code\n\nThe SkillLoader class (from `agents/s05_skill_loading.py`,\nlines 51-97):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"\\n\"\n f\"{skill['body']}\\n\")\n```\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | .skills/*.md files |\n| Injection | None | Two-layer (system + result)|\n| Subagent | `run_subagent()` | Removed (different focus) |\n\n## Design Rationale\n\nTwo-layer injection solves the attention budget problem. Putting all skill content in the system prompt wastes tokens on unused skills. Layer 1 (compact summaries) costs roughly 120 tokens total. Layer 2 (full content) loads on demand via tool_result. This scales to dozens of skills without degrading model attention quality. The key insight is that the model only needs to know what skills exist (cheap) to decide when to load one (expensive). 
This is the same lazy-loading principle used in software module systems.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\nExample prompts to try:\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\n\n> Two-layer skill injection avoids system prompt bloat by putting skill names in the system prompt (cheap) and full skill bodies in tool_result (on demand).\n\n## The Problem\n\nYou want the agent to follow specific workflows for different domains:\ngit conventions, testing patterns, code review checklists. The naive\napproach is to put everything in the system prompt. But the system prompt\nhas limited effective attention -- too much text and the model starts\nignoring parts of it.\n\nIf you have 10 skills at 2000 tokens each, that is 20,000 tokens of system\nprompt. The model pays attention to the beginning and end but skims the\nmiddle. Worse, most of those skills are irrelevant to any given task. A\nfile editing task does not need the git workflow instructions.\n\nThe two-layer approach solves this: Layer 1 puts short skill descriptions\nin the system prompt (~100 tokens per skill). Layer 2 loads the full skill\nbody into a tool_result only when the model calls `load_skill`. The model\nlearns what skills exist (cheap) and loads them on demand (only when\nrelevant).\n\n## The Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... 
| ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## How It Works\n\n1. Skill files live in `.skills/` as Markdown with YAML frontmatter.\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. The SkillLoader parses frontmatter and separates metadata from body.\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. Layer 1: `get_descriptions()` returns short lines for the system prompt.\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. Layer 2: `get_content()` returns the full body wrapped in `` tags.\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n5. 
The `load_skill` tool is just another entry in the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## Key Code\n\nThe SkillLoader class (from `agents/s05_skill_loading.py`,\nlines 51-97):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"\\n\"\n f\"{skill['body']}\\n\")\n```\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | .skills/*.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Design Rationale\n\nTwo-layer injection solves the attention budget problem. Putting all skill content in the system prompt wastes tokens on unused skills. Layer 1 (compact summaries) costs roughly 120 tokens total. Layer 2 (full content) loads on demand via tool_result. This scales to dozens of skills without degrading model attention quality. The key insight is that the model only needs to know what skills exist (cheap) to decide when to load one (expensive). This is the same lazy-loading principle used in software module systems.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\nExample prompts to try:\n\n1. 
`What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "en", "title": "s06: Compact", - "content": "# s06: Compact\n\n> A three-layer compression pipeline lets the agent work indefinitely by strategically forgetting old tool results, auto-summarizing when tokens exceed a threshold, and allowing manual compression on demand.\n\n## The Problem\n\nThe context window is finite. After enough tool calls, the messages array\nexceeds the model's context limit and the API call fails. Even before\nhitting the hard limit, performance degrades: the model becomes slower,\nless accurate, and starts ignoring earlier messages.\n\nA 200,000 token context window sounds large, but a single `read_file` on\na 1000-line source file consumes ~4000 tokens. After reading 30 files and\nrunning 20 bash commands, you are at 100,000+ tokens. The agent cannot\nwork on large codebases without some form of compression.\n\nThe three-layer pipeline addresses this with increasing aggressiveness:\nLayer 1 (micro-compact) silently replaces old tool results every turn.\nLayer 2 (auto-compact) triggers a full summarization when tokens exceed\na threshold. Layer 3 (manual compact) lets the model trigger compression\nitself.\n\nTeaching simplification: the token estimation here uses a rough\ncharacters/4 heuristic. 
Production systems use proper tokenizer\nlibraries for accurate counts.\n\n## The Solution\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, find all\n tool_result entries older than the last 3 and replace their content.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Layer 2 -- auto_compact**: When estimated tokens exceed 50,000,\n save the full transcript and ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same\n summarization on demand.\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. The agent loop integrates all three layers.\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## Key Code\n\nThe three-layer pipeline (from `agents/s06_context_compact.py`,\nlines 67-93 and 189-223):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Manual compact | None | `compact` tool |\n| Transcripts | None | Saved to .transcripts/ |\n| Skills | load_skill | Removed (different focus) |\n\n## Design Rationale\n\nContext windows are finite, but agent sessions can be infinite. Three compression layers solve this at different granularities: micro-compact (replace old tool outputs), auto-compact (LLM summarizes when approaching limit), and manual compact (user-triggered). The key insight is that forgetting is a feature, not a bug -- it enables unbounded sessions. Transcripts preserve the full history on disk so nothing is truly lost, just moved out of the active context. The layered approach lets each layer operate independently at its own granularity, from silent per-turn cleanup to full conversation reset.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\nExample prompts to try:\n\n1. 
`Read every Python file in the agents/ directory one by one`\n (watch micro-compact replace old results)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Compact\n\n> A three-layer compression pipeline lets the agent work indefinitely by strategically forgetting old tool results, auto-summarizing when tokens exceed a threshold, and allowing manual compression on demand.\n\n## The Problem\n\nThe context window is finite. After enough tool calls, the messages array\nexceeds the model's context limit and the API call fails. Even before\nhitting the hard limit, performance degrades: the model becomes slower,\nless accurate, and starts ignoring earlier messages.\n\nA 200,000 token context window sounds large, but a single `read_file` on\na 1000-line source file consumes ~4000 tokens. After reading 30 files and\nrunning 20 bash commands, you are at 100,000+ tokens. The agent cannot\nwork on large codebases without some form of compression.\n\nThe three-layer pipeline addresses this with increasing aggressiveness:\nLayer 1 (micro-compact) silently replaces old tool results every turn.\nLayer 2 (auto-compact) triggers a full summarization when tokens exceed\na threshold. Layer 3 (manual compact) lets the model trigger compression\nitself.\n\nTeaching simplification: the token estimation here uses a rough\ncharacters/4 heuristic. 
Production systems use proper tokenizer\nlibraries for accurate counts.\n\n## The Solution\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, find all\n tool_result entries older than the last 3 and replace their content.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Layer 2 -- auto_compact**: When estimated tokens exceed 50,000,\n save the full transcript and ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same\n summarization on demand.\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. The agent loop integrates all three layers.\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## Key Code\n\nThe three-layer pipeline (from `agents/s06_context_compact.py`,\nlines 67-93 and 189-223):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Manual compact | None | `compact` tool |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Design Rationale\n\nContext windows are finite, but agent sessions can be infinite. Three compression layers solve this at different granularities: micro-compact (replace old tool outputs), auto-compact (LLM summarizes when approaching limit), and manual compact (user-triggered). The key insight is that forgetting is a feature, not a bug -- it enables unbounded sessions. Transcripts preserve the full history on disk so nothing is truly lost, just moved out of the active context. The layered approach lets each layer operate independently at its own granularity, from silent per-turn cleanup to full conversation reset.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\nExample prompts to try:\n\n1. `Read every Python file in the agents/ directory one by one`\n (watch micro-compact replace old results)\n2. 
`Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "en", "title": "s07: Tasks", - "content": "# s07: Tasks\n\n> Tasks persist as JSON files on the filesystem with a dependency graph, so they survive context compression and can be shared across agents.\n\n## The Problem\n\nIn-memory state like TodoManager (s03) is lost when the context is\ncompressed (s06). After auto_compact replaces messages with a summary,\nthe todo list is gone. The agent has to reconstruct it from the summary\ntext, which is lossy and error-prone.\n\nThis is the critical s06-to-s07 bridge: TodoManager items die with\ncompression; file-based tasks don't. Moving state to the filesystem\nmakes it compression-proof.\n\nMore fundamentally, in-memory state is invisible to other agents.\nWhen we eventually build teams (s09+), teammates need a shared task\nboard. In-memory data structures are process-local.\n\nThe solution is to persist tasks as JSON files in `.tasks/`. Each task\nis a separate file with an ID, subject, status, and dependency graph.\nCompleting task 1 automatically unblocks task 2 if task 2 has\n`blockedBy: [1]`. The file system becomes the source of truth.\n\n## The Solution\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's blockedBy list\n```\n\n## How It Works\n\n1. The TaskManager provides CRUD operations. 
Each task is a JSON file.\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. When a task is marked completed, `_clear_dependency` removes its ID\n from all other tasks' `blockedBy` lists.\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. The `update` method handles status changes and bidirectional dependency\n wiring.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. 
Four task tools are added to the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## Key Code\n\nThe TaskManager with dependency graph (from `agents/s07_task_system.py`,\nlines 46-123):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 8 (+task_create/update/list/get)|\n| State storage | In-memory only | JSON files in .tasks/ |\n| Dependencies | None | blockedBy + blocks graph |\n| Compression | Three-layer | Removed (different focus) |\n| Persistence | Lost on compact | Survives compression |\n\n## Design Rationale\n\nFile-based state survives context compression. When the agent's conversation is compacted, in-memory state is lost, but tasks written to disk persist. 
The dependency graph ensures correct execution order even after context loss. This is the bridge between ephemeral conversation and persistent work -- the agent can forget conversation details but always has the task board to remind it what needs doing. The filesystem as source of truth also enables future multi-agent sharing, since any process can read the same JSON files.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\nExample prompts to try:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" + "content": "# s07: Tasks\n\n> Tasks are persisted as JSON files with a dependency graph, so state survives context compression and can be shared across agents.\n\n## Problem\n\nIn-memory state (for example the TodoManager from s03) is fragile under compression (s06). Once earlier turns are compacted into summaries, in-memory todo state is gone.\n\ns06 -> s07 is the key transition:\n\n1. Todo list state in memory is conversational and lossy.\n2. Task board state on disk is durable and recoverable.\n\nA second issue is visibility: in-memory structures are process-local, so teammates cannot reliably share that state.\n\n## When to Use Task vs Todo\n\nFrom s07 onward, Task is the default. 
Todo remains for short linear checklists.\n\n## Quick Decision Matrix\n\n| Situation | Prefer | Why |\n|---|---|---|\n| Short, single-session checklist | Todo | Lowest ceremony, fastest capture |\n| Cross-session work, dependencies, or teammates | Task | Durable state, dependency graph, shared visibility |\n| Unsure which one to use | Task | Easier to simplify later than migrate mid-run |\n\n## Solution\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's blockedBy list\n```\n\n## How It Works\n\n1. TaskManager provides CRUD with one JSON file per task.\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. Completing a task clears that dependency from other tasks.\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. 
`update` handles status transitions and dependency wiring.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. Task tools are added to the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## Key Code\n\nTaskManager with dependency graph (from `agents/s07_task_system.py`, lines 46-123):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## What Changed From s06\n\n| Component | Before 
(s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| State storage | In-memory only | JSON files in `.tasks/` |\n| Dependencies | None | `blockedBy + blocks` graph |\n| Persistence | Lost on compact | Survives compression |\n\n## Design Rationale\n\nFile-based state survives compaction and process restarts. The dependency graph preserves execution order even when conversation details are forgotten. This turns transient chat context into durable work state.\n\nDurability still needs a write discipline: reload task JSON before each write, validate expected `status/blockedBy`, then persist atomically. Otherwise concurrent writers can overwrite each other.\n\nCourse-level implication: s07+ defaults to Task because it better matches long-running and collaborative engineering workflows.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\nSuggested prompts:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" }, { "version": "s08", "locale": "en", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n> A BackgroundManager runs commands in separate threads and drains a notification queue before each LLM call, so the agent never blocks on long-running operations.\n\n## The Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With\na blocking agent loop, the model sits idle waiting for the subprocess to\nfinish. It cannot do anything else. If the user asked \"install dependencies\nand while that runs, create the config file,\" the agent would install\nfirst, _then_ create the config -- sequentially, not in parallel.\n\nThe agent needs concurrency. 
Not full multi-threading of the agent loop\nitself, but the ability to fire off a long command and continue working\nwhile it runs. When the command finishes, its result should appear\nnaturally in the conversation.\n\nThe solution is a BackgroundManager that runs commands in daemon threads\nand collects results in a notification queue. Before each LLM call, the\nqueue is drained and results are injected into the messages.\n\n## The Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## How It Works\n\n1. The BackgroundManager tracks tasks and maintains a thread-safe\n notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns a task_id immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
The thread target `_execute` runs the subprocess and pushes\n results to the notification queue.\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()` returns and clears pending results.\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"\"\n f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## Key Code\n\nThe BackgroundManager (from `agents/s08_background_tasks.py`, lines 49-107):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n 
def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n| Task system | File-based CRUD | Removed (different focus) |\n\n## Design Rationale\n\nThe agent loop is inherently single-threaded (one LLM call at a time). Background threads break this constraint for I/O-bound work (tests, builds, installs). The notification queue pattern (\"drain before next LLM call\") ensures results arrive at natural conversation breakpoints rather than interrupting the model's reasoning mid-thought. This is a minimal concurrency model: the agent loop stays single-threaded and deterministic, while only the I/O-bound subprocess execution is parallelized.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\nExample prompts to try:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\n\n> A BackgroundManager runs commands in separate threads and drains a notification queue before each LLM call, so the agent never blocks on long-running operations.\n\n## The Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With\na blocking agent loop, the model sits idle waiting for the subprocess to\nfinish. It cannot do anything else. 
If the user asked \"install dependencies\nand while that runs, create the config file,\" the agent would install\nfirst, _then_ create the config -- sequentially, not in parallel.\n\nThe agent needs concurrency. Not full multi-threading of the agent loop\nitself, but the ability to fire off a long command and continue working\nwhile it runs. When the command finishes, its result should appear\nnaturally in the conversation.\n\nThe solution is a BackgroundManager that runs commands in daemon threads\nand collects results in a notification queue. Before each LLM call, the\nqueue is drained and results are injected into the messages.\n\n## The Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## How It Works\n\n1. The BackgroundManager tracks tasks and maintains a thread-safe\n notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns a task_id immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
The thread target `_execute` runs the subprocess and pushes\n results to the notification queue.\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()` returns and clears pending results.\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"\"\n f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## Key Code\n\nThe BackgroundManager (from `agents/s08_background_tasks.py`, lines 49-107):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n 
def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Design Rationale\n\nThe agent loop is inherently single-threaded (one LLM call at a time). Background threads break this constraint for I/O-bound work (tests, builds, installs). The notification queue pattern (\"drain before next LLM call\") ensures results arrive at natural conversation breakpoints rather than interrupting the model's reasoning mid-thought. This is a minimal concurrency model: the agent loop stays single-threaded and deterministic, while only the I/O-bound subprocess execution is parallelized.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\nExample prompts to try:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "en", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n> Persistent teammates with JSONL inboxes turn isolated agents into a communicating team -- spawn, message, broadcast, and drain.\n\n## The Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. They\nhave no identity, no memory between invocations, and no way to receive\nfollow-up instructions. 
Background tasks (s08) run shell commands but\ncannot make LLM-guided decisions or communicate findings.\n\nFor real teamwork you need three things: (1) persistent agents that\nsurvive beyond a single prompt, (2) identity and lifecycle management,\nand (3) a communication channel between agents. Without messaging, even\npersistent teammates are deaf and mute -- they can work in parallel but\nnever coordinate.\n\nThe solution combines a TeammateManager for spawning persistent named\nagents with a MessageBus using JSONL inbox files. Each teammate runs\nits own agent loop in a thread, checks its inbox before every LLM call,\nand can send messages to any other teammate or the lead.\n\nNote on the s06-to-s07 bridge: TodoManager items from s03 die with\ncompression (s06). File-based tasks (s07) survive compression because\nthey live on disk. Teams build on this same principle -- config.json and\ninbox files persist outside the context window.\n\n## The Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## How It Works\n\n1. 
The TeammateManager maintains config.json with the team roster.\n Each member has a name, role, and status.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n Re-spawning an idle teammate reactivates it.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. The MessageBus handles JSONL inbox files. `send()` appends a JSON\n line; `read_inbox()` reads all lines and drains the file.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
Each teammate checks its inbox before every LLM call and injects\n received messages into the conversation context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. `broadcast()` sends the same message to all teammates except the\n sender.\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## Key Code\n\nThe TeammateManager + MessageBus core (from `agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def 
read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | 5 message types + broadcast|\n\nTeaching simplification: this implementation does not use lock files\nfor inbox access. In production, concurrent append from multiple writers\nwould need file locking or atomic rename. The single-writer-per-inbox\npattern used here is safe for the teaching scenario.\n\n## Design Rationale\n\nFile-based mailboxes (append-only JSONL) provide concurrency-safe inter-agent communication. Append is atomic on most filesystems, avoiding lock contention. The \"drain on read\" pattern (read all, truncate) gives batch delivery. This is simpler and more robust than shared memory or socket-based IPC for agent coordination. The tradeoff is latency -- messages are only seen at the next poll -- but for LLM-driven agents where each turn takes seconds, polling latency is negligible compared to inference time.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\nExample prompts to try:\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. 
Type `/inbox` to manually check the lead's inbox\n" + "content": "# s09: Agent Teams\n\n> Persistent teammates with JSONL inboxes are one teaching protocol for turning isolated agents into a communicating team -- spawn, message, broadcast, and drain.\n\n## The Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. They\nhave no identity, no memory between invocations, and no way to receive\nfollow-up instructions. Background tasks (s08) run shell commands but\ncannot make LLM-guided decisions or communicate findings.\n\nFor real teamwork you need three things: (1) persistent agents that\nsurvive beyond a single prompt, (2) identity and lifecycle management,\nand (3) a communication channel between agents. Without messaging, even\npersistent teammates are deaf and mute -- they can work in parallel but\nnever coordinate.\n\nThe solution combines a TeammateManager for spawning persistent named\nagents with a MessageBus using JSONL inbox files. Each teammate runs\nits own agent loop in a thread, checks its inbox before every LLM call,\nand can send messages to any other teammate or the lead.\n\nNote on the s06-to-s07 bridge: TodoManager items from s03 die with\ncompression (s06). File-based tasks (s07) survive compression because\nthey live on disk. Teams build on this same principle -- config.json and\ninbox files persist outside the context window.\n\n## The Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## How It Works\n\n1. The TeammateManager maintains config.json with the team roster.\n Each member has a name, role, and status.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n Re-spawning an idle teammate reactivates it.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. The MessageBus handles JSONL inbox files. 
`send()` appends a JSON\n line; `read_inbox()` reads all lines and drains the file.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. Each teammate checks its inbox before every LLM call and injects\n received messages into the conversation context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. 
`broadcast()` sends the same message to all teammates except the\n sender.\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## Key Code\n\nThe TeammateManager + MessageBus core (from `agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | 5 message types + broadcast|\n\nTeaching simplification: this implementation does not use lock files\nfor inbox access. In production, concurrent append from multiple writers\nwould need file locking or atomic rename. 
The single-writer-per-inbox\npattern used here is safe for the teaching scenario.\n\n## Design Rationale\n\nFile-based mailboxes (append-only JSONL) are easy to inspect and reason about in a teaching codebase. The \"drain on read\" pattern (read all, truncate) gives batch delivery with very little machinery. The tradeoff is latency -- messages are only seen at the next poll -- but for LLM-driven agents where each turn takes seconds, polling latency is acceptable for this course.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\nExample prompts to try:\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n" }, { "version": "s10", @@ -63,61 +63,67 @@ "version": "s11", "locale": "en", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n> An idle cycle with task board polling lets teammates find and claim work themselves, with identity re-injection after context compression.\n\n## The Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must\nspawn each teammate with a specific prompt. If the task board has 10\nunclaimed tasks, the lead must manually assign each one. This does not\nscale.\n\nTrue autonomy means teammates find work themselves. When a teammate\nfinishes its current task, it should scan the task board for unclaimed\nwork, claim a task, and start working -- without any instruction from\nthe lead.\n\nBut autonomous agents face a subtlety: after context compression, the\nagent might forget who it is. 
If the messages are summarized, the\noriginal system prompt identity (\"you are alice, role: coder\") fades.\nIdentity re-injection solves this by inserting an identity block at the\nstart of compressed contexts.\n\nTeaching simplification: the token estimation used here is rough\n(characters / 4). Production systems use proper tokenizer libraries.\nThe nag threshold of 3 rounds (from s03) is set low for teaching\nvisibility; production agents typically use a higher threshold around 10.\n\n## The Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. WORK runs the\n standard agent loop. 
When the LLM stops calling tools (or calls\n the `idle` tool), the teammate enters the IDLE phase.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. The idle phase polls the inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 
Task board scanning looks for pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. Identity re-injection inserts an identity block when the context\n is too short, indicating compression has occurred.\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. The `idle` tool lets the teammate explicitly signal it has no more\n work, entering the idle polling phase early.\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. 
\"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## Key Code\n\nThe autonomous loop (from `agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Design Rationale\n\nPolling + timeout makes agents self-organizing without a central coordinator. Each agent independently polls the task board, claims unclaimed work, and returns to idle when done. The timeout triggers the poll cycle, and if no work appears within the window, the agent shuts itself down. This is the same pattern as work-stealing thread pools -- distributed, no single point of failure. 
Identity re-injection after compression ensures agents maintain their role even when conversation history is summarized away.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\nExample prompts to try:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" + "content": "# s11: Autonomous Agents\n\n> An idle cycle with task board polling lets teammates find and claim work themselves, with identity re-injection after context compression.\n\n## The Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must\nspawn each teammate with a specific prompt. If the task board has 10\nunclaimed tasks, the lead must manually assign each one. This does not\nscale.\n\nTrue autonomy means teammates find work themselves. When a teammate\nfinishes its current task, it should scan the task board for unclaimed\nwork, claim a task, and start working -- without any instruction from\nthe lead.\n\nBut autonomous agents face a subtlety: after context compression, the\nagent might forget who it is. If the messages are summarized, the\noriginal system prompt identity (\"you are alice, role: coder\") fades.\nIdentity re-injection solves this by inserting an identity block at the\nstart of compressed contexts.\n\nNote: token estimation here uses characters/4 (rough). 
The nag threshold of 3 rounds is low for teaching visibility.\n\n## The Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. WORK runs the\n standard agent loop. When the LLM stops calling tools (or calls\n the `idle` tool), the teammate enters the IDLE phase.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
The idle phase polls the inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Task board scanning looks for pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. Identity re-injection inserts an identity block when the context\n is too short, indicating compression has occurred.\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. 
The `idle` tool lets the teammate explicitly signal it has no more\n work, entering the idle polling phase early.\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. \"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## Key Code\n\nThe autonomous loop (from `agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Design Rationale\n\nPolling + timeout makes agents self-organizing without a central coordinator. Each agent independently polls the task board, claims unclaimed work, and returns to idle when done. The timeout triggers the poll cycle, and if no work appears within the window, the agent shuts itself down. This is the same pattern as work-stealing thread pools -- distributed, no single point of failure. 
Identity re-injection after compression ensures agents maintain their role even when conversation history is summarized away.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\nExample prompts to try:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" + }, + { + "version": "s12", + "locale": "en", + "title": "s12: Worktree + Task Isolation", + "content": "# s12: Worktree + Task Isolation\n\n> Isolate by directory, coordinate by task ID -- tasks are the control plane, worktrees are the execution plane, and an event stream makes every lifecycle step observable.\n\n## The Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Ask two agents to refactor different modules at the same time and you hit three failure modes:\n\nAgent A edits `auth.py`. Agent B edits `auth.py`. Neither knows the other touched it. Unstaged changes collide, task status says \"in_progress\" but the directory is a mess, and when something breaks there is no way to roll back one agent's work without destroying the other's. The task board tracks _what to do_ but has no opinion about _where to do it_.\n\nThe fix is to separate the two concerns. Tasks manage goals. Worktrees manage execution context. 
Bind them by task ID, and each agent gets its own directory, its own branch, and a clean teardown path.\n\n## The Solution\n\n```\nControl Plane (.tasks/) Execution Plane (.worktrees/)\n+---------------------------+ +---------------------------+\n| task_1.json | | index.json |\n| id: 1 | | name: \"auth-refactor\" |\n| subject: \"Auth refactor\"| bind | path: \".worktrees/...\" |\n| status: \"in_progress\" | <----> | branch: \"wt/auth-...\" |\n| worktree: \"auth-refactor\"| | task_id: 1 |\n+---------------------------+ | status: \"active\" |\n +---------------------------+\n| task_2.json | | |\n| id: 2 | bind | name: \"ui-login\" |\n| subject: \"Login page\" | <----> | task_id: 2 |\n| worktree: \"ui-login\" | | status: \"active\" |\n+---------------------------+ +---------------------------+\n |\n +---------------------------+\n | events.jsonl (append-only)|\n | worktree.create.before |\n | worktree.create.after |\n | worktree.remove.after |\n | task.completed |\n +---------------------------+\n```\n\nThree state layers make this work:\n\n1. **Control plane** (`.tasks/task_*.json`) -- what is assigned, in progress, or done. Key fields: `id`, `subject`, `status`, `owner`, `worktree`.\n2. **Execution plane** (`.worktrees/index.json`) -- where commands run and whether the workspace is still valid. Key fields: `name`, `path`, `branch`, `task_id`, `status`.\n3. **Runtime state** (in-memory) -- per-turn execution continuity: `current_task`, `current_worktree`, `tool_result`, `error`.\n\n## How It Works\n\nThe lifecycle has five steps. Each step is a tool call.\n\n1. **Create a task.** Persist the goal first. The task starts as `pending` with an empty `worktree` field.\n\n```python\ntask = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n}\nself._save(task)\n```\n\n2. **Create a worktree.** Allocate an isolated directory and branch. 
If you pass `task_id`, the task auto-advances to `in_progress` and the binding is written to both sides.\n\n```python\nself._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\nentry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n}\nidx[\"worktrees\"].append(entry)\nself._save_index(idx)\n\nif task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n```\n\n3. **Run commands in the worktree.** `worktree_run` sets `cwd` to the worktree path. Edits happen in the isolated directory, not the shared workspace.\n\n```python\nr = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n)\n```\n\n4. **Observe.** `worktree_status` shows git state inside the isolated context. `worktree_events` queries the append-only event stream.\n\n5. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory, mark lifecycle as `kept`.\n - `worktree_remove(name, complete_task=True)` -- remove the directory, complete the bound task, unbind, and emit `task.completed`. 
This is the closeout pattern: one call handles teardown and task completion together.\n\n## State Machines\n\n```\nTask: pending -------> in_progress -------> completed\n (worktree_create (worktree_remove\n with task_id) with complete_task=true)\n\nWorktree: absent --------> active -----------> removed | kept\n (worktree_create) (worktree_remove | worktree_keep)\n```\n\n## Key Code\n\nThe closeout pattern -- teardown + task completion in one operation (from `agents/s12_worktree_task_isolation.py`):\n\n```python\ndef remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\"task.completed\", task={\n \"id\": task_id, \"status\": \"completed\",\n }, worktree={\"name\": name})\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n worktree={\"name\": name},\n error=str(e),\n )\n raise\n```\n\nThe task-side binding (from `agents/s12_worktree_task_isolation.py`):\n\n```python\ndef 
bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n```\n\nThe dispatch map wiring all tools together:\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"]),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n```\n\n## Event Stream\n\nWorktree creation and removal each emit a `before` event followed by either `after` or `failed`; `worktree.keep` and `task.completed` are single events. All of them are appended to `.worktrees/events.jsonl`.
This is an append-only log, not a replacement for task/worktree state files.\n\nEvents emitted:\n\n- `worktree.create.before` / `worktree.create.after` / `worktree.create.failed`\n- `worktree.remove.before` / `worktree.remove.after` / `worktree.remove.failed`\n- `worktree.keep`\n- `task.completed` (when `complete_task=true` succeeds)\n\nPayload shape:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 7, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"path\": \"...\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nThis gives you three things: policy decoupling (audit and notifications stay outside the core flow), failure compensation (`*.failed` records mark partial transitions), and queryability (`worktree_events` tool reads the log directly).\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination state | Task board (`owner/status`) | Task board + explicit `worktree` binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown semantics | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Design Rationale\n\nSeparating control plane from execution plane means you can reason about _what to do_ and _where to do it_ independently. A task can exist without a worktree (planning phase). A worktree can exist without a task (ad-hoc exploration). Binding them is an explicit action that writes state to both sides. This composability is the point -- it keeps the system recoverable after crashes. After an interruption, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Volatile in-memory session state downgrades into explicit, durable file state. 
The event stream adds observability without coupling side effects into the critical path: auditing, notifications, and quota checks consume events rather than intercepting state writes.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\nExample prompts to try:\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, create worktree \"ui-login\", then bind task 2 to \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect worktree events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" }, { "version": "s01", "locale": "zh", "title": "s01: Agent Loop (智能体循环)", - "content": "# s01: Agent Loop (智能体循环)\n\n> AI 编程智能体的全部秘密就是一个 while 循环 -- 把工具执行结果反馈给模型, 直到模型决定停止。\n\n## 问题\n\n为什么语言模型不能直接回答编程问题? 因为编程需要**与真实世界交互**。模型需要读取文件、运行测试、检查错误、反复迭代。单次的提示-响应交互无法做到这些。\n\n没有 agent loop, 你就得手动把输出复制粘贴回模型。用户自己变成了那个循环。Agent loop 将这个过程自动化: 调用模型, 执行它要求的工具, 把结果送回去, 重复 -- 直到模型说 \"我完成了\"。\n\n考虑一个简单任务: \"创建一个打印 hello 的 Python 文件。\" 模型需要 (1) 决定写文件, (2) 写入文件, (3) 验证是否正常工作。至少三次工具调用。没有循环的话, 每一次都需要人工干预。\n\n## 解决方案\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## 工作原理\n\n1. 用户提供一个 prompt, 成为第一条消息。\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 消息数组连同工具定义一起发送给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 助手的响应被追加到消息列表中。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. 
检查 stop_reason。如果模型没有调用工具, 循环结束。这是唯一的退出条件。\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. 对响应中的每个 tool_use 块, 执行工具 (本节课中是 bash) 并收集结果。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. 结果作为 user 消息追加, 循环继续。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 核心代码\n\n最小可行智能体 -- 不到 30 行代码实现整个模式\n(来自 `agents/s01_agent_loop.py`, 第 66-86 行):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 变更内容\n\n这是第 1 节课 -- 起点。没有前置课程。\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 设计原理\n\n这个循环是所有基于 LLM 的智能体的通用基础。生产实现会增加错误处理、token 计数、流式输出和重试逻辑, 但基本结构不变。简洁性就是重点: 一个退出条件 (`stop_reason != \"tool_use\"`) 控制整个流程。本课程中的所有其他内容 -- 工具、规划、压缩、团队 -- 都是在这个循环之上叠加, 而不修改它。理解这个循环就是理解所有智能体。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n可以尝试的提示:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: Agent Loop (智能体循环)\n\n> AI 编程智能体的核心是一个 while 循环 -- 把工具执行结果反馈给模型, 直到模型决定停止。\n\n## 问题\n\n为什么语言模型不能直接回答编程问题? 因为编程需要**与真实世界交互**。模型需要读取文件、运行测试、检查错误、反复迭代。单次的提示-响应交互无法做到这些。\n\n没有 agent loop, 你就得手动把输出复制粘贴回模型。用户自己变成了那个循环。Agent loop 将这个过程自动化: 调用模型, 执行它要求的工具, 把结果送回去, 重复 -- 直到模型说 \"我完成了\"。\n\n考虑一个简单任务: \"创建一个打印 hello 的 Python 文件。\" 模型需要 (1) 决定写文件, (2) 写入文件, (3) 验证是否正常工作。至少三次工具调用。没有循环的话, 每一次都需要人工干预。\n\n## 解决方案\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## 工作原理\n\n1. 用户提供一个 prompt, 成为第一条消息。\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 消息数组连同工具定义一起发送给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 助手的响应被追加到消息列表中。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. 检查 stop_reason。如果模型没有调用工具, 循环结束。在本节最小实现里, 这是唯一的循环退出条件。\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. 对响应中的每个 tool_use 块, 执行工具 (本节课中是 bash) 并收集结果。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. 
结果作为 user 消息追加, 循环继续。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 核心代码\n\n最小可行智能体 -- 不到 30 行代码实现整个模式\n(来自 `agents/s01_agent_loop.py`, 第 66-86 行):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 变更内容\n\n这是第 1 节课 -- 起点。没有前置课程。\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 设计原理\n\n这个循环是所有基于 LLM 的智能体基础。生产实现还会增加错误处理、token 计数、流式输出、重试、权限策略与生命周期编排, 但核心交互模式仍从这里开始。本节强调简洁性: 在本节最小实现里, 一个退出条件 (`stop_reason != \"tool_use\"`) 就能支撑我们先学会主流程。本课程中的其他内容都在这个循环上叠加。理解这个循环是建立基础心智模型, 不是完整的生产架构。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n可以尝试的提示:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", "locale": "zh", "title": "s02: Tools (工具)", - "content": "# s02: Tools (工具)\n\n> 一个分发映射表 (dispatch map) 将工具调用路由到处理函数 -- 循环本身完全不需要改动。\n\n## 问题\n\n只有 `bash` 时, 智能体所有操作都通过 shell: 读文件、写文件、编辑文件。这能用但很脆弱。`cat` 的输出会被不可预测地截断。`sed` 替换遇到特殊字符就会失败。模型浪费大量 token 构造 shell 管道, 而一个直接的函数调用会简单得多。\n\n更重要的是, bash 是一个安全攻击面。每次 bash 调用都能做 shell 能做的一切。有了专用工具如 `read_file` 和 `write_file`, 你可以在工具层面强制路径沙箱化, 阻止危险模式, 而不是寄希望于模型自觉回避。\n\n关键洞察: 添加工具不需要修改循环。s01 的循环保持不变。你只需在工具数组中添加条目, 编写处理函数, 然后通过 dispatch map 把它们关联起来。\n\n## 解决方案\n\n```\n+----------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 为每个工具定义处理函数。每个函数接受与工具 input_schema 对应的关键字参数, 返回字符串结果。\n\n```python\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. 创建 dispatch map, 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 在 agent loop 中, 按名称查找处理函数, 而不是硬编码。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n4. 
路径沙箱化防止模型逃逸出工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n```\n\n## 核心代码\n\ndispatch 模式 (来自 `agents/s02_tool_use.py`, 第 93-129 行):\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit)|\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 设计原理\n\ndispatch map 模式可以线性扩展 -- 添加工具只需添加一个处理函数和一个 schema 条目。循环永远不需要改动。这种关注点分离 (循环 vs 处理函数) 是智能体框架能支持数十个工具而不增加控制流复杂度的原因。该模式还支持对每个处理函数进行独立测试, 因为处理函数是与循环无耦合的纯函数。任何超出 dispatch map 的智能体都是设计问题, 而非扩展问题。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n可以尝试的提示:\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. 
`Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n5. `Run the greet function with bash: python -c \"from greet import greet; greet('World')\"`\n" + "content": "# s02: Tools (工具)\n\n> 一个分发映射表 (dispatch map) 将工具调用路由到处理函数 -- 循环本身完全不需要改动。\n\n## 问题\n\n只有 `bash` 时, 智能体所有操作都通过 shell: 读文件、写文件、编辑文件。这能用但很脆弱。`cat` 的输出会被不可预测地截断。`sed` 替换遇到特殊字符就会失败。模型浪费大量 token 构造 shell 管道, 而一个直接的函数调用会简单得多。\n\n更重要的是, bash 存在安全风险。每次 bash 调用都能做 shell 能做的一切。有了专用工具如 `read_file` 和 `write_file`, 你可以在工具层面强制路径沙箱化, 阻止危险模式, 而不是寄希望于模型自觉回避。\n\n关键洞察: 添加工具不需要修改循环。s01 的循环保持不变。你只需在工具数组中添加条目, 编写处理函数, 然后通过 dispatch map 把它们关联起来。\n\n## 解决方案\n\n```\n+----------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 为每个工具定义处理函数。每个函数接受与工具 input_schema 对应的关键字参数, 返回字符串结果。\n\n```python\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. 创建 dispatch map, 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
在 agent loop 中, 按名称查找处理函数, 而不是硬编码。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n4. 路径沙箱化防止模型逃逸出工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n```\n\n## 核心代码\n\ndispatch 模式 (来自 `agents/s02_tool_use.py`, 第 93-129 行):\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit)|\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 设计原理\n\ndispatch map 模式可以线性扩展 -- 添加工具只需添加一个处理函数和一个 schema 条目。循环永远不需要改动。这种关注点分离 (循环 vs 处理函数) 
是智能体框架能支持数十个工具而不增加控制流复杂度的原因。该模式还支持对每个处理函数进行独立测试, 因为处理函数是与循环无耦合的纯函数。任何超出 dispatch map 的智能体都是设计问题, 而非扩展问题。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n可以尝试的提示:\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n5. `Run the greet function with bash: python -c \"from greet import greet; greet('World')\"`\n" }, { "version": "s03", "locale": "zh", "title": "s03: TodoWrite (待办写入)", - "content": "# s03: TodoWrite (待办写入)\n\n> TodoManager 让智能体能追踪自己的进度, 而 nag reminder 注入机制在它忘记更新时强制提醒。\n\n## 问题\n\n当智能体处理多步骤任务时, 它经常丢失对已完成和待办事项的追踪。没有显式的计划, 模型可能重复工作、跳过步骤或跑偏。用户也无法看到智能体内部的计划。\n\n这个问题比听起来更严重。长对话会导致模型 \"漂移\" -- 随着上下文窗口被工具结果填满, 系统提示的影响力逐渐减弱。一个 10 步的重构任务可能完成了 1-3 步, 然后模型就开始即兴发挥, 因为它忘了第 4-10 步的存在。\n\n解决方案是结构化状态: 一个模型显式写入的 TodoManager。模型创建计划, 工作时将项目标记为 in_progress, 完成后标记为 completed。nag reminder 机制在模型连续 3 轮以上不更新待办时注入提醒。\n\n教学简化说明: 这里 nag 阈值设为 3 轮是为了教学可见性。生产环境的智能体通常使用约 10 轮的阈值以避免过度提醒。\n\n## 解决方案\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. 
TodoManager 验证并存储一组带状态的项目。同一时间只允许一个项目处于 `in_progress` 状态。\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样添加到 dispatch map 中。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder 在模型连续 3 轮以上不调用 `todo` 时, 向 tool_result 消息中注入 `` 标签。\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. 
系统提示指导模型使用 todo 进行规划。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## 核心代码\n\nTodoManager 和 nag 注入 (来自 `agents/s03_todo_write.py`,\n第 51-85 行和第 158-187 行):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器|\n\n## 设计原理\n\n可见的计划能提高任务完成率, 因为模型可以自我监控进度。nag 机制创造了问责性 -- 没有它, 随着对话上下文增长和早期指令淡化, 模型可能在执行中途放弃计划。\"同一时间只允许一个 in_progress\" 的约束强制顺序聚焦, 防止上下文切换开销降低输出质量。这个模式之所以有效, 是因为它将模型的工作记忆外化为结构化状态, 使其能够在注意力漂移中存活。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n可以尝试的提示:\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. 
`Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite (待办写入)\n\n> TodoManager 让智能体能追踪自己的进度, 而 nag reminder 注入机制在它忘记更新时强制提醒。\n\n## 问题\n\n当智能体处理多步骤任务时, 它经常丢失对已完成和待办事项的追踪。没有显式的计划, 模型可能重复工作、跳过步骤或跑偏。用户也无法看到智能体内部的计划。\n\n这个问题比听起来更严重。长对话会导致模型 \"漂移\" -- 随着上下文窗口被工具结果填满, 系统提示的影响力逐渐减弱。一个 10 步的重构任务可能完成了 1-3 步, 然后模型就开始即兴发挥, 因为它忘了第 4-10 步的存在。\n\n解决方案是结构化状态: 一个模型显式写入的 TodoManager。模型创建计划, 工作时将项目标记为 in_progress, 完成后标记为 completed。nag reminder 机制在模型连续 3 轮以上不更新待办时注入提醒。\n\n注: nag 阈值 3 轮是为教学可见性设的低值, 生产环境通常更高。从 s07 起, 课程转向 Task 看板处理持久化多步工作; TodoWrite 仍可用于轻量清单。\n\n## 解决方案\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 验证并存储一组带状态的项目。同一时间只允许一个项目处于 `in_progress` 状态。\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样添加到 dispatch map 中。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. 
nag reminder 在模型连续 3 轮以上不调用 `todo` 时, 向 tool_result 消息中注入 `<reminder>` 标签。\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"<reminder>Update your todos.</reminder>\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. 系统提示指导模型使用 todo 进行规划。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## 核心代码\n\nTodoManager 和 nag 注入 (来自 `agents/s03_todo_write.py`,\n第 51-85 行和第 158-187 行):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"<reminder>Update your todos.</reminder>\",\n })\n```\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `<reminder>` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器|\n\n## 设计原理\n\n可见的计划能提高任务完成率, 因为模型可以自我监控进度。nag 机制创造了问责性 -- 没有它, 随着对话上下文增长和早期指令淡化, 模型可能在执行中途放弃计划。\"同一时间只允许一个 in_progress\" 的约束强制顺序聚焦, 防止上下文切换开销降低输出质量。这个模式之所以有效, 是因为它将模型的工作记忆外化为结构化状态, 使其能够在注意力漂移中存活。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n可以尝试的提示:\n\n1. 
`Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "zh", "title": "s04: Subagent (子智能体)", - "content": "# s04: Subagent (子智能体)\n\n> 子智能体使用全新的消息列表运行, 与父智能体共享文件系统, 仅返回摘要 -- 保持父上下文的整洁。\n\n## 问题\n\n随着智能体工作, 它的消息数组不断增长。每次工具调用、每次文件读取、每次 bash 输出都在累积。20-30 次工具调用后, 上下文窗口充满了无关的历史。为了回答一个简单问题而读取的 500 行文件, 会永久占据上下文中的 500 行空间。\n\n这对探索性任务尤其糟糕。\"这个项目用了什么测试框架?\" 可能需要读取 5 个文件, 但父智能体的历史中并不需要这 5 个文件的全部内容 -- 它只需要答案: \"pytest, 使用 conftest.py 配置。\"\n\n解决方案是进程隔离: 以 `messages=[]` 启动一个子智能体。子智能体进行探索、读取文件、运行命令。完成后, 只有最终的文本响应返回给父智能体。子智能体的全部消息历史被丢弃。\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## 工作原理\n\n1. 父智能体拥有一个 `task` 工具用于触发子智能体的生成。子智能体获得除 `task` 外的所有基础工具 (不允许递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
子智能体以全新的消息列表启动, 仅包含委派的 prompt。它共享相同的文件系统。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. 只有最终文本返回给父智能体。子智能体 30+ 次工具调用的历史被丢弃。\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 父智能体将此摘要作为普通的 tool_result 接收。\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## 核心代码\n\n子智能体函数 (来自 `agents/s04_subagent.py`, 第 110-128 行):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 
单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n| Todo 系统 | TodoManager | 已移除 (非本节重点) |\n\n## 设计原理\n\n进程隔离免费提供了上下文隔离。全新的 `messages[]` 意味着子智能体不会被父级的对话历史干扰。代价是通信开销 -- 结果必须压缩回父级, 丢失细节。这与操作系统进程隔离的权衡相同: 用序列化成本换取安全性和整洁性。限制子智能体深度 (不允许递归生成) 防止无限资源消耗, 最大迭代次数确保失控的子进程能终止。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n可以尝试的提示:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagent (子智能体)\n\n> 子智能体使用全新的消息列表运行, 与父智能体共享文件系统, 仅返回摘要 -- 保持父上下文的整洁。\n\n## 问题\n\n随着智能体工作, 它的消息数组不断增长。每次工具调用、每次文件读取、每次 bash 输出都在累积。20-30 次工具调用后, 上下文窗口充满了无关的历史。为了回答一个简单问题而读取的 500 行文件, 会永久占据上下文中的 500 行空间。\n\n这对探索性任务尤其糟糕。\"这个项目用了什么测试框架?\" 可能需要读取 5 个文件, 但父智能体的历史中并不需要这 5 个文件的全部内容 -- 它只需要答案: \"pytest, 使用 conftest.py 配置。\"\n\n在本课程里, 一个实用解法是 fresh `messages[]` 隔离: 以 `messages=[]` 启动一个子智能体。子智能体进行探索、读取文件、运行命令。完成后, 只有最终的文本响应返回给父智能体。子智能体的全部消息历史被丢弃。\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## 工作原理\n\n1. 父智能体拥有一个 `task` 工具用于触发子智能体的生成。子智能体获得除 `task` 外的所有基础工具 (不允许递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
子智能体以全新的消息列表启动, 仅包含委派的 prompt。它共享相同的文件系统。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. 只有最终文本返回给父智能体。子智能体 30+ 次工具调用的历史被丢弃。\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 父智能体将此摘要作为普通的 tool_result 接收。\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## 核心代码\n\n子智能体函数 (来自 `agents/s04_subagent.py`, 第 110-128 行):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 
单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 设计原理\n\n在本节中, fresh `messages[]` 隔离是一个近似实现上下文隔离的实用办法。全新的 `messages[]` 意味着子智能体从不携带父级历史开始。代价是通信开销 -- 结果必须压缩回父级, 丢失细节。这是消息历史隔离策略, 不是操作系统进程隔离本身。限制子智能体深度 (不允许递归生成) 防止无限资源消耗, 最大迭代次数确保失控的子任务能终止。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n可以尝试的提示:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "zh", "title": "s05: Skills (技能加载)", - "content": "# s05: Skills (技能加载)\n\n> 两层技能注入避免了系统提示膨胀: 在系统提示中放技能名称 (低成本), 在 tool_result 中按需放入完整技能内容。\n\n## 问题\n\n你希望智能体针对不同领域遵循特定的工作流: git 约定、测试模式、代码审查清单。简单粗暴的做法是把所有内容都塞进系统提示。但系统提示的有效注意力是有限的 -- 文本太多, 模型就会开始忽略其中一部分。\n\n如果你有 10 个技能, 每个 2000 token, 那就是 20,000 token 的系统提示。模型关注开头和结尾, 但会略过中间部分。更糟糕的是, 这些技能中大部分与当前任务无关。文件编辑任务不需要 git 工作流说明。\n\n两层方案解决了这个问题: 第一层在系统提示中放入简短的技能描述 (每个技能约 100 token)。第二层只在模型调用 `load_skill` 时, 才将完整的技能内容加载到 tool_result 中。模型知道有哪些技能可用 (低成本), 按需加载它们 (只在相关时)。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## 工作原理\n\n1. 技能文件以 Markdown 格式存放在 `.skills/` 目录中, 带有 YAML frontmatter。\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. 
SkillLoader 解析 frontmatter, 分离元数据和正文。\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. 第一层: `get_descriptions()` 返回简短描述, 用于系统提示。\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. 第二层: `get_content()` 返回用 `` 标签包裹的完整正文。\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n5. 
`load_skill` 工具只是 dispatch map 中的又一个条目。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## 核心代码\n\nSkillLoader 类 (来自 `agents/s05_skill_loading.py`, 第 51-97 行):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"\\n\"\n f\"{skill['body']}\\n\")\n```\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + 技能描述列表 |\n| 知识库 | 无 | .skills/*.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n| Subagent | `run_subagent()` | 已移除 (非本节重点) |\n\n## 设计原理\n\n两层注入解决了注意力预算问题。将所有技能内容放入系统提示会在未使用的技能上浪费 token。第一层 (紧凑摘要) 总共约 120 token。第二层 (完整内容) 通过 tool_result 按需加载。这可以扩展到数十个技能而不降低模型注意力质量。关键洞察是: 模型只需要知道有哪些技能 (低成本) 就能决定何时加载某个技能 (高成本)。这与软件模块系统中的懒加载原则相同。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n可以尝试的提示:\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills (技能加载)\n\n> 两层技能注入避免了系统提示膨胀: 在系统提示中放技能名称 (低成本), 在 tool_result 中按需放入完整技能内容。\n\n## 问题\n\n智能体需要针对不同领域遵循特定的工作流: git 约定、测试模式、代码审查清单。简单粗暴的做法是把所有内容都塞进系统提示。但系统提示的有效注意力是有限的 -- 文本太多, 模型就会开始忽略其中一部分。\n\n如果你有 10 个技能, 每个 2000 token, 那就是 20,000 token 的系统提示。模型关注开头和结尾, 但会略过中间部分。更糟糕的是, 这些技能中大部分与当前任务无关。文件编辑任务不需要 git 工作流说明。\n\n两层方案解决了这个问题: 第一层在系统提示中放入简短的技能描述 (每个技能约 100 token)。第二层只在模型调用 `load_skill` 时, 才将完整的技能内容加载到 tool_result 中。模型知道有哪些技能可用 (低成本), 按需加载它们 (只在相关时)。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## 工作原理\n\n1. 技能文件以 Markdown 格式存放在 `.skills/` 目录中, 带有 YAML frontmatter。\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. SkillLoader 解析 frontmatter, 分离元数据和正文。\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. 
第一层: `get_descriptions()` 返回简短描述, 用于系统提示。\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. 第二层: `get_content()` 返回用 `` 标签包裹的完整正文。\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n5. `load_skill` 工具只是 dispatch map 中的又一个条目。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## 核心代码\n\nSkillLoader 类 (来自 `agents/s05_skill_loading.py`, 第 51-97 行):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"\\n\"\n f\"{skill['body']}\\n\")\n```\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + 技能描述列表 |\n| 知识库 | 无 | .skills/*.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 设计原理\n\n两层注入解决了注意力预算问题。将所有技能内容放入系统提示会在未使用的技能上浪费 token。第一层 (紧凑摘要) 总共约 120 token。第二层 (完整内容) 通过 tool_result 按需加载。这可以扩展到数十个技能而不降低模型注意力质量。关键洞察是: 模型只需要知道有哪些技能 (低成本) 就能决定何时加载某个技能 
(高成本)。这与软件模块系统中的懒加载原则相同。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n可以尝试的提示:\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "zh", "title": "s06: Compact (上下文压缩)", - "content": "# s06: Compact (上下文压缩)\n\n> 三层压缩管道让智能体可以无限期工作: 策略性地遗忘旧的工具结果, token 超过阈值时自动摘要, 以及支持手动触发压缩。\n\n## 问题\n\n上下文窗口是有限的。工具调用积累到足够多时, 消息数组会超过模型的上下文限制, API 调用直接失败。即使在到达硬限制之前, 性能也会下降: 模型变慢、准确率降低, 开始忽略早期消息。\n\n200,000 token 的上下文窗口听起来很大, 但一次 `read_file` 读取 1000 行源文件就消耗约 4000 token。读取 30 个文件、运行 20 条 bash 命令后, 你就已经用掉 100,000+ token 了。没有某种压缩机制, 智能体无法在大型代码库上工作。\n\n三层管道以递增的激进程度来应对这个问题:\n第一层 (micro-compact) 每轮静默替换旧的工具结果。\n第二层 (auto-compact) 在 token 超过阈值时触发完整摘要。\n第三层 (manual compact) 让模型自己触发压缩。\n\n教学简化说明: 这里的 token 估算使用粗略的 字符数/4 启发式方法。生产系统使用专业的 tokenizer 库进行精确计数。\n\n## 解决方案\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. 
**第一层 -- micro_compact**: 每次 LLM 调用前, 找到最近 3 条之前的所有 tool_result 条目, 替换其内容。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第二层 -- auto_compact**: 当估算 token 超过 50,000 时, 保存完整对话记录并请求 LLM 进行摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发相同的摘要机制。\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. Agent loop 整合了全部三层。\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## 核心代码\n\n三层管道 (来自 `agents/s06_context_compact.py`, 第 67-93 行和第 189-223 行):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Manual compact | 无 | `compact` 工具 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n| Skills | load_skill | 已移除 (非本节重点) |\n\n## 设计原理\n\n上下文窗口有限, 但智能体会话可以无限。三层压缩在不同粒度上解决这个问题: micro-compact (替换旧工具输出), auto-compact (接近限制时 LLM 摘要), manual compact (用户触发)。关键洞察是遗忘是特性而非缺陷 -- 它使无限会话成为可能。转录文件将完整历史保存在磁盘上, 因此没有任何东西真正丢失, 只是从活跃上下文中移出。分层方法让每一层在各自的粒度上独立运作, 从静默的逐轮清理到完整的对话重置。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n可以尝试的提示:\n\n1. `Read every Python file in the agents/ directory one by one`\n (观察 micro-compact 替换旧的结果)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Compact (上下文压缩)\n\n> 三层压缩管道让智能体可以无限期工作: 策略性地遗忘旧的工具结果, token 超过阈值时自动摘要, 以及支持手动触发压缩。\n\n## 问题\n\n上下文窗口是有限的。工具调用积累到足够多时, 消息数组会超过模型的上下文限制, API 调用直接失败。即使在到达硬限制之前, 性能也会下降: 模型变慢、准确率降低, 开始忽略早期消息。\n\n200,000 token 的上下文窗口听起来很大, 但一次 `read_file` 读取 1000 行源文件就消耗约 4000 token。读取 30 个文件、运行 20 条 bash 命令后, 你就已经用掉 100,000+ token 了。没有某种压缩机制, 智能体无法在大型代码库上工作。\n\n三层管道以递增的激进程度来应对这个问题:\n第一层 (micro-compact) 每轮静默替换旧的工具结果。\n第二层 (auto-compact) 在 token 超过阈值时触发完整摘要。\n第三层 (manual compact) 让模型自己触发压缩。\n\n教学简化说明: 这里的 token 估算使用粗略的 字符数/4 启发式方法。生产系统使用专业的 tokenizer 库进行精确计数。\n\n## 解决方案\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 找到最近 3 条之前的所有 tool_result 条目, 替换其内容。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第二层 -- auto_compact**: 当估算 token 超过 50,000 时, 保存完整对话记录并请求 LLM 进行摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发相同的摘要机制。\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. Agent loop 整合了全部三层。\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## 核心代码\n\n三层管道 (来自 `agents/s06_context_compact.py`, 第 67-93 行和第 189-223 行):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Manual compact | 无 | `compact` 工具 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 设计原理\n\n上下文窗口有限, 但智能体会话可以无限。三层压缩在不同粒度上解决这个问题: micro-compact (替换旧工具输出), auto-compact (接近限制时 LLM 摘要), manual compact (用户触发)。关键洞察是遗忘是特性而非缺陷 -- 它使无限会话成为可能。转录文件将完整历史保存在磁盘上, 因此没有任何东西真正丢失, 只是从活跃上下文中移出。分层方法让每一层在各自的粒度上独立运作, 从静默的逐轮清理到完整的对话重置。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n可以尝试的提示:\n\n1. `Read every Python file in the agents/ directory one by one`\n (观察 micro-compact 替换旧的结果)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "zh", "title": "s07: Tasks (任务系统)", - "content": "# s07: Tasks (任务系统)\n\n> 任务以 JSON 文件形式持久化在文件系统上, 带有依赖图, 因此它们能在上下文压缩后存活, 也可以跨智能体共享。\n\n## 问题\n\n内存中的状态 (如 s03 的 TodoManager) 在上下文压缩 (s06) 时会丢失。auto_compact 用摘要替换消息后, 待办列表就没了。智能体只能从摘要文本中重建它, 这是有损且容易出错的。\n\n这就是 s06 到 s07 的关键桥梁: TodoManager 的条目随压缩消亡; 基于文件的任务不会。将状态移到文件系统上使其不受压缩影响。\n\n更根本地说, 内存中的状态对其他智能体不可见。当我们最终构建团队 (s09+) 时, 队友需要一个共享的任务看板。内存中的数据结构是进程局部的。\n\n解决方案是将任务作为 JSON 文件持久化在 `.tasks/` 目录中。每个任务是一个单独的文件, 包含 ID、主题、状态和依赖图。完成任务 1 会自动解除任务 2 的阻塞 (如果任务 2 有 `blockedBy: [1]`)。文件系统成为唯一的真实来源。\n\n## 解决方案\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's blockedBy list\n```\n\n## 工作原理\n\n1. TaskManager 提供 CRUD 操作。每个任务是一个 JSON 文件。\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 当任务标记为 completed 时, `_clear_dependency` 将其 ID 从所有其他任务的 `blockedBy` 列表中移除。\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. 
`update` 方法处理状态变更和双向依赖关联。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. 四个任务工具添加到 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## 核心代码\n\n带依赖图的 TaskManager (来自 `agents/s07_task_system.py`, 第 46-123 行):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|----------------|------------------|----------------------------------|\n| 
Tools | 5 | 8 (+task_create/update/list/get) |\n| 状态存储 | 仅内存 | .tasks/ 中的 JSON 文件 |\n| 依赖关系 | 无 | blockedBy + blocks 图 |\n| 压缩机制 | 三层 | 已移除 (非本节重点) |\n| 持久化 | 压缩后丢失 | 压缩后存活 |\n\n## 设计原理\n\n基于文件的状态能在上下文压缩中存活。当智能体的对话被压缩时, 内存中的状态会丢失, 但写入磁盘的任务会持久保存。依赖图确保即使在上下文丢失后也能按正确顺序执行。这是临时对话与持久工作之间的桥梁 -- 智能体可以忘记对话细节, 但始终有任务看板来提醒它还需要做什么。文件系统作为唯一真实来源也为未来的多智能体共享提供了基础, 因为任何进程都可以读取相同的 JSON 文件。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n可以尝试的提示:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" + "content": "# s07: Tasks (任务系统)\n\n> 任务以 JSON 文件形式持久化在文件系统上, 带有依赖图, 因此它们能在上下文压缩后存活, 也可以跨智能体共享。\n\n## 问题\n\n内存中的状态 (如 s03 的 TodoManager) 在上下文压缩 (s06) 时会丢失。auto_compact 用摘要替换消息后, 待办列表就没了。智能体只能从摘要文本中重建它, 这是有损且容易出错的。\n\n这就是 s06 到 s07 的关键桥梁: TodoManager 的条目随压缩消亡; 基于文件的任务不会。将状态移到文件系统上使其不受压缩影响。\n\n更根本地说, 内存中的状态对其他智能体不可见。当我们最终构建团队 (s09+) 时, 队友需要一个共享的任务看板。内存中的数据结构是进程局部的。\n\n解决方案是将任务作为 JSON 文件持久化在 `.tasks/` 目录中。每个任务是一个单独的文件, 包含 ID、主题、状态和依赖图。完成任务 1 会自动解除任务 2 的阻塞 (如果任务 2 有 `blockedBy: [1]`)。在本教学实现里, 文件系统是任务状态的真实来源。\n\n## Task vs Todo: 何时用哪个\n\n从 s07 起, Task 是默认主线。Todo 仍可用于短期线性清单。\n\n## 快速判定矩阵\n\n| 场景 | 优先选择 | 原因 |\n|---|---|---|\n| 短时、单会话、线性清单 | Todo | 心智负担最低,记录最快 |\n| 跨会话、存在依赖、多人协作 | Task | 状态可持久、依赖可表达、协作可见 |\n| 一时拿不准 | Task | 后续降级更容易,半途迁移成本更低 |\n\n## 解决方案\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's 
blockedBy list\n```\n\n## 工作原理\n\n1. TaskManager 提供 CRUD 操作。每个任务是一个 JSON 文件。\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 当任务标记为 completed 时, `_clear_dependency` 将其 ID 从所有其他任务的 `blockedBy` 列表中移除。\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. `update` 方法处理状态变更和双向依赖关联。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. 
四个任务工具添加到 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## 核心代码\n\n带依赖图的 TaskManager (来自 `agents/s07_task_system.py`, 第 46-123 行):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 状态存储 | 仅内存 | `.tasks/` 中的 JSON 文件 |\n| 依赖关系 | 无 | `blockedBy + blocks` 图 |\n| 持久化 | 压缩后丢失 | 压缩后存活 |\n\n## 设计原理\n\n基于文件的状态能在上下文压缩中存活。当智能体的对话被压缩时, 内存中的状态会丢失, 但写入磁盘的任务会持久保存。依赖图确保即使在上下文丢失后也能按正确顺序执行。这是临时对话与持久工作之间的桥梁 -- 智能体可以忘记对话细节, 但始终有任务看板来提醒它还需要做什么。在本教学实现里, 文件系统作为任务状态真实来源也为未来的多智能体共享提供了基础, 因为任何进程都可以读取相同的 JSON 文件。\n\n但“持久化”成立有前提:每次写入前都要重新读取任务文件,确认 `status/blockedBy` 与预期一致,再原子写回。否则并发写入很容易互相覆盖状态。\n\n从课程设计上看, 这也是为什么 s07 之后我们默认采用 Task 而不是 Todo: 它更接近真实工程中的长期执行与协作需求。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython 
agents/s07_task_system.py\n```\n\n可以尝试的提示:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" }, { "version": "s08", "locale": "zh", "title": "s08: Background Tasks (后台任务)", - "content": "# s08: Background Tasks (后台任务)\n\n> BackgroundManager 在独立线程中运行命令, 在每次 LLM 调用前排空通知队列, 使智能体永远不会因长时间运行的操作而阻塞。\n\n## 问题\n\n有些命令需要几分钟: `npm install`、`pytest`、`docker build`。在阻塞式的 agent loop 中, 模型只能干等子进程结束, 什么也做不了。如果用户要求 \"安装依赖, 同时创建配置文件\", 智能体会先安装, 然后才创建配置 -- 串行执行, 而非并行。\n\n智能体需要并发能力。不是将 agent loop 本身完全多线程化, 而是能够发起一个长时间命令然后继续工作。当命令完成时, 结果自然地出现在对话中。\n\n解决方案是一个 BackgroundManager, 它在守护线程中运行命令, 将结果收集到通知队列中。每次 LLM 调用前, 队列被排空, 结果注入到消息中。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## 工作原理\n\n1. BackgroundManager 追踪任务并维护一个线程安全的通知队列。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动一个守护线程并立即返回 task_id。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
线程目标函数 `_execute` 运行子进程并将结果推入通知队列。\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()` 返回并清空待处理的结果。\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. Agent loop 在每次 LLM 调用前排空通知。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"\"\n f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## 核心代码\n\nBackgroundManager (来自 `agents/s08_background_tasks.py`, 第 49-107 行):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def drain_notifications(self) -> list:\n with 
self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n| 任务系统 | 基于文件的 CRUD | 已移除 (非本节重点) |\n\n## 设计原理\n\n智能体循环本质上是单线程的 (一次一个 LLM 调用)。后台线程为 I/O 密集型工作 (测试、构建、安装) 打破了这个限制。通知队列模式 (\"在下一次 LLM 调用前排空\") 确保结果在对话的自然间断点到达, 而不是打断模型的推理过程。这是一个最小化的并发模型: 智能体循环保持单线程和确定性, 只有 I/O 密集型的子进程执行被并行化。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n可以尝试的提示:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks (后台任务)\n\n> BackgroundManager 在独立线程中运行命令, 在每次 LLM 调用前排空通知队列, 使智能体永远不会因长时间运行的操作而阻塞。\n\n## 问题\n\n有些命令需要几分钟: `npm install`、`pytest`、`docker build`。在阻塞式的 agent loop 中, 模型只能干等子进程结束, 什么也做不了。如果用户要求 \"安装依赖, 同时创建配置文件\", 智能体会先安装, 然后才创建配置 -- 串行执行, 而非并行。\n\n智能体需要并发能力。不是将 agent loop 本身完全多线程化, 而是能够发起一个长时间命令然后继续工作。当命令完成时, 结果自然地出现在对话中。\n\n解决方案是一个 BackgroundManager, 它在守护线程中运行命令, 将结果收集到通知队列中。每次 LLM 调用前, 队列被排空, 结果注入到消息中。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## 工作原理\n\n1. 
BackgroundManager 追踪任务并维护一个线程安全的通知队列。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动一个守护线程并立即返回 task_id。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 线程目标函数 `_execute` 运行子进程并将结果推入通知队列。\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()` 返回并清空待处理的结果。\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. 
Agent loop 在每次 LLM 调用前排空通知。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"\"\n f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## 核心代码\n\nBackgroundManager (来自 `agents/s08_background_tasks.py`, 第 49-107 行):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 设计原理\n\n智能体循环本质上是单线程的 (一次一个 LLM 调用)。后台线程为 I/O 密集型工作 (测试、构建、安装) 打破了这个限制。通知队列模式 (\"在下一次 LLM 调用前排空\") 确保结果在对话的自然间断点到达, 而不是打断模型的推理过程。这是一个最小化的并发模型: 智能体循环保持单线程和确定性, 只有 I/O 密集型的子进程执行被并行化。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n可以尝试的提示:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". 
Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "zh", "title": "s09: Agent Teams (智能体团队)", - "content": "# s09: Agent Teams (智能体团队)\n\n> 持久化的队友通过 JSONL 收件箱将孤立的智能体转变为可通信的团队 -- spawn、message、broadcast 和 drain。\n\n## 问题\n\n子智能体 (s04) 是一次性的: 生成、工作、返回摘要、消亡。它们没有身份, 没有跨调用的记忆, 也无法接收后续指令。后台任务 (s08) 运行 shell 命令, 但不能做 LLM 引导的决策或交流发现。\n\n真正的团队协作需要三样东西: (1) 存活时间超过单次 prompt 的持久化智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。没有消息机制, 即使持久化的队友也是又聋又哑的 -- 它们可以并行工作但永远无法协调。\n\n解决方案将 TeammateManager (用于生成持久化的命名智能体) 与使用 JSONL 收件箱文件的 MessageBus 结合。每个队友在独立线程中运行自己的 agent loop, 每次 LLM 调用前检查收件箱, 可以向任何其他队友或领导发送消息。\n\n关于 s06 到 s07 的桥梁: s03 的 TodoManager 条目随压缩 (s06) 消亡。基于文件的任务 (s07) 因为存储在磁盘上而能存活压缩。团队建立在同样的原则上 -- config.json 和收件箱文件持久化在上下文窗口之外。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。每个成员有名称、角色和状态。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. 
`spawn()` 创建队友并在线程中启动其 agent loop。重新 spawn 一个 idle 状态的队友会将其重新激活。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus 处理 JSONL 收件箱文件。`send()` 追加一行 JSON; `read_inbox()` 读取所有行并清空文件。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
每个队友在每次 LLM 调用前检查收件箱, 将收到的消息注入对话上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. `broadcast()` 向除发送者外的所有队友发送相同消息。\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## 核心代码\n\nTeammateManager + MessageBus 核心 (来自 `agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in 
path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| 智能体数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | 5 种消息类型 + broadcast |\n\n教学简化说明: 此实现未使用文件锁来保护收件箱访问。在生产中, 多个写入者并发追加需要文件锁或原子重命名。这里使用的单写入者-per-收件箱模式在教学场景下是安全的。\n\n## 设计原理\n\n基于文件的邮箱 (追加式 JSONL) 提供了并发安全的智能体间通信。追加操作在大多数文件系统上是原子的, 避免了锁竞争。\"读取时排空\" 模式 (读取全部, 截断) 提供批量传递。这比共享内存或基于 socket 的 IPC 更简单、更健壮。代价是延迟 -- 消息只在下一次轮询时才被看到 -- 但对于每轮需要数秒推理时间的 LLM 驱动智能体来说, 轮询延迟相比推理时间可以忽略不计。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n可以尝试的提示:\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看带状态的团队名册\n5. 输入 `/inbox` 手动检查领导的收件箱\n" + "content": "# s09: Agent Teams (智能体团队)\n\n> 持久化的队友通过 JSONL 收件箱提供了一种教学协议, 将孤立的智能体转变为可通信的团队 -- spawn、message、broadcast 和 drain。\n\n## 问题\n\n子智能体 (s04) 是一次性的: 生成、工作、返回摘要、消亡。它们没有身份, 没有跨调用的记忆, 也无法接收后续指令。后台任务 (s08) 运行 shell 命令, 但不能做 LLM 引导的决策或交流发现。\n\n真正的团队协作需要三样东西: (1) 存活时间超过单次 prompt 的持久化智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。没有消息机制, 即使持久化的队友也是又聋又哑的 -- 它们可以并行工作但永远无法协调。\n\n解决方案将 TeammateManager (用于生成持久化的命名智能体) 与使用 JSONL 收件箱文件的 MessageBus 结合。每个队友在独立线程中运行自己的 agent loop, 每次 LLM 调用前检查收件箱, 可以向任何其他队友或领导发送消息。\n\n关于 s06 到 s07 的桥梁: s03 的 TodoManager 条目随压缩 (s06) 消亡。基于文件的任务 (s07) 因为存储在磁盘上而能存活压缩。团队建立在同样的原则上 -- config.json 和收件箱文件持久化在上下文窗口之外。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。每个成员有名称、角色和状态。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动其 agent loop。重新 spawn 一个 idle 状态的队友会将其重新激活。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus 处理 JSONL 收件箱文件。`send()` 追加一行 JSON; `read_inbox()` 读取所有行并清空文件。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将收到的消息注入对话上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. 
`broadcast()` 向除发送者外的所有队友发送相同消息。\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## 核心代码\n\nTeammateManager + MessageBus 核心 (来自 `agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| 智能体数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | 5 种消息类型 + broadcast |\n\n教学简化说明: 此实现未使用文件锁来保护收件箱访问。在生产中, 多个写入者并发追加需要文件锁或原子重命名。这里使用的单写入者-per-收件箱模式在教学场景下是安全的。\n\n## 设计原理\n\n基于文件的邮箱 (追加式 JSONL) 在教学代码中具有可观察、易理解的优势。\"读取时排空\" 模式 (读取全部, 截断) 用很少的机制就能实现批量传递。代价是延迟 -- 消息只在下一次轮询时才被看到 -- 但对于每轮需要数秒推理时间的 LLM 驱动智能体来说, 本课程中该延迟是可接受的。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n可以尝试的提示:\n\n1. 
`Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看带状态的团队名册\n5. 输入 `/inbox` 手动检查领导的收件箱\n" }, { "version": "s10", @@ -129,13 +135,19 @@ "version": "s11", "locale": "zh", "title": "s11: Autonomous Agents (自治智能体)", - "content": "# s11: Autonomous Agents (自治智能体)\n\n> 带任务看板轮询的空闲循环让队友能自己发现和认领工作, 上下文压缩后通过身份重注入保持角色认知。\n\n## 问题\n\n在 s09-s10 中, 队友只在被明确指示时才工作。领导必须用特定的 prompt 生成每个队友。如果任务看板上有 10 个未认领的任务, 领导必须手动分配每一个。这无法扩展。\n\n真正的自治意味着队友自己寻找工作。当一个队友完成当前任务后, 它应该扫描任务看板寻找未认领的工作, 认领一个任务, 然后开始工作 -- 不需要领导的任何指令。\n\n但自治智能体面临一个微妙问题: 上下文压缩后, 智能体可能忘记自己是谁。如果消息被摘要化, 原始系统提示中的身份 (\"你是 alice, 角色: coder\") 就会淡化。身份重注入通过在压缩后的上下文开头插入身份块来解决这个问题。\n\n教学简化说明: 这里的 token 估算比较粗糙 (字符数 / 4)。生产系统使用专业的 tokenizer 库。s03 中的 nag 阈值 3 轮是为教学可见性设的低值; 生产环境的智能体通常使用约 10 轮的阈值。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## 工作原理\n\n1. 
队友循环有两个阶段: WORK 和 IDLE。WORK 阶段运行标准的 agent loop。当 LLM 停止调用工具 (或调用了 `idle` 工具) 时, 队友进入 IDLE 阶段。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 
任务看板扫描查找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. 身份重注入: 当上下文过短时插入身份块, 表明发生了压缩。\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. `idle` 工具让队友显式地表示没有更多工作, 提前进入空闲轮询阶段。\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. 
\"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## 核心代码\n\n自治循环 (来自 `agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未认领任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 设计原理\n\n轮询 + 超时使智能体无需中央协调器即可自组织。每个智能体独立轮询任务看板, 认领未认领的工作, 完成后回到空闲状态。超时触发轮询循环, 如果在窗口期内没有工作出现, 智能体自行关机。这与工作窃取线程池的模式相同 -- 分布式, 无单点故障。压缩后的身份重注入确保智能体即使在对话历史被摘要后仍能保持其角色。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n可以尝试的提示:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 
输入 `/team` 监控谁在工作、谁在空闲\n" + "content": "# s11: Autonomous Agents (自治智能体)\n\n> 带任务看板轮询的空闲循环让队友能自己发现和认领工作, 上下文压缩后通过身份重注入保持角色认知。\n\n## 问题\n\n在 s09-s10 中, 队友只在被明确指示时才工作。领导必须用特定的 prompt 生成每个队友。如果任务看板上有 10 个未认领的任务, 领导必须手动分配每一个。这无法扩展。\n\n真正的自治意味着队友自己寻找工作。当一个队友完成当前任务后, 它应该扫描任务看板寻找未认领的工作, 认领一个任务, 然后开始工作 -- 不需要领导的任何指令。\n\n但自治智能体面临一个微妙问题: 上下文压缩后, 智能体可能忘记自己是谁。如果消息被摘要化, 原始系统提示中的身份 (\"你是 alice, 角色: coder\") 就会淡化。身份重注入通过在压缩后的上下文开头插入身份块来解决这个问题。\n\n注: token 估算使用字符数/4 (粗略)。nag 阈值 3 轮是为教学可见性设的低值。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## 工作原理\n\n1. 队友循环有两个阶段: WORK 和 IDLE。WORK 阶段运行标准的 agent loop。当 LLM 停止调用工具 (或调用了 `idle` 工具) 时, 队友进入 IDLE 阶段。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描查找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. 身份重注入: 当上下文过短时插入身份块, 表明发生了压缩。\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. `idle` 工具让队友显式地表示没有更多工作, 提前进入空闲轮询阶段。\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. 
\"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## 核心代码\n\n自治循环 (来自 `agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未认领任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 设计原理\n\n轮询 + 超时使智能体无需中央协调器即可自组织。每个智能体独立轮询任务看板, 认领未认领的工作, 完成后回到空闲状态。超时触发轮询循环, 如果在窗口期内没有工作出现, 智能体自行关机。这与工作窃取线程池的模式相同 -- 分布式, 无单点故障。压缩后的身份重注入确保智能体即使在对话历史被摘要后仍能保持其角色。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n可以尝试的提示:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 
输入 `/team` 监控谁在工作、谁在空闲\n" + }, + { + "version": "s12", + "locale": "zh", + "title": "s12: Worktree + 任务隔离", + "content": "# s12: Worktree + 任务隔离\n\n> 目录隔离, 任务 ID 协调 -- 用\"任务板 (控制面) + worktree (执行面)\"把并行改动从互相污染变成可追踪、可恢复、可收尾。\n\n## 问题\n\ns11 时, agent 已经能认领任务并协同推进。但所有任务共享同一个工作目录。两个 agent 同时改同一棵文件树时, 未提交的变更互相干扰, 任务状态和实际改动对不上, 收尾时也无法判断该保留还是清理哪些文件。\n\n考虑一个具体场景: agent A 在做 auth 重构, agent B 在做登录页。两者都修改了 `config.py`。A 的半成品改动被 B 的 `git status` 看到, B 以为是自己的遗留, 尝试提交 -- 结果两个任务都坏了。\n\n根因是\"做什么\"和\"在哪里做\"没有分开。任务板管目标, 但执行上下文是共享的。解决方案: 给每个任务分配独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\n控制面 (.tasks/) 执行面 (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <----> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <----> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n```\n\n三层状态:\n1. 控制面 (What): `.tasks/task_*.json` -- 任务目标、责任归属、完成状态\n2. 执行面 (Where): `.worktrees/index.json` -- 隔离目录路径、分支、存活状态\n3. 运行态 (Now): 单轮内存上下文 -- 当前任务、当前 worktree、工具结果\n\n状态机:\n```text\nTask: pending -> in_progress -> completed\nWorktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. 创建任务, 把目标写入任务板。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. 创建 worktree 并绑定任务。传入 `task_id` 时自动把任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json 追加 entry, task_1.json 绑定 worktree=\"auth-refactor\"\n```\n\n3. 在隔离目录中执行命令。`cwd` 指向 worktree 路径, 主目录不受影响。\n\n```python\nWORKTREES.run(\"auth-refactor\", \"git status --short\")\n# -> subprocess.run(command, cwd=\".worktrees/auth-refactor\", ...)\n```\n\n4. 
观测和回写。`worktree_status` 查看 git 状态, `task_update` 维护进度。\n\n```python\nWORKTREES.status(\"auth-refactor\") # git status inside worktree\nTASKS.update(1, owner=\"agent-A\") # update task metadata\n```\n\n5. 收尾: 选择 keep 或 remove。`remove` 配合 `complete_task=true` 会同时完成任务并解绑 worktree。\n\n```python\nWORKTREES.remove(\"auth-refactor\", complete_task=True)\n# -> git worktree remove\n# -> task_1.json status=completed, worktree=\"\"\n# -> index.json status=removed\n# -> events.jsonl 写入 task.completed + worktree.remove.after\n```\n\n6. 进程中断后, 从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的, 磁盘状态是持久的。\n\n## 核心代码\n\n事件流 -- append-only 生命周期日志 (来自 `agents/s12_worktree_task_isolation.py`):\n\n```python\nclass EventBus:\n def emit(self, event, task=None, worktree=None, error=None):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n```\n\n事件流写入 `.worktrees/events.jsonl`, 每个关键操作发出三段式事件:\n- `worktree.create.before / after / failed`\n- `worktree.remove.before / after / failed`\n- `task.completed` (当 `complete_task=true` 成功时)\n\n事件负载形状:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 7, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"path\": \"...\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n任务绑定 -- Task 侧持有 worktree 名称:\n\n```python\ndef bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n隔离执行 -- cwd 路由到 worktree 目录:\n\n```python\nr = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n)\n```\n\n收尾联动 -- remove 同时完成任务:\n\n```python\ndef remove(self, name, force=False, 
complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n生命周期工具注册:\n\n```python\n\"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n\"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n```\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|----------------|----------------------------|-----------------------------------------|\n| 协调状态 | 任务板 (owner/status) | 任务板 + `worktree` 显式绑定 |\n| 执行上下文 | 共享目录 | 每个任务可分配独立 worktree 目录 |\n| 可恢复性 | 依赖任务状态 | 任务状态 + worktree 索引双重恢复 |\n| 收尾语义 | 任务完成 | 任务完成 + worktree 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 设计原理\n\n控制面/执行面分离是这一章的核心模式。Task 管\"做什么\", worktree 管\"在哪做\", 两者通过 task ID 关联但不强耦合。这意味着一个任务可以先不绑定 worktree (纯规划阶段), 也可以在多个 worktree 之间迁移。\n\n显式状态机让每次迁移都可审计、可恢复。进程崩溃后, 从 `.tasks/` 目录和 `.worktrees/index.json` 这两处磁盘状态就能重建全部现场, 不依赖会话内存。\n\n事件流是旁路可观测层, 不替代主状态机写入。审计、通知、配额控制等副作用放在事件消费者中处理, 核心流程保持最小。`keep/remove` 作为显式收尾动作存在, 而不是隐式清理 -- agent 必须做出决策, 这个决策本身被记录。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n可以尝试的提示:\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, create worktree \"ui-login\", then bind task 2 to \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect worktree events.`\n5. 
`Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" }, { "version": "s01", "locale": "ja", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n> AIコーディングエージェントの秘密はすべて、モデルが「終了」と判断するまでツール結果をモデルにフィードバックし続けるwhileループにある。\n\n## 問題\n\nなぜ言語モデルは単体でコーディングの質問に答えられないのか。それはコーディングが「現実世界とのインタラクション」を必要とするからだ。モデルはファイルを読み、テストを実行し、エラーを確認し、反復する必要がある。一回のプロンプト-レスポンスのやり取りではこれは実現できない。\n\nagent loopがなければ、ユーザーが自分でモデルの出力をコピーペーストして戻す必要がある。つまりユーザー自身がループの役割を果たすことになる。agent loopはこれを自動化する: モデルを呼び出し、モデルが要求したツールを実行し、結果をフィードバックし、モデルが「完了」と言うまで繰り返す。\n\n単純なタスクを考えてみよう: 「helloと出力するPythonファイルを作成せよ」。モデルは(1)ファイルを書くことを決定し、(2)書き、(3)動作を検証する必要がある。最低でも3回のツール呼び出しが必要だ。ループがなければ、そのたびに手動の介入が必要になる。\n\n## 解決策\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## 仕組み\n\n1. ユーザーがプロンプトを入力する。これが最初のメッセージになる。\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージ配列がツール定義と共にLLMに送信される。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスがメッセージに追加される。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. stop reasonを確認する。モデルがツールを呼び出さなかった場合、ループは終了する。これが唯一の終了条件だ。\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. レスポンス中の各tool_useブロックについて、ツール(このセッションではbash)を実行し、結果を収集する。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. 
結果がuserメッセージとして追加され、ループが続行する。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 主要コード\n\n最小限のエージェント -- パターン全体が30行未満\n(`agents/s01_agent_loop.py` 66-86行目):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 変更点\n\nこれはセッション1 -- 出発点である。前のセッションは存在しない。\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 設計原理\n\nこのループはすべてのLLMベースエージェントの普遍的な基盤だ。本番実装ではエラーハンドリング、トークンカウント、ストリーミング、リトライロジックが追加されるが、根本的な構造は変わらない。シンプルさこそがポイントだ: 1つの終了条件(`stop_reason != \"tool_use\"`)がフロー全体を制御する。本コースの他のすべて -- ツール、計画、圧縮、チーム -- はこのループの上に積み重なるが、ループ自体は変更しない。このループを理解することは、すべてのエージェントを理解することだ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n試せるプロンプト例:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\n\n> AIコーディングエージェントの中核は、モデルが「終了」と判断するまでツール結果をモデルにフィードバックし続ける while ループにある。\n\n## 問題\n\nなぜ言語モデルは単体でコーディングの質問に答えられないのか。それはコーディングが「現実世界とのインタラクション」を必要とするからだ。モデルはファイルを読み、テストを実行し、エラーを確認し、反復する必要がある。一回のプロンプト-レスポンスのやり取りではこれは実現できない。\n\nagent loopがなければ、ユーザーが自分でモデルの出力をコピーペーストして戻す必要がある。つまりユーザー自身がループの役割を果たすことになる。agent loopはこれを自動化する: モデルを呼び出し、モデルが要求したツールを実行し、結果をフィードバックし、モデルが「完了」と言うまで繰り返す。\n\n単純なタスクを考えてみよう: 「helloと出力するPythonファイルを作成せよ」。モデルは(1)ファイルを書くことを決定し、(2)書き、(3)動作を検証する必要がある。最低でも3回のツール呼び出しが必要だ。ループがなければ、そのたびに手動の介入が必要になる。\n\n## 解決策\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThe loop terminates when stop_reason != \"tool_use\".\nThat single condition is the entire control flow.\n```\n\n## 仕組み\n\n1. ユーザーがプロンプトを入力する。これが最初のメッセージになる。\n\n```python\nhistory.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージ配列がツール定義と共にLLMに送信される。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスがメッセージに追加される。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n4. stop reasonを確認する。モデルがツールを呼び出さなかった場合、ループは終了する。この最小実装では、これが唯一のループ終了条件だ。\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n5. レスポンス中の各tool_useブロックについて、ツール(このセッションではbash)を実行し、結果を収集する。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n6. 
結果がuserメッセージとして追加され、ループが続行する。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 主要コード\n\n最小限のエージェント -- パターン全体が30行未満\n(`agents/s01_agent_loop.py` 66-86行目):\n\n```python\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n## 変更点\n\nこれはセッション1 -- 出発点である。前のセッションは存在しない。\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 設計原理\n\nこのループは LLM ベースエージェントの土台だ。本番実装ではエラーハンドリング、トークン計測、ストリーミング、リトライに加え、権限ポリシーやライフサイクル編成が追加されるが、コアの相互作用パターンはここから始まる。シンプルさこそこの章の狙いであり、この最小実装では 1 つの終了条件(`stop_reason != \"tool_use\"`)で学習に必要な制御を示す。本コースの他の要素はこのループに積み重なる。つまり、このループの理解は基礎であって、本番アーキテクチャ全体そのものではない。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n試せるプロンプト例:\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" }, { "version": "s02", @@ -147,43 +159,43 @@ "version": "s03", "locale": "ja", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n> TodoManagerによりエージェントが自身の進捗を追跡でき、nagリマインダーの注入により更新を忘れた場合に強制的に更新させる。\n\n## 問題\n\nエージェントがマルチステップのタスクに取り組むとき、何を完了し何が残っているかを見失うことが多い。明示的な計画がなければ、モデルは作業を繰り返したり、ステップを飛ばしたり、脱線したりする可能性がある。ユーザーにはエージェントの内部計画が見えない。\n\nこれは見た目以上に深刻だ。長い会話ではモデルが「ドリフト」する -- コンテキストウィンドウがツール結果で埋まるにつれ、システムプロンプトの影響力が薄れていく。10ステップのリファクタリングタスクでステップ1-3を完了した後、モデルはステップ4-10の存在を忘れて即興で行動し始めるかもしれない。\n\n解決策は構造化された状態管理だ: モデルが明示的に書き込むTodoManager。モデルは計画を作成し、作業中のアイテムをin_progressとしてマークし、完了時にcompletedとマークする。nagリマインダーは、モデルが3ラウンド以上todoを更新しなかった場合にナッジを注入する。\n\n教育上の簡略化: nagの閾値3ラウンドは教育目的の可視化のために低く設定されている。本番のエージェントでは過剰なプロンプトを避けるため閾値は約10に設定されている。\n\n## 解決策\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをバリデーションして保持する。`in_progress`にできるのは一度に1つだけ。\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. 
`todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーは、モデルが3ラウンド以上`todo`を呼び出さなかった場合にtool_resultメッセージに``タグを注入する。\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. システムプロンプトがモデルにtodoによる計画を指示する。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## 主要コード\n\nTodoManagerとnag注入(`agents/s03_todo_write.py` 51-85行目および158-187行目):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses|\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 
設計原理\n\n可視化された計画はタスク完了率を向上させる。モデルが自身の進捗を自己監視できるからだ。nagメカニズムはアカウンタビリティを生み出す -- これがなければ、会話コンテキストが増大し初期の指示が薄れるにつれ、モデルは実行途中で計画を放棄する可能性がある。「一度にin_progressは1つだけ」という制約は逐次的な集中を強制し、出力品質を低下させるコンテキストスイッチのオーバーヘッドを防ぐ。このパターンが機能するのは、モデルのワーキングメモリを注意力のドリフトに耐える構造化された状態に外部化するからだ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n試せるプロンプト例:\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\n\n> TodoManagerによりエージェントが自身の進捗を追跡でき、nagリマインダーの注入により更新を忘れた場合に強制的に更新させる。\n\n## 問題\n\nエージェントがマルチステップのタスクに取り組むとき、何を完了し何が残っているかを見失うことが多い。明示的な計画がなければ、モデルは作業を繰り返したり、ステップを飛ばしたり、脱線したりする可能性がある。ユーザーにはエージェントの内部計画が見えない。\n\nこれは見た目以上に深刻だ。長い会話ではモデルが「ドリフト」する -- コンテキストウィンドウがツール結果で埋まるにつれ、システムプロンプトの影響力が薄れていく。10ステップのリファクタリングタスクでステップ1-3を完了した後、モデルはステップ4-10の存在を忘れて即興で行動し始めるかもしれない。\n\n解決策は構造化された状態管理だ: モデルが明示的に書き込むTodoManager。モデルは計画を作成し、作業中のアイテムをin_progressとしてマークし、完了時にcompletedとマークする。nagリマインダーは、モデルが3ラウンド以上todoを更新しなかった場合にナッジを注入する。\n\n注: nag 閾値 3 ラウンドは可視化のために低く設定。本番ではより高い値に調整される。s07 以降は永続的なマルチステップ作業に Task ボードを使用。TodoWrite は軽量チェックリストとして引き続き利用可能。\n\n## 解決策\n\n```\n+----------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. 
TodoManagerはアイテムのリストをバリデーションして保持する。`in_progress`にできるのは一度に1つだけ。\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n # ...other tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーは、モデルが3ラウンド以上`todo`を呼び出さなかった場合にtool_resultメッセージに`<reminder>`タグを注入する。\n\n```python\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n if rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if (last[\"role\"] == \"user\"\n and isinstance(last.get(\"content\"), list)):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"<reminder>Update your todos.</reminder>\",\n })\n # ... rest of loop ...\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n```\n\n4. 
システムプロンプトがモデルにtodoによる計画を指示する。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks.\nMark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n```\n\n## 主要コード\n\nTodoManagerとnag注入(`agents/s03_todo_write.py` 51-85行目および158-187行目):\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"id\": item[\"id\"],\n \"text\": item[\"text\"],\n \"status\": status,\n })\n if in_progress_count > 1:\n raise ValueError(\"Only one in_progress\")\n self.items = validated\n return self.render()\n\n# In agent_loop:\nif rounds_since_todo >= 3:\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"<reminder>Update your todos.</reminder>\",\n })\n```\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|--------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses|\n| Nag injection | None | `<reminder>` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 設計原理\n\n可視化された計画はタスク完了率を向上させる。モデルが自身の進捗を自己監視できるからだ。nagメカニズムはアカウンタビリティを生み出す -- これがなければ、会話コンテキストが増大し初期の指示が薄れるにつれ、モデルは実行途中で計画を放棄する可能性がある。「一度にin_progressは1つだけ」という制約は逐次的な集中を強制し、出力品質を低下させるコンテキストスイッチのオーバーヘッドを防ぐ。このパターンが機能するのは、モデルのワーキングメモリを注意力のドリフトに耐える構造化された状態に外部化するからだ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n試せるプロンプト例:\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. 
`Review all Python files and fix any style issues`\n" }, { "version": "s04", "locale": "ja", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n> サブエージェントは新しいメッセージリストで実行され、親とファイルシステムを共有し、要約のみを返す -- 親のコンテキストをクリーンに保つ。\n\n## 問題\n\nエージェントが作業するにつれ、メッセージ配列は膨張する。すべてのツール呼び出し、ファイル読み取り、bash出力が蓄積されていく。20-30回のツール呼び出しの後、コンテキストウィンドウは無関係な履歴で溢れる。ちょっとした質問に答えるために500行のファイルを読むと、永久に500行がコンテキストに追加される。\n\nこれは探索的タスクで特に深刻だ。「このプロジェクトはどのテストフレームワークを使っているか」という質問には5つのファイルを読む必要があるかもしれないが、親エージェントには5つのファイルの内容すべては不要だ -- 「pytest with conftest.py configuration」という回答だけが必要なのだ。\n\n解決策はプロセスの分離だ: `messages=[]`で子エージェントを生成する。子は探索し、ファイルを読み、コマンドを実行する。終了時には最終的なテキストレスポンスだけが親に返される。子のメッセージ履歴全体は破棄される。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## 仕組み\n\n1. 親エージェントにサブエージェント生成をトリガーする`task`ツールが追加される。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
サブエージェントは委譲されたプロンプトのみを含む新しいメッセージリストで開始する。ファイルシステムは共有される。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. 最終テキストのみが親に返される。子の30回以上のツール呼び出し履歴は破棄される。\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 親はこの要約を通常のtool_resultとして受け取る。\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## 主要コード\n\nサブエージェント関数(`agents/s04_subagent.py` 110-128行目):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 
(base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n| Todo system | TodoManager | Removed (not needed here) |\n\n## 設計原理\n\nプロセス分離はコンテキスト分離を無料で提供する。新しい`messages[]`は、サブエージェントが親の会話履歴に混乱させられないことを意味する。トレードオフは通信オーバーヘッドだ -- 結果は親に圧縮して返す必要があり、詳細が失われる。これはOSのプロセス分離と同じトレードオフだ: シリアライゼーションコストと引き換えに安全性とクリーンさを得る。サブエージェントの深さ制限(再帰的なスポーンは不可)は無制限のリソース消費を防ぎ、最大反復回数は暴走した子プロセスの終了を保証する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n試せるプロンプト例:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\n\n> サブエージェントは新しいメッセージリストで実行され、親とファイルシステムを共有し、要約のみを返す -- 親のコンテキストをクリーンに保つ。\n\n## 問題\n\nエージェントが作業するにつれ、メッセージ配列は膨張する。すべてのツール呼び出し、ファイル読み取り、bash出力が蓄積されていく。20-30回のツール呼び出しの後、コンテキストウィンドウは無関係な履歴で溢れる。ちょっとした質問に答えるために500行のファイルを読むと、永久に500行がコンテキストに追加される。\n\nこれは探索的タスクで特に深刻だ。「このプロジェクトはどのテストフレームワークを使っているか」という質問には5つのファイルを読む必要があるかもしれないが、親エージェントには5つのファイルの内容すべては不要だ -- 「pytest with conftest.py configuration」という回答だけが必要なのだ。\n\nこのコースでの実用的な解決策は fresh `messages[]` 分離だ: `messages=[]`で子エージェントを生成する。子は探索し、ファイルを読み、コマンドを実行する。終了時には最終的なテキストレスポンスだけが親に返される。子のメッセージ履歴全体は破棄される。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ---------->| while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <--------- | return last text |\n+------------------+ +------------------+\n |\nParent context stays clean.\nSubagent context is discarded.\n```\n\n## 仕組み\n\n1. 
親エージェントにサブエージェント生成をトリガーする`task`ツールが追加される。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. サブエージェントは委譲されたプロンプトのみを含む新しいメッセージリストで開始する。ファイルシステムは共有される。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\n \"role\": \"assistant\", \"content\": response.content\n })\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n```\n\n3. 最終テキストのみが親に返される。子の30回以上のツール呼び出し履歴は破棄される。\n\n```python\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n4. 
親はこの要約を通常のtool_resultとして受け取る。\n\n```python\nif block.name == \"task\":\n output = run_subagent(block.input[\"prompt\"])\nresults.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n})\n```\n\n## 主要コード\n\nサブエージェント関数(`agents/s04_subagent.py` 110-128行目):\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 設計原理\n\nこのセッションでは、fresh `messages[]` 分離はコンテキスト分離を近似する実用手段だ。新しい`messages[]`により、サブエージェントは親の会話履歴を持たずに開始する。トレードオフは通信オーバーヘッドで、結果を親へ圧縮して返すため詳細が失われる。これはメッセージ履歴の分離戦略であり、OSのプロセス分離そのものではない。サブエージェントの深さ制限(再帰スポーン不可)は無制限のリソース消費を防ぎ、最大反復回数は暴走した子処理の終了を保証する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n試せるプロンプト例:\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. 
`Use a task to create a new module, then verify it from here`\n" }, { "version": "s05", "locale": "ja", "title": "s05: Skills", - "content": "# s05: Skills\n\n> 2層のスキル注入により、スキル名をシステムプロンプトに(低コスト)、スキル本体をtool_resultに(オンデマンド)配置することで、システムプロンプトの肥大化を回避する。\n\n## 問題\n\nエージェントに特定のドメインのワークフローを遵守させたい: gitの規約、テストパターン、コードレビューのチェックリストなど。単純なアプローチはすべてをシステムプロンプトに入れることだ。しかしシステムプロンプトの実効的な注意力は有限であり、テキストが多すぎるとモデルはその一部を無視し始める。\n\n10個のスキルが各2000トークンあれば、20,000トークンのシステムプロンプトになる。モデルは先頭と末尾に注意を払い、中間部分は飛ばし読みする。さらに悪いことに、ほとんどのスキルは任意のタスクに対して無関係だ。ファイル編集のタスクにgitワークフローの指示は不要だ。\n\n2層アプローチがこれを解決する: 第1層はシステムプロンプトにスキルの短い説明を置く(スキルあたり約100トークン)。第2層はモデルが`load_skill`を呼び出した時だけ、スキル本体の全文をtool_resultに読み込む。モデルはどのスキルが存在するかを知り(低コスト)、必要な時だけ読み込む(関連する時のみ)。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## 仕組み\n\n1. スキルファイルは`.skills/`にYAMLフロントマター付きMarkdownとして配置される。\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. SkillLoaderがフロントマターを解析し、メタデータと本体を分離する。\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. 
第1層: `get_descriptions()`がシステムプロンプト用の短い行を返す。\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. 第2層: `get_content()`が``タグで囲まれた本体全文を返す。\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n5. `load_skill`ツールはディスパッチマップの単なる一エントリだ。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## 主要コード\n\nSkillLoaderクラス(`agents/s05_skill_loading.py` 51-97行目):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"\\n\"\n f\"{skill['body']}\\n\")\n```\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | .skills/*.md files |\n| Injection | None | Two-layer (system + result)|\n| Subagent | `run_subagent()` | Removed (different focus) |\n\n## 
設計原理\n\n2層注入は注意力バジェットの問題を解決する。すべてのスキル内容をシステムプロンプトに入れると、未使用のスキルにトークンを浪費する。第1層(コンパクトな要約)は合計約120トークンのコストだ。第2層(完全な内容)はtool_resultを通じてオンデマンドで読み込まれる。これにより、モデルの注意力品質を劣化させることなく数十のスキルにスケールできる。重要な洞察は、モデルはどのスキルが存在するか(低コスト)を知るだけで、いつスキルを読み込むか(高コスト)を判断できるということだ。これはソフトウェアモジュールシステムで使われる遅延読み込みと同じ原理だ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n試せるプロンプト例:\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\n\n> 2層のスキル注入により、スキル名をシステムプロンプトに(低コスト)、スキル本体をtool_resultに(オンデマンド)配置することで、システムプロンプトの肥大化を回避する。\n\n## 問題\n\nエージェントに特定のドメインのワークフローを遵守させたい: gitの規約、テストパターン、コードレビューのチェックリストなど。単純なアプローチはすべてをシステムプロンプトに入れることだ。しかしシステムプロンプトの実効的な注意力は有限であり、テキストが多すぎるとモデルはその一部を無視し始める。\n\n10個のスキルが各2000トークンあれば、20,000トークンのシステムプロンプトになる。モデルは先頭と末尾に注意を払い、中間部分は飛ばし読みする。さらに悪いことに、ほとんどのスキルは任意のタスクに対して無関係だ。ファイル編集のタスクにgitワークフローの指示は不要だ。\n\n2層アプローチがこれを解決する: 第1層はシステムプロンプトにスキルの短い説明を置く(スキルあたり約100トークン)。第2層はモデルが`load_skill`を呼び出した時だけ、スキル本体の全文をtool_resultに読み込む。モデルはどのスキルが存在するかを知り(低コスト)、必要な時だけ読み込む(関連する時のみ)。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| Step 2: ... |\n| |\n+--------------------------------------+\n```\n\n## 仕組み\n\n1. スキルファイルは`.skills/`にYAMLフロントマター付きMarkdownとして配置される。\n\n```\n.skills/\n git.md # ---\\n description: Git workflow\\n ---\\n ...\n test.md # ---\\n description: Testing patterns\\n ---\\n ...\n```\n\n2. 
SkillLoaderがフロントマターを解析し、メタデータと本体を分離する。\n\n```python\nclass SkillLoader:\n def _parse_frontmatter(self, text: str) -> tuple:\n match = re.match(\n r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL\n )\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n```\n\n3. 第1層: `get_descriptions()`がシステムプロンプト用の短い行を返す。\n\n```python\ndef get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n```\n\n4. 第2層: `get_content()`が`<skill>`タグで囲まれた本体全文を返す。\n\n```python\ndef get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"<skill name=\\\"{name}\\\">\\n{skill['body']}\\n</skill>\"\n```\n\n5. 
`load_skill`ツールはディスパッチマップの単なる一エントリだ。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n## 主要コード\n\nSkillLoaderクラス(`agents/s05_skill_loading.py` 51-97行目):\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.glob(\"*.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n self.skills[f.stem] = {\n \"meta\": meta, \"body\": body\n }\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return (f\"<skill name=\\\"{name}\\\">\\n\"\n f\"{skill['body']}\\n</skill>\")\n```\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | .skills/*.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 設計原理\n\n2層注入は注意力バジェットの問題を解決する。すべてのスキル内容をシステムプロンプトに入れると、未使用のスキルにトークンを浪費する。第1層(コンパクトな要約)は合計約120トークンのコストだ。第2層(完全な内容)はtool_resultを通じてオンデマンドで読み込まれる。これにより、モデルの注意力品質を劣化させることなく数十のスキルにスケールできる。重要な洞察は、モデルはどのスキルが存在するか(低コスト)を知るだけで、いつスキルを読み込むか(高コスト)を判断できるということだ。これはソフトウェアモジュールシステムで使われる遅延読み込みと同じ原理だ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n試せるプロンプト例:\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" }, { "version": "s06", "locale": "ja", "title": "s06: Compact", - "content": "# s06: Compact\n\n> 3層の圧縮パイプラインにより、古いツール結果の戦略的な忘却、トークンが閾値を超えた時の自動要約、オンデマンドの手動圧縮を組み合わせて、エージェントを無期限に動作可能にする。\n\n## 問題\n\nコンテキストウィンドウは有限だ。十分なツール呼び出しの後、メッセージ配列がモデルのコンテキスト上限を超え、API呼び出しが失敗する。ハード制限に達する前でも、パフォーマンスは劣化する: モデルは遅くなり、精度が落ち、以前のメッセージを無視し始める。\n\n200,000トークンのコンテキストウィンドウは大きく聞こえるが、1000行のソースファイルに対する一回の`read_file`で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン以上になる。何らかの圧縮がなければ、エージェントは大規模なコードベースで作業できない。\n\n3層のパイプラインは積極性を段階的に上げて対処する:\n第1層(micro-compact)は毎ターン静かに古いツール結果を置換する。\n第2層(auto-compact)はトークンが閾値を超えた時に完全な要約を発動する。\n第3層(manual compact)はモデル自身が圧縮をトリガーできる。\n\n教育上の簡略化: ここでのトークン推定は大まかな「文字数/4」ヒューリスティックを使用している。本番システムでは正確なカウントのために適切なトークナイザーライブラリを使用する。\n\n## 解決策\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. 
**第1層 -- micro_compact**: 各LLM呼び出しの前に、直近3件以前のすべてのtool_resultエントリを見つけて内容を置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第2層 -- auto_compact**: 推定トークン数が50,000を超えた時、完全なトランスクリプトを保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. agent loopが3つの層すべてを統合する。\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## 主要コード\n\n3層パイプライン(`agents/s06_context_compact.py` 67-93行目および189-223行目):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Manual compact | None | `compact` tool |\n| Transcripts | None | Saved to .transcripts/ |\n| Skills | load_skill | Removed (different focus) |\n\n## 設計原理\n\nコンテキストウィンドウは有限だが、エージェントセッションは無限にできる。3層の圧縮が異なる粒度でこれを解決する: micro-compact(古いツール出力の置換)、auto-compact(上限に近づいたときのLLM要約)、manual compact(ユーザートリガー)。重要な洞察は、忘却はバグではなく機能だということだ -- 無制限のセッションを可能にする。トランスクリプトはディスク上に完全な履歴を保存するため、何も真に失われず、アクティブなコンテキストの外に移動されるだけだ。層状のアプローチにより、各層がサイレントなターンごとのクリーンアップから完全な会話リセットまで、独自の粒度で独立して動作する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n試せるプロンプト例:\n\n1. `Read every Python file in the agents/ directory one by one`\n (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Compact\n\n> 3層の圧縮パイプラインにより、古いツール結果の戦略的な忘却、トークンが閾値を超えた時の自動要約、オンデマンドの手動圧縮を組み合わせて、エージェントを無期限に動作可能にする。\n\n## 問題\n\nコンテキストウィンドウは有限だ。十分なツール呼び出しの後、メッセージ配列がモデルのコンテキスト上限を超え、API呼び出しが失敗する。ハード制限に達する前でも、パフォーマンスは劣化する: モデルは遅くなり、精度が落ち、以前のメッセージを無視し始める。\n\n200,000トークンのコンテキストウィンドウは大きく聞こえるが、1000行のソースファイルに対する一回の`read_file`で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン以上になる。何らかの圧縮がなければ、エージェントは大規模なコードベースで作業できない。\n\n3層のパイプラインは積極性を段階的に上げて対処する:\n第1層(micro-compact)は毎ターン静かに古いツール結果を置換する。\n第2層(auto-compact)はトークンが閾値を超えた時に完全な要約を発動する。\n第3層(manual compact)はモデル自身が圧縮をトリガーできる。\n\n教育上の簡略化: ここでのトークン推定は大まかな「文字数/4」ヒューリスティックを使用している。本番システムでは正確なカウントのために適切なトークナイザーライブラリを使用する。\n\n## 解決策\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. 
**第1層 -- micro_compact**: 各LLM呼び出しの前に、直近3件以前のすべてのtool_resultエントリを見つけて内容を置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, part in to_clear:\n if len(part.get(\"content\", \"\")) > 100:\n tool_id = part.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第2層 -- auto_compact**: 推定トークン数が50,000を超えた時、完全なトランスクリプトを保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n```python\nif manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n4. agent loopが3つの層すべてを統合する。\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages)\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages)\n response = client.messages.create(...)\n # ... 
tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages)\n```\n\n## 主要コード\n\n3層パイプライン(`agents/s06_context_compact.py` 67-93行目および189-223行目):\n\n```python\nTHRESHOLD = 50000\nKEEP_RECENT = 3\n\ndef micro_compact(messages):\n # Replace old tool results with placeholders\n ...\n\ndef auto_compact(messages):\n # Save transcript, LLM summarize, replace messages\n ...\n\ndef agent_loop(messages):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Manual compact | None | `compact` tool |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 設計原理\n\nコンテキストウィンドウは有限だが、エージェントセッションは無限にできる。3層の圧縮が異なる粒度でこれを解決する: micro-compact(古いツール出力の置換)、auto-compact(上限に近づいたときのLLM要約)、manual compact(ユーザートリガー)。重要な洞察は、忘却はバグではなく機能だということだ -- 無制限のセッションを可能にする。トランスクリプトはディスク上に完全な履歴を保存するため、何も真に失われず、アクティブなコンテキストの外に移動されるだけだ。層状のアプローチにより、各層がサイレントなターンごとのクリーンアップから完全な会話リセットまで、独自の粒度で独立して動作する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n試せるプロンプト例:\n\n1. `Read every Python file in the agents/ directory one by one`\n (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" }, { "version": "s07", "locale": "ja", "title": "s07: Tasks", - "content": "# s07: Tasks\n\n> タスクはファイルシステム上にJSON形式で依存グラフ付きで永続化され、コンテキスト圧縮後も生き残り、複数エージェント間で共有できる。\n\n## 問題\n\nインメモリの状態であるTodoManager(s03)は、コンテキストが圧縮(s06)されると失われる。auto_compactがメッセージを要約で置換した後、todoリストは消える。エージェントは要約テキストからそれを再構成しなければならないが、これは不正確でエラーが起きやすい。\n\nこれがs06からs07への重要な橋渡しだ: TodoManagerのアイテムは圧縮と共に死ぬが、ファイルベースのタスクは死なない。状態をファイルシステムに移すことで、圧縮に対する耐性が得られる。\n\nさらに根本的な問題として、インメモリの状態は他のエージェントからは見えない。最終的にチーム(s09以降)を構築する際、チームメイトには共有のタスクボードが必要だ。インメモリのデータ構造はプロセスローカルだ。\n\n解決策はタスクを`.tasks/`にJSON形式で永続化すること。各タスクはID、件名、ステータス、依存グラフを持つ個別のファイルだ。タスク1を完了すると、タスク2が`blockedBy: [1]`を持つ場合、自動的にタスク2のブロックが解除される。ファイルシステムが信頼できる情報源となる。\n\n## 解決策\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's blockedBy list\n```\n\n## 仕組み\n\n1. TaskManagerがCRUD操作を提供する。各タスクは1つのJSONファイル。\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. タスクが完了とマークされると、`_clear_dependency`がそのIDを他のすべてのタスクの`blockedBy`リストから除去する。\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. 
`update`メソッドがステータス変更と双方向の依存関係の結線を処理する。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. 4つのタスクツールがディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## 主要コード\n\n依存グラフ付きTaskManager(`agents/s07_task_system.py` 46-123行目):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## s06からの変更点\n\n| Component | Before (s06) | After (s07) 
|\n|----------------|------------------|----------------------------|\n| Tools | 5 | 8 (+task_create/update/list/get)|\n| State storage | In-memory only | JSON files in .tasks/ |\n| Dependencies | None | blockedBy + blocks graph |\n| Compression | Three-layer | Removed (different focus) |\n| Persistence | Lost on compact | Survives compression |\n\n## 設計原理\n\nファイルベースの状態はコンテキスト圧縮を生き延びる。エージェントの会話が圧縮されるとメモリ内の状態は失われるが、ディスクに書き込まれたタスクは永続する。依存グラフにより、コンテキストが失われた後でも正しい順序で実行される。これは一時的な会話と永続的な作業の橋渡しだ -- エージェントは会話の詳細を忘れても、タスクボードが常に何をすべきかを思い出させてくれる。ファイルシステムを信頼できる情報源とすることで、将来のマルチエージェント共有も可能になる。任意のプロセスが同じJSONファイルを読み取れるからだ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n試せるプロンプト例:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" + "content": "# s07: Tasks\n\n> タスクを依存グラフ付き JSON として永続化し、コンテキスト圧縮後も状態を保持し、複数エージェントで共有できるようにする。\n\n## 問題\n\nインメモリ状態(s03 の TodoManager など)は、s06 の圧縮後に失われやすい。古いターンが要約化されると、Todo 状態は会話の外に残らない。\n\ns06 -> s07 の本質は次の切替:\n\n1. メモリ上 Todo は会話依存で失われやすい。\n2. 
ディスク上 Task は永続で復元しやすい。\n\nさらに可視性の問題がある。インメモリ構造はプロセスローカルであり、チームメイト間の共有が不安定になる。\n\n## Task vs Todo: 使い分け\n\ns07 以降は Task がデフォルト。Todo は短い直線的チェックリスト用に残る。\n\n## クイック判定マトリクス\n\n| 状況 | 優先 | 理由 |\n|---|---|---|\n| 短時間・単一セッション・直線的チェック | Todo | 儀式が最小で記録が速い |\n| セッション跨ぎ・依存関係・複数担当 | Task | 永続性、依存表現、協調可視性が必要 |\n| 迷う場合 | Task | 後で簡略化する方が、途中移行より低コスト |\n\n## 解決策\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"status\":\"pending\"}\n\nDependency resolution:\n+----------+ +----------+ +----------+\n| task 1 | --> | task 2 | --> | task 3 |\n| complete | | blocked | | blocked |\n+----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from\n task 2's blockedBy list\n```\n\n## 仕組み\n\n1. TaskManager はタスクごとに1 JSON ファイルで CRUD を提供する。\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. タスク完了時、他タスクの依存を解除する。\n\n```python\ndef _clear_dependency(self, completed_id: int):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. 
`update` が状態遷移と依存配線を担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n for blocked_id in add_blocks:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n self._save(task)\n```\n\n4. タスクツール群をディスパッチへ追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"],\n kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n## 主要コード\n\n依存グラフ付き TaskManager(`agents/s07_task_system.py` 46-123行):\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n## s06 からの変更\n\n| 項目 | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 状態保存 | メモリのみ 
| `.tasks/` の JSON |\n| 依存関係 | なし | `blockedBy + blocks` グラフ |\n| 永続性 | compact で消失 | compact 後も維持 |\n\n## 設計原理\n\nファイルベース状態は compaction や再起動に強い。依存グラフにより、会話詳細を忘れても実行順序を保てる。これにより、会話中心の状態を作業中心の永続状態へ移せる。\n\nただし耐久性には運用前提がある。書き込みのたびに task JSON を再読込し、`status/blockedBy` が期待通りか確認してから原子的に保存しないと、並行更新で状態を上書きしやすい。\n\nコース設計上、s07 以降で Task を主線に置くのは、長時間・協調開発の実態に近いから。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n例:\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test`\n" }, { "version": "s08", "locale": "ja", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n> BackgroundManagerがコマンドを別スレッドで実行し、各LLM呼び出しの前に通知キューをドレインすることで、エージェントは長時間実行操作でブロックされなくなる。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングのagent loopでは、モデルはサブプロセスの終了を待って待機する。他のことは何もできない。ユーザーが「依存関係をインストールして、その間にconfigファイルを作成して」と言った場合、エージェントはまずインストールを行い、その後configを作成する -- 並列ではなく逐次的に。\n\nエージェントには並行性が必要だ。agent loop自体の完全なマルチスレッディングではなく、長いコマンドを発射して実行中に作業を続ける能力だ。コマンドが終了したら、その結果は自然に会話に現れるべきだ。\n\n解決策は、BackgroundManagerがコマンドをデーモンスレッドで実行し、結果を通知キューに収集すること。各LLM呼び出しの前にキューがドレインされ、結果がメッセージに注入される。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## 仕組み\n\n1. 
BackgroundManagerがタスクを追跡し、スレッドセーフな通知キューを維持する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、task_idを即座に返す。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. スレッドのターゲットである`_execute`がサブプロセスを実行し、結果を通知キューにプッシュする。\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()`が保留中の結果を返してクリアする。\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. 
agent loopが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"<background-results>\"\n f\"\\n{notif_text}\\n\"\n f\"</background-results>\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## 主要コード\n\nBackgroundManager(`agents/s08_background_tasks.py` 49-107行目):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n| Task system | File-based CRUD | Removed (different focus) |\n\n## 設計原理\n\nエージェントループは本質的にシングルスレッドだ(一度に1つのLLM呼び出し)。バックグラウンドスレッドはI/Oバウンドな作業(テスト、ビルド、インストール)に対してこの制約を打破する。通知キューパターン(「次のLLM呼び出し前にドレイン」)により、結果はモデルの推論を途中で中断するのではなく、会話の自然な区切りで到着する。これは最小限の並行性モデルだ: エージェントループはシングルスレッドで決定論的なまま、I/Oバウンドなサブプロセス実行のみが並列化される。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython 
agents/s08_background_tasks.py\n```\n\n試せるプロンプト例:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\n\n> BackgroundManagerがコマンドを別スレッドで実行し、各LLM呼び出しの前に通知キューをドレインすることで、エージェントは長時間実行操作でブロックされなくなる。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングのagent loopでは、モデルはサブプロセスの終了を待って待機する。他のことは何もできない。ユーザーが「依存関係をインストールして、その間にconfigファイルを作成して」と言った場合、エージェントはまずインストールを行い、その後configを作成する -- 並列ではなく逐次的に。\n\nエージェントには並行性が必要だ。agent loop自体の完全なマルチスレッディングではなく、長いコマンドを発射して実行中に作業を続ける能力だ。コマンドが終了したら、その結果は自然に会話に現れるべきだ。\n\n解決策は、BackgroundManagerがコマンドをデーモンスレッドで実行し、結果を通知キューに収集すること。各LLM呼び出しの前にキューがドレインされ、結果がメッセージに注入される。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | task executes |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --+\n |\n [results injected before\n next LLM call]\n```\n\n## 仕組み\n\n1. BackgroundManagerがタスクを追跡し、スレッドセーフな通知キューを維持する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、task_idを即座に返す。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n }\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
スレッドのターゲットである`_execute`がサブプロセスを実行し、結果を通知キューにプッシュする。\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"result\": output[:500],\n })\n```\n\n4. `drain_notifications()`が保留中の結果を返してクリアする。\n\n```python\ndef drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n5. agent loopが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: \"\n f\"{n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\",\n \"content\": f\"<background-results>\"\n f\"\\n{notif_text}\\n\"\n f\"</background-results>\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n## 主要コード\n\nBackgroundManager(`agents/s08_background_tasks.py` 49-107行目):\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\",\n \"result\": None,\n \"command\": command}\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n\n def _execute(self, task_id, command):\n # run subprocess, push to queue\n ...\n\n def 
drain_notifications(self) -> list:\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n```\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 設計原理\n\nエージェントループは本質的にシングルスレッドだ(一度に1つのLLM呼び出し)。バックグラウンドスレッドはI/Oバウンドな作業(テスト、ビルド、インストール)に対してこの制約を打破する。通知キューパターン(「次のLLM呼び出し前にドレイン」)により、結果はモデルの推論を途中で中断するのではなく、会話の自然な区切りで到着する。これは最小限の並行性モデルだ: エージェントループはシングルスレッドで決定論的なまま、I/Oバウンドなサブプロセス実行のみが並列化される。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n試せるプロンプト例:\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" }, { "version": "s09", "locale": "ja", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n> JSONL形式のインボックスを持つ永続的なチームメイトが、孤立したエージェントをコミュニケーションするチームに変える -- spawn、message、broadcast、drain。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もなく、フォローアップの指示を受け取る方法もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定やフィードバックの伝達はできない。\n\n本物のチームワークには3つのものが必要だ: (1)単一のプロンプトを超えて存続する永続的なエージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネル。メッセージングがなければ、永続的なチームメイトでさえ聾唖だ -- 並列に作業できるが協調することはない。\n\n解決策は、名前付きの永続的エージェントを生成するTeammateManagerと、JONSLインボックスファイルを使うMessageBusの組み合わせだ。各チームメイトは自身のagent loopをスレッドで実行し、各LLM呼び出しの前にインボックスを確認し、他のチームメイトやリーダーにメッセージを送れる。\n\ns06からs07への橋渡しについての注記: s03のTodoManagerアイテムは圧縮(s06)と共に死ぬ。ファイルベースのタスク(s07)はディスク上に存在するため圧縮後も生き残る。チームも同じ原則の上に構築されている -- config.jsonとインボックスファイルはコンテキストウィンドウの外に永続化される。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## 仕組み\n\n1. TeammateManagerがチームの名簿としてconfig.jsonを管理する。各メンバーは名前、役割、ステータスを持つ。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのagent loopをスレッドで開始する。アイドル状態のチームメイトを再spawnすると再活性化される。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBusがJSONLインボックスファイルを処理する。`send()`がJSON行を追記し、`read_inbox()`がすべての行を読み取ってファイルをドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージを会話コンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. 
`broadcast()`が送信者以外の全チームメイトに同じメッセージを送信する。\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## 主要コード\n\nTeammateManager + MessageBusのコア(`agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | 5 message types + broadcast|\n\n教育上の簡略化: この実装ではインボックスアクセスにロックファイルを使用していない。本番環境では、複数ライターからの並行追記にはファイルロッキングまたはアトミックリネームが必要になる。ここで使用している単一ライター/インボックスパターンは教育シナリオでは安全だ。\n\n## 
設計原理\n\nファイルベースのメールボックス(追記専用JSONL)は並行性安全なエージェント間通信を提供する。追記はほとんどのファイルシステムでアトミックであり、ロック競合を回避する。「読み取り時にドレイン」パターン(全読み取り、切り詰め)はバッチ配信を提供する。これは共有メモリやソケットベースのIPCよりもシンプルで堅牢だ。トレードオフはレイテンシだ -- メッセージは次のポーリングまで見えない -- しかし各ターンに数秒の推論時間がかかるLLM駆動エージェントにとって、ポーリングレイテンシは推論時間に比べて無視できる。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n試せるプロンプト例:\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n" + "content": "# s09: Agent Teams\n\n> JSONL 形式のインボックスを持つ永続的なチームメイトは、孤立したエージェントを連携可能なチームへ変えるための教材プロトコルの一つだ -- spawn、message、broadcast、drain。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もなく、フォローアップの指示を受け取る方法もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定やフィードバックの伝達はできない。\n\n本物のチームワークには3つのものが必要だ: (1)単一のプロンプトを超えて存続する永続的なエージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネル。メッセージングがなければ、永続的なチームメイトでさえ聾唖だ -- 並列に作業できるが協調することはない。\n\n解決策は、名前付きの永続的エージェントを生成するTeammateManagerと、JSONL インボックスファイルを使うMessageBusの組み合わせだ。各チームメイトは自身のagent loopをスレッドで実行し、各LLM呼び出しの前にインボックスを確認し、他のチームメイトやリーダーにメッセージを送れる。\n\ns06からs07への橋渡しについての注記: s03のTodoManagerアイテムは圧縮(s06)と共に死ぬ。ファイルベースのタスク(s07)はディスク上に存在するため圧縮後も生き残る。チームも同じ原則の上に構築されている -- config.jsonとインボックスファイルはコンテキストウィンドウの外に永続化される。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n\n5 message types:\n+-------------------------+------------------------------+\n| message | Normal text between agents |\n| broadcast | Sent to all teammates |\n| shutdown_request | Request graceful shutdown |\n| shutdown_response | Approve/reject shutdown |\n| plan_approval_response | Approve/reject plan |\n+-------------------------+------------------------------+\n```\n\n## 仕組み\n\n1. TeammateManagerがチームの名簿としてconfig.jsonを管理する。各メンバーは名前、役割、ステータスを持つ。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのagent loopをスレッドで開始する。アイドル状態のチームメイトを再spawnすると再活性化される。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n self.threads[name] = thread\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBusがJSONLインボックスファイルを処理する。`send()`がJSON行を追記し、`read_inbox()`がすべての行を読み取ってファイルをドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists():\n return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージを会話コンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n sys_prompt = f\"You are '{name}', role: {role}, at {WORKDIR}.\"\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"<inbox>{inbox}</inbox>\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(\n model=MODEL, system=sys_prompt,\n messages=messages, tools=TOOLS)\n messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n self._save_config()\n```\n\n5. 
`broadcast()`が送信者以外の全チームメイトに同じメッセージを送信する。\n\n```python\ndef broadcast(self, sender, content, teammates):\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n```\n\n## 主要コード\n\nTeammateManager + MessageBusのコア(`agents/s09_agent_teams.py`):\n\n```python\nclass TeammateManager:\n def spawn(self, name, role, prompt):\n member = self._find_member(name) or {\n \"name\": name, \"role\": role, \"status\": \"working\"\n }\n member[\"status\"] = \"working\"\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned '{name}'\"\n\nclass MessageBus:\n def send(self, sender, to, content,\n msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra: msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l)\n for l in path.read_text().strip().splitlines()\n if l]\n path.write_text(\"\")\n return json.dumps(msgs, indent=2)\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | 5 message types + broadcast|\n\n教育上の簡略化: この実装ではインボックスアクセスにロックファイルを使用していない。本番環境では、複数ライターからの並行追記にはファイルロッキングまたはアトミックリネームが必要になる。ここで使用している単一ライター/インボックスパターンは教育シナリオでは安全だ。\n\n## 設計原理\n\nファイルベースのメールボックス(追記専用 
JSONL)は、教材コードとして観察しやすく理解しやすい。「読み取り時にドレイン」パターン(全読み取り、切り詰め)は、少ない仕組みでバッチ配信を実現できる。トレードオフはレイテンシで、メッセージは次のポーリングまで見えない。ただし本コースでは、各ターンに数秒かかる LLM 推論を前提にすると、この遅延は許容範囲である。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n試せるプロンプト例:\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n" }, { "version": "s10", @@ -195,6 +207,12 @@ "version": "s11", "locale": "ja", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n> タスクボードポーリング付きのアイドルサイクルにより、チームメイトが自分で作業を見つけて確保できるようになり、コンテキスト圧縮後にはアイデンティティの再注入が行われる。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てなければならない。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけることだ。チームメイトが現在のタスクを完了したら、タスクボードで未確保の作業をスキャンし、タスクを確保し、作業を開始すべきだ -- リーダーからの指示なしに。\n\nしかし自律エージェントには微妙な問題がある: コンテキスト圧縮後に、エージェントが自分が誰かを忘れる可能性がある。メッセージが要約されると、元のシステムプロンプトのアイデンティティ(「あなたはalice、役割はcoder」)が薄れる。アイデンティティの再注入は、圧縮されたコンテキストの先頭にアイデンティティブロックを挿入することでこれを解決する。\n\n教育上の簡略化: ここで使用するトークン推定は大まかなもの(文字数 / 4)だ。本番システムでは適切なトークナイザーライブラリを使用する。nagの閾値3ラウンド(s03から)は教育目的の可視化のために低く設定されている。本番のエージェントでは閾値は約10。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## 仕組み\n\n1. 
チームメイトのループにはWORKとIDLEの2つのフェーズがある。WORKは標準的なagent loopを実行する。LLMがツール呼び出しを停止した時(または`idle`ツールを呼び出した時)、チームメイトはIDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. IDLEフェーズがインボックスとタスクボードをループでポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 
タスクボードスキャンがpendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. アイデンティティの再注入は、コンテキストが短すぎる場合(圧縮が発生したことを示す)にアイデンティティブロックを挿入する。\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. `idle`ツールにより、チームメイトはもう作業がないことを明示的にシグナルし、早期にアイドルポーリングフェーズに入る。\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. 
\"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## 主要コード\n\n自律ループ(`agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 設計原理\n\nポーリング + タイムアウトにより、エージェントは中央コーディネーターなしで自己組織化する。各エージェントは独立してタスクボードをポーリングし、未確保の作業を確保し、完了したらアイドルに戻る。タイムアウトがポーリングサイクルをトリガーし、ウィンドウ内に作業が現れなければエージェントは自らシャットダウンする。これはワークスティーリングスレッドプールと同じパターンだ -- 分散型で単一障害点がない。圧縮後のアイデンティティ再注入により、会話履歴が要約された後もエージェントは自身の役割を維持する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n試せるプロンプト例:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. 
`/team`と入力して誰が作業中でアイドルかを監視する\n" + "content": "# s11: Autonomous Agents\n\n> タスクボードポーリング付きのアイドルサイクルにより、チームメイトが自分で作業を見つけて確保できるようになり、コンテキスト圧縮後にはアイデンティティの再注入が行われる。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てなければならない。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけることだ。チームメイトが現在のタスクを完了したら、タスクボードで未確保の作業をスキャンし、タスクを確保し、作業を開始すべきだ -- リーダーからの指示なしに。\n\nしかし自律エージェントには微妙な問題がある: コンテキスト圧縮後に、エージェントが自分が誰かを忘れる可能性がある。メッセージが要約されると、元のシステムプロンプトのアイデンティティ(「あなたはalice、役割はcoder」)が薄れる。アイデンティティの再注入は、圧縮されたコンテキストの先頭にアイデンティティブロックを挿入することでこれを解決する。\n\n注: トークン推定は文字数/4(大まか)。nag 閾値 3 ラウンドは可視化のために低く設定。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use\n | (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n \"You are 'alice', role: coder, team: my-team\"\n```\n\n## 仕組み\n\n1. 
チームメイトのループにはWORKとIDLEの2つのフェーズがある。WORKは標準的なagent loopを実行する。LLMがツール呼び出しを停止した時(または`idle`ツールを呼び出した時)、チームメイトはIDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append(...)\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. IDLEフェーズがインボックスとタスクボードをループでポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n # Check inbox for new messages\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n # Scan task board for unclaimed tasks\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{task['id']}: \"\n f\"{task['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 
タスクボードスキャンがpendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\ndef claim_task(task_id: int, owner: str):\n path = TASKS_DIR / f\"task_{task_id}.json\"\n task = json.loads(path.read_text())\n task[\"status\"] = \"in_progress\"\n task[\"owner\"] = owner\n path.write_text(json.dumps(task, indent=2))\n```\n\n4. アイデンティティの再注入は、コンテキストが短すぎる場合(圧縮が発生したことを示す)にアイデンティティブロックを挿入する。\n\n```python\ndef make_identity_block(name, role, team_name):\n return {\"role\": \"user\",\n \"content\": f\"You are '{name}', \"\n f\"role: {role}, team: {team_name}. \"\n f\"Continue your work.\"}\n\n# Before resuming work after idle:\nif len(messages) <= 3:\n messages.insert(0, make_identity_block(\n name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n5. `idle`ツールにより、チームメイトはもう作業がないことを明示的にシグナルし、早期にアイドルポーリングフェーズに入る。\n\n```python\n{\"name\": \"idle\",\n \"description\": \"Signal that you have no more work. 
\"\n \"Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n```\n\n## 主要コード\n\n自律ループ(`agents/s11_autonomous_agents.py`):\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # WORK PHASE\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n for block in response.content:\n if block.name == \"idle\":\n idle_requested = True\n if idle_requested:\n break\n\n # IDLE PHASE\n self._set_status(name, \"idle\")\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox: resume = True; break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n resume = True; break\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 設計原理\n\nポーリング + タイムアウトにより、エージェントは中央コーディネーターなしで自己組織化する。各エージェントは独立してタスクボードをポーリングし、未確保の作業を確保し、完了したらアイドルに戻る。タイムアウトがポーリングサイクルをトリガーし、ウィンドウ内に作業が現れなければエージェントは自らシャットダウンする。これはワークスティーリングスレッドプールと同じパターンだ -- 分散型で単一障害点がない。圧縮後のアイデンティティ再注入により、会話履歴が要約された後もエージェントは自身の役割を維持する。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n試せるプロンプト例:\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. 
`/team`と入力して誰が作業中でアイドルかを監視する\n" + }, + { + "version": "s12", + "locale": "ja", + "title": "s12: Worktree + Task Isolation", + "content": "# s12: Worktree + Task Isolation\n\n> ディレクトリで分離し、タスクIDで調整する -- タスクボード(制御面)と worktree(実行面)の組み合わせで、並行編集を衝突しやすい状態から追跡可能・復元可能・後片付け可能な状態に変える。\n\n## 問題\n\ns11 でエージェントはタスクを自律的に処理できるようになった。だが全タスクが同じ作業ディレクトリで走ると、3つの障害が現れる。\n\nあるエージェントが認証リファクタリングに取り組みながら、別のエージェントがログインページを作っている。両者が `src/auth.py` を編集する。未コミットの変更が混ざり合い、`git diff` は2つのタスクの差分が入り混じった結果を返す。どちらのエージェントの変更かを後から特定するのは困難になり、片方のタスクを巻き戻すと他方の編集も消える。\n\n1. 変更汚染: 未コミット変更が相互に干渉する。\n2. 責務の曖昧化: タスク状態とファイル変更がずれる。\n3. 終了処理の難化: 実行コンテキストを残すか削除するかの判断が曖昧になる。\n\n解決の核は「何をやるか」と「どこでやるか」の分離だ。\n\n## 解決策\n\n```\nControl Plane (.tasks/) Execution Plane (.worktrees/)\n+---------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress| bind | branch: wt/auth-ref |\n| worktree: auth-ref|-------->| cwd for commands |\n+---------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending | bind | branch: wt/ui-login |\n| worktree: ui-login|-------->| cwd for commands |\n+---------------------+ +------------------------+\n | |\n v v\n \"what to do\" \"where to execute\"\n\nEvents (.worktrees/events.jsonl)\n worktree.create.before -> worktree.create.after\n worktree.remove.before -> worktree.remove.after\n task.completed\n```\n\n## 仕組み\n\n1. 状態は3つの層に分かれる。制御面はタスクの目標と担当を管理し、実行面は worktree のパスとブランチを管理し、実行時状態はメモリ上の1ターン情報を保持する。\n\n```text\n制御面 (.tasks/task_*.json) -> id/subject/status/owner/worktree\n実行面 (.worktrees/index.json) -> name/path/branch/task_id/status\n実行時状態 (メモリ) -> current_task/current_worktree/error\n```\n\n2. Task と worktree はそれぞれ独立した状態機械を持つ。\n\n```text\nTask: pending -> in_progress -> completed\nWorktree: absent -> active -> removed | kept\n```\n\n3. 
`task_create` でまず目標を永続化する。worktree はまだ不要だ。\n\n```python\ntask = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n}\nself._save(task)\n```\n\n4. `worktree_create(name, task_id?)` で分離ディレクトリとブランチを作る。`task_id` を渡すと、タスクが `pending` なら自動的に `in_progress` に遷移する。\n\n```python\nentry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n}\nidx[\"worktrees\"].append(entry)\nself._save_index(idx)\n\nif task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n```\n\n5. `worktree_run(name, command)` で分離ディレクトリ内のコマンドを実行する。`cwd=worktree_path` が実質的な「enter」だ。\n\n```python\nr = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n)\n```\n\n6. 終了処理では `keep` か `remove` を明示的に選ぶ。`worktree_remove(name, complete_task=true)` はディレクトリ削除とタスク完了を一度に行う。\n\n```python\ndef remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n7. 
`.worktrees/events.jsonl` にライフサイクルイベントが append-only で記録される。重要な遷移には `before / after / failed` の三段イベントが出力される。\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 7, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"path\": \"...\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nイベントは可観測性のサイドチャネルであり、task/worktree の主状態機械の書き込みを置き換えるものではない。監査・通知・ポリシーチェックはイベント購読側で処理する。\n\n## 主要コード\n\nタスクの worktree バインドと状態遷移(`agents/s12_worktree_task_isolation.py` 182-191行目):\n\n```python\ndef bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n```\n\nWorktree の作成とイベント発火(`agents/s12_worktree_task_isolation.py` 283-334行目):\n\n```python\ndef create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref})\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n entry = {\n \"name\": name, \"path\": str(path), \"branch\": branch,\n \"task_id\": task_id, \"status\": \"active\",\n \"created_at\": time.time(),\n }\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n self.events.emit(\"worktree.create.after\", ...)\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\"worktree.create.failed\", ..., error=str(e))\n 
raise\n```\n\nツールディスパッチマップ(`agents/s12_worktree_task_isolation.py` 535-552行目):\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n```\n\n## s11 からの変更\n\n| 観点 | s11 | s12 |\n|---|---|---|\n| 調整状態 | Task board (`owner/status`) | Task board + `worktree` 明示バインド |\n| 実行スコープ | 共有ディレクトリ | タスク単位の分離ディレクトリ |\n| 復元性 | タスク状態のみ | タスク状態 + worktree index |\n| 終了意味論 | タスク完了のみ | タスク完了 + 明示的 keep/remove 判断 |\n| ライフサイクル可視性 | 暗黙的なログ | `.worktrees/events.jsonl` の明示イベント |\n\n## 設計原理\n\n制御面と実行面の分離が中核だ。タスクは「何をやるか」を記述し、worktree は「どこでやるか」を提供する。両者は組み合わせ可能だが、強結合ではない。状態遷移は暗黙の自動掃除ではなく、`worktree_keep` / `worktree_remove` という明示的なツール操作として表現する。イベントストリームは `before / after / failed` の三段構造で重要な遷移を記録し、監査や通知をコアロジックから分離する。中断後でも `.tasks/` + 
`.worktrees/index.json` から状態を再構築できる。揮発的な会話状態を明示的なディスク状態に落とすことが、復元可能性の鍵だ。\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n試せるプロンプト例:\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, create worktree \"ui-login\", then bind task 2 to \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect worktree events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" } ] \ No newline at end of file diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json index a88c3bc..d267bf4 100644 --- a/web/src/data/generated/versions.json +++ b/web/src/data/generated/versions.json @@ -5,7 +5,7 @@ "filename": "s01_agent_loop.py", "title": "The Agent Loop", "subtitle": "Bash is All You Need", - "loc": 83, + "loc": 84, "tools": [ "bash" ], @@ -13,22 +13,22 @@ "bash" ], "coreAddition": "Single-tool agent loop", - "keyInsight": "The entire agent is a while loop + one tool", + "keyInsight": "The minimal agent kernel is a while loop + one tool", "classes": [], "functions": [ { "name": "run_bash", "signature": "def run_bash(command: str)", - "startLine": 52 + "startLine": 53 }, { "name": "agent_loop", "signature": "def agent_loop(messages: list)", - "startLine": 66 + "startLine": 67 } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of coding agents in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThat's it. 
The ENTIRE agent is a while loop that feeds tool\nresults back to the model until the model decides to stop.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", 
\"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. 
Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in 
response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" }, { "id": "s02", @@ -48,7 +48,7 @@ "edit_file" ], "coreAddition": "Tool dispatch map", - "keyInsight": "Adding tools means adding handlers, the loop stays the same", + "keyInsight": "Adding tools means adding handlers, not rewriting the loop", "classes": [], "functions": [ { @@ -149,7 +149,7 @@ "id": "s04", "filename": "s04_subagent.py", "title": "Subagents", - "subtitle": "Fresh Context via Task Tool", + "subtitle": "Process Isolation = Context Isolation", "loc": 146, "tools": [ "bash", @@ -162,7 +162,7 @@ "task" ], "coreAddition": "Subagent spawn with isolated messages[]", - "keyInsight": "Process isolation = context isolation", + "keyInsight": "Process isolation gives context isolation for free", "classes": [], "functions": [ { @@ -729,6 +729,103 @@ ], "layer": "collaboration", "source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? 
-> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. 
Continue your work.\",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. 
You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')}\"\n )\n if 
len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", 
\"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: 
_run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", 
\"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" 
@{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + }, + { + "id": "s12", + "filename": "s12_worktree_task_isolation.py", + "title": "Worktree + Task Isolation", + "subtitle": "Isolate by Directory", + "loc": 689, + "tools": [ + "bash", + "read_file", + "write_file", + "edit_file", + "task_create", + "task_list", + "task_get", + "task_update", + "task_bind_worktree", + "worktree_create", + "worktree_list", + "worktree_status", + "worktree_run", + "worktree_remove", + "worktree_keep", + "worktree_events" + ], + "newTools": [ + "task_create", + "task_list", + "task_get", + "task_update", + "task_bind_worktree", + "worktree_create", + "worktree_list", + "worktree_status", + "worktree_run", + "worktree_remove", + "worktree_keep", + "worktree_events" + ], + "coreAddition": "Composable worktree lifecycle + event stream over a shared task board", + "keyInsight": "Task board coordinates ownership, worktrees isolate execution, and events make lifecycle auditable", + "classes": [ + { + "name": "EventBus", + "startLine": 82, + "endLine": 120 + }, + { + "name": "TaskManager", + "startLine": 121, + "endLine": 218 + }, + { + "name": "WorktreeManager", + "startLine": 224, + "endLine": 472 + } + ], + "functions": [ + { + "name": "detect_repo_root", + "signature": "def detect_repo_root(cwd: Path)", + "startLine": 52 + }, + { + "name": "safe_path", + "signature": "def safe_path(p: str)", + "startLine": 477 + }, + { + "name": "run_bash", + "signature": "def run_bash(command: str)", + "startLine": 484 + }, + { + "name": "run_read", + "signature": "def run_read(path: str, limit: int = None)", + "startLine": 503 + }, + { + "name": "run_write", + "signature": "def run_write(path: str, content: str)", + "startLine": 513 + }, + { + "name": "run_edit", + "signature": "def run_edit(path: str, old_text: str, new_text: str)", + 
"startLine": 523 + }, + { + "name": "agent_loop", + "signature": "def agent_loop(messages: list)", + "startLine": 728 + } + ], + "layer": "collaboration", + "source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. 
\"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": 
self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return 
\"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. 
Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = 
Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == 
name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n 
\"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task 
completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. 
worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" } ], "diffs": [ @@ -747,7 +844,7 @@ "write_file", "edit_file" ], - "locDelta": 32 + "locDelta": 31 }, { "from": "s02", @@ -881,6 +978,38 @@ "claim_task" ], "locDelta": 80 + }, + { + "from": "s11", + "to": "s12", + "newClasses": [ + "EventBus", + "TaskManager", + "WorktreeManager" + ], + "newFunctions": [ + "detect_repo_root", + "safe_path", + "run_bash", + "run_read", + "run_write", + "run_edit" + ], + "newTools": [ + "task_create", + "task_list", + "task_get", + "task_update", + "task_bind_worktree", + "worktree_create", + "worktree_list", + "worktree_status", + "worktree_run", + "worktree_remove", + "worktree_keep", + "worktree_events" + ], + "locDelta": 195 } ] } \ No newline at end of file diff --git a/web/src/data/scenarios/s12.json b/web/src/data/scenarios/s12.json new file mode 100644 index 0000000..745191e --- /dev/null +++ b/web/src/data/scenarios/s12.json @@ -0,0 +1,51 @@ +{ + "version": "s12", + "title": "Worktree + Task Isolation", + "description": "Use a shared task board with optional worktree lanes for clean parallel execution", + "steps": [ + { + "type": "user_message", + "content": "Implement auth refactor and login UI updates in parallel", + "annotation": "Two active tasks in one workspace would collide" + }, + { + "type": "tool_call", + "content": "task_create(subject: \"Auth refactor\")\ntask_create(subject: \"Login UI polish\")", + "toolName": "task_manager", + "annotation": "Shared board remains the coordination source of truth" + }, + { + "type": "tool_call", + "content": "worktree_create(name: \"auth-refactor\", task_id: 1)\nworktree_create(name: \"ui-login\")\ntask_bind_worktree(task_id: 2, worktree: \"ui-login\")", + 
"toolName": "worktree_manager", + "annotation": "Lane allocation and task association are composable; task 2 binds after lane creation" + }, + { + "type": "system_event", + "content": "worktree.create.before/after emitted\n.tasks/task_1.json -> { status: \"in_progress\", worktree: \"auth-refactor\" }\n.tasks/task_2.json -> { status: \"in_progress\", worktree: \"ui-login\" }\n.worktrees/index.json updated", + "annotation": "Control-plane state remains canonical; hook-style consumers can react to lifecycle events without owning canonical state writes" + }, + { + "type": "tool_call", + "content": "worktree_run(name: \"auth-refactor\", command: \"pytest tests/auth -q\")\nworktree_run(name: \"ui-login\", command: \"npm test -- login\")", + "toolName": "worktree_run", + "annotation": "In this teaching runtime, commands route by lane-scoped cwd; other runtimes may use session-level directory switches. The invariant is explicit execution context." + }, + { + "type": "tool_call", + "content": "worktree_keep(name: \"ui-login\")\nworktree_remove(name: \"auth-refactor\", complete_task: true)\nworktree_events(limit: 10)", + "toolName": "worktree_manager", + "annotation": "Closeout is explicit tool-driven state transition: mix keep/remove decisions and query lifecycle events in one pass" + }, + { + "type": "system_event", + "content": "worktree.keep emitted for ui-login\nworktree.remove.before/after emitted for auth-refactor\ntask.completed emitted for #1\n.worktrees/events.jsonl appended", + "annotation": "Lifecycle transitions become explicit records while task/worktree files remain source-of-truth" + }, + { + "type": "assistant_text", + "content": "Task board handles coordination, worktrees handle isolation. 
Parallel tracks stay clean and auditable.", + "annotation": "Coordinate in one board, isolate by lane only where needed, and run optional policy/audit side effects from lifecycle events" + } + ] +} diff --git a/web/src/i18n/messages/en.json b/web/src/i18n/messages/en.json index 4d1da0d..6dcb3ef 100644 --- a/web/src/i18n/messages/en.json +++ b/web/src/i18n/messages/en.json @@ -1,10 +1,10 @@ { - "meta": { "title": "Learn Claude Code", "description": "Build an AI coding agent from scratch, one concept at a time" }, + "meta": { "title": "Learn Claude Code", "description": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time" }, "nav": { "home": "Home", "timeline": "Timeline", "compare": "Compare", "layers": "Layers", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build an AI coding agent from scratch, one concept at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Everything else is details.", "learning_path": "Learning Path", "learning_path_desc": "11 progressive sessions, from a simple loop to full autonomous teams", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. 
Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "12 progressive sessions, from a simple loop to isolated autonomous execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" }, "version": { "loc": "lines of code", "tools": "tools", "new": "New", "prev": "Previous", "next": "Next", "view_source": "View Source", "view_diff": "View Diff", "design_decisions": "Design Decisions", "whats_new": "What's New", "tutorial": "Tutorial", "simulator": "Agent Loop Simulator", "execution_flow": "Execution Flow", "architecture": "Architecture", "concept_viz": "Concept Visualization", "alternatives": "Alternatives Considered", "tab_learn": "Learn", "tab_simulate": "Simulate", "tab_code": "Code", "tab_deep_dive": "Deep Dive" }, "sim": { "play": "Play", "pause": "Pause", "step": "Step", "reset": "Reset", "speed": "Speed", "step_of": "of" }, - "timeline": { "title": "Learning Path", "subtitle": "s01 to s11: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, + "timeline": { "title": "Learning Path", "subtitle": "s01 to s12: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" }, "layers": { "title": "Architectural Layers", "subtitle": "Five orthogonal concerns that compose into a complete agent", @@ -49,7 +49,8 @@ "s08": "Background Tasks", "s09": "Agent Teams", "s10": "Team Protocols", - "s11": "Autonomous Agents" + "s11": "Autonomous Agents", + "s12": "Worktree + Task Isolation" }, "layer_labels": { "tools": "Tools & Execution", @@ -69,6 +70,7 @@ "s08": "Background Task Lanes", "s09": "Agent Team Mailboxes", "s10": "FSM 
Team Protocols", - "s11": "Autonomous Agent Cycle" + "s11": "Autonomous Agent Cycle", + "s12": "Worktree Task Isolation" } } diff --git a/web/src/i18n/messages/ja.json b/web/src/i18n/messages/ja.json index 70b9939..25192d2 100644 --- a/web/src/i18n/messages/ja.json +++ b/web/src/i18n/messages/ja.json @@ -1,10 +1,10 @@ { - "meta": { "title": "Learn Claude Code", "description": "AIコーディングエージェントをゼロから構築、一つずつ概念を追加" }, + "meta": { "title": "Learn Claude Code", "description": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加" }, "nav": { "home": "ホーム", "timeline": "学習パス", "compare": "バージョン比較", "layers": "アーキテクチャ層", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "AIコーディングエージェントをゼロから構築、一つずつ概念を追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "全てのAIコーディングエージェントは同じループを共有:モデルを呼び出し、ツールを実行し、結果をフィードバック。他は全て詳細。", "learning_path": "学習パス", "learning_path_desc": "11の段階的セッション、シンプルなループから完全自律チームまで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "12の段階的セッション、シンプルなループから分離された自律実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" }, "version": { "loc": "行のコード", "tools": "ツール", "new": "新規", "prev": "前のバージョン", "next": "次のバージョン", "view_source": "ソースを見る", "view_diff": "差分を見る", "design_decisions": "設計判断", "whats_new": "新機能", "tutorial": "チュートリアル", "simulator": "エージェントループシミュレーター", 
"execution_flow": "実行フロー", "architecture": "アーキテクチャ", "concept_viz": "コンセプト可視化", "alternatives": "検討された代替案", "tab_learn": "学習", "tab_simulate": "シミュレーション", "tab_code": "ソースコード", "tab_deep_dive": "詳細分析" }, "sim": { "play": "再生", "pause": "一時停止", "step": "ステップ", "reset": "リセット", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学習パス", "subtitle": "s01からs11へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, + "timeline": { "title": "学習パス", "subtitle": "s01からs12へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" }, "layers": { "title": "アーキテクチャ層", "subtitle": "5つの直交する関心事が完全なエージェントを構成", @@ -49,7 +49,8 @@ "s08": "バックグラウンドタスク", "s09": "エージェントチーム", "s10": "チームプロトコル", - "s11": "自律エージェント" + "s11": "自律エージェント", + "s12": "Worktree + タスク分離" }, "layer_labels": { "tools": "ツールと実行", @@ -69,6 +70,7 @@ "s08": "バックグラウンドタスクレーン", "s09": "エージェントチーム メールボックス", "s10": "FSM チームプロトコル", - "s11": "自律エージェントサイクル" + "s11": "自律エージェントサイクル", + "s12": "Worktree タスク分離" } } diff --git a/web/src/i18n/messages/zh.json b/web/src/i18n/messages/zh.json index a2d235c..ebd85db 100644 --- a/web/src/i18n/messages/zh.json +++ b/web/src/i18n/messages/zh.json @@ -1,10 +1,10 @@ { - "meta": { "title": "Learn Claude Code", "description": "从零构建 AI 编程 Agent,每次只加一个概念" }, + "meta": { "title": "Learn Claude Code", "description": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制" }, "nav": { "home": "首页", "timeline": "学习路径", "compare": "版本对比", "layers": "架构层", "github": "GitHub" }, - "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从零构建 AI 编程 Agent,每次只加一个概念", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。其他都是细节。", "learning_path": "学习路径", "learning_path_desc": "11 个渐进式课程,从简单循环到完整自主团队", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 
循环执行时消息数组的增长" }, + "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "12 个渐进式课程,从简单循环到隔离化自治执行", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" }, "version": { "loc": "行代码", "tools": "个工具", "new": "新增", "prev": "上一版", "next": "下一版", "view_source": "查看源码", "view_diff": "查看变更", "design_decisions": "设计决策", "whats_new": "新增内容", "tutorial": "教程", "simulator": "Agent 循环模拟器", "execution_flow": "执行流程", "architecture": "架构", "concept_viz": "概念可视化", "alternatives": "替代方案", "tab_learn": "学习", "tab_simulate": "模拟", "tab_code": "源码", "tab_deep_dive": "深入探索" }, "sim": { "play": "播放", "pause": "暂停", "step": "单步", "reset": "重置", "speed": "速度", "step_of": "/" }, - "timeline": { "title": "学习路径", "subtitle": "s01 到 s11:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" }, + "timeline": { "title": "学习路径", "subtitle": "s01 到 s12:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" }, "layers": { "title": "架构层次", "subtitle": "五个正交关注点组合成完整的 Agent", @@ -49,7 +49,8 @@ "s08": "后台任务", "s09": "Agent 团队", "s10": "团队协议", - "s11": "自主 Agent" + "s11": "自主 Agent", + "s12": "Worktree + 任务隔离" }, "layer_labels": { "tools": "工具与执行", @@ -69,6 +70,7 @@ "s08": "后台任务通道", "s09": "Agent 团队邮箱", "s10": "FSM 团队协议", - "s11": "自主 Agent 循环" + "s11": "自主 Agent 循环", + "s12": "Worktree 任务隔离" } } diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts index 70e0e5e..8a7fa23 100644 --- a/web/src/lib/constants.ts +++ b/web/src/lib/constants.ts @@ -1,5 +1,5 @@ export const VERSION_ORDER = [ - "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11" + "s01", 
"s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12" ] as const; export const LEARNING_PATH = VERSION_ORDER; @@ -14,10 +14,10 @@ export const VERSION_META: Record = { - s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The entire agent is a while loop + one tool", layer: "tools", prevVersion: null }, - s02: { title: "Tools", subtitle: "The Loop Didn't Change", coreAddition: "Tool dispatch map", keyInsight: "Adding tools means adding handlers, the loop stays the same", layer: "tools", prevVersion: "s01" }, + s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The minimal agent kernel is a while loop + one tool", layer: "tools", prevVersion: null }, + s02: { title: "Tools", subtitle: "The Loop Didn't Change", coreAddition: "Tool dispatch map", keyInsight: "Adding tools means adding handlers, not rewriting the loop", layer: "tools", prevVersion: "s01" }, s03: { title: "TodoWrite", subtitle: "Plan Before You Act", coreAddition: "TodoManager + nag reminder", keyInsight: "Visible plans improve task completion and accountability", layer: "planning", prevVersion: "s02" }, - s04: { title: "Subagents", subtitle: "Fresh Context via Task Tool", coreAddition: "Subagent spawn with isolated messages[]", keyInsight: "Process isolation = context isolation", layer: "planning", prevVersion: "s03" }, + s04: { title: "Subagents", subtitle: "Process Isolation = Context Isolation", coreAddition: "Subagent spawn with isolated messages[]", keyInsight: "Process isolation gives context isolation for free", layer: "planning", prevVersion: "s03" }, s05: { title: "Skills", subtitle: "SKILL.md + tool_result Injection", coreAddition: "SkillLoader + two-layer injection", keyInsight: "Skills inject via tool_result, not system prompt", layer: "planning", prevVersion: "s04" }, s06: { title: "Compact", subtitle: "Strategic Forgetting", 
coreAddition: "micro-compact + auto-compact + archival", keyInsight: "Forgetting old context enables infinite-length sessions", layer: "memory", prevVersion: "s05" }, s07: { title: "Tasks", subtitle: "Persistent CRUD with Dependencies", coreAddition: "TaskManager with file-based state + dependency graph", keyInsight: "File-based state survives context compression", layer: "planning", prevVersion: "s06" }, @@ -25,6 +25,7 @@ export const VERSION_META: Record