feat: build an AI agent from 0 to 1 -- 11 progressive sessions

- 11 sessions from basic agent loop to autonomous teams
- Python MVP implementations for each session
- Mental-model-first docs in en/zh/ja
- Interactive web platform with step-through visualizations
- Incremental architecture: each session adds one mechanism
This commit is contained in:
CrazyBoyM 2026-02-21 14:37:42 +08:00 committed by CrazyBoyM
commit c6a27ef1d7
156 changed files with 28059 additions and 0 deletions

61
.env.example Normal file
View File

@ -0,0 +1,61 @@
# API Key (required)
# Get yours at: https://console.anthropic.com/
ANTHROPIC_API_KEY=sk-ant-xxx
# Model ID (required)
MODEL_ID=claude-sonnet-4-6
# Base URL (optional, for Anthropic-compatible providers)
# ANTHROPIC_BASE_URL=https://api.anthropic.com
# =============================================================================
# Anthropic-compatible providers
#
# Provider MODEL_ID SWE-bench TB2 Base URL
# --------------- -------------------- --------- ------ -------------------
# Anthropic claude-sonnet-4-6 79.6% 59.1% (default)
# MiniMax MiniMax-M2.5 80.2% - see below
# GLM (Zhipu) glm-5 77.8% - see below
# Kimi (Moonshot) kimi-k2.5 76.8% - see below
# DeepSeek deepseek-chat 73.0% - see below
# (V3.2)
#
# SWE-bench = SWE-bench Verified (Feb 2026)
# TB2 = Terminal-Bench 2.0 (Feb 2026)
# =============================================================================
# ---- International ----
# MiniMax https://www.minimax.io
# ANTHROPIC_BASE_URL=https://api.minimax.io/anthropic
# MODEL_ID=MiniMax-M2.5
# GLM (Zhipu) https://z.ai
# ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic
# MODEL_ID=glm-5
# Kimi (Moonshot) https://platform.moonshot.ai
# ANTHROPIC_BASE_URL=https://api.moonshot.ai/anthropic
# MODEL_ID=kimi-k2.5
# DeepSeek https://platform.deepseek.com
# ANTHROPIC_BASE_URL=https://api.deepseek.com/anthropic
# MODEL_ID=deepseek-chat
# ---- China mainland ----
# MiniMax https://platform.minimax.io
# ANTHROPIC_BASE_URL=https://api.minimaxi.com/anthropic
# MODEL_ID=MiniMax-M2.5
# GLM (Zhipu) https://open.bigmodel.cn
# ANTHROPIC_BASE_URL=https://open.bigmodel.cn/api/anthropic
# MODEL_ID=glm-5
# Kimi (Moonshot) https://platform.moonshot.cn
# ANTHROPIC_BASE_URL=https://api.moonshot.cn/anthropic
# MODEL_ID=kimi-k2.5
# DeepSeek (no regional split, same endpoint globally)
# ANTHROPIC_BASE_URL=https://api.deepseek.com/anthropic
# MODEL_ID=deepseek-chat

32
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,32 @@
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 20
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx tsc --noEmit
- name: Build
run: npm run build

68
.github/workflows/test.yml vendored Normal file
View File

@ -0,0 +1,68 @@
name: Test
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
unit-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: Install dependencies
run: pip install anthropic python-dotenv
- name: Run unit tests
run: python tests/test_unit.py
session-test:
runs-on: ubuntu-latest
strategy:
matrix:
session: [v0, v1, v2, v3, v4, v5, v6, v7, v8a, v8b, v8c, v9]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: Install dependencies
run: pip install anthropic python-dotenv
- name: Run ${{ matrix.session }} tests
env:
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
TEST_BASE_URL: ${{ secrets.TEST_BASE_URL }}
TEST_MODEL: ${{ secrets.TEST_MODEL }}
run: python tests/test_${{ matrix.session }}.py
web-build:
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v6
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: "20"
cache: "npm"
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Build
run: npm run build

223
.gitignore vendored Normal file
View File

@ -0,0 +1,223 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
/lib/
/lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Transcripts (generated by compression agent)
.transcripts/
# Runtime artifacts (generated by agent tests)
.task_outputs/
.tasks/
.teams/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Web app
web/node_modules/
web/.next/
web/out/
.vercel
.env*.local
test_providers.py

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 shareAI Lab
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

168
README-ja.md Normal file
View File

@ -0,0 +1,168 @@
# Learn Claude Code -- AI Agent をゼロから構築する
[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md)
```
THE AGENT PATTERN
=================
User --> messages[] --> LLM --> response
|
stop_reason == "tool_use"?
/ \
yes no
| |
execute tools return text
append results
loop back -----------------> messages[]
これだけだ。すべての AI コーディングエージェントはこのループ。
他はすべて改良に過ぎない。
```
**11 の段階的セッション、シンプルなループから完全な自律チームまで。**
**各セッションは1つのメカニズムを追加する。各メカニズムには1つのモットーがある。**
> **s01**   *"Bash があれば十分"* — 1つのツール + 1つのループ = エージェント
>
> **s02**   *"ループは変わらない"* — ツール追加はハンドラー追加であり、ロジック追加ではない
>
> **s03**   *"行動する前に計画せよ"* — 可視化された計画がタスク完了率を向上させる
>
> **s04**   *"プロセス分離 = コンテキスト分離"* — サブエージェントごとに新しい messages[]
>
> **s05**   *"必要な時にロード、事前にではなく"* — system prompt ではなく tool_result で知識を注入
>
> **s06**   *"戦略的忘却"* — 古いコンテキストを忘れて無限セッションを実現
>
> **s07**   *"状態は圧縮を生き延びる"* — ファイルベースの状態はコンテキスト圧縮に耐える
>
> **s08**   *"撃ちっ放し"* — ノンブロッキングスレッド + 通知キュー
>
> **s09**   *"追記で送信、排出で読取"* — 永続チームメイトのための非同期メールボックス
>
> **s10**   *"同じ request_id、2つのプロトコル"* — 1つの FSM パターンでシャットダウン + プラン承認
>
> **s11**   *"ポーリング、クレーム、作業、繰り返し"* — コーディネーター不要、エージェントが自己組織化
---
## コアパターン
```python
def agent_loop(messages):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM,
messages=messages, tools=TOOLS,
)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = TOOL_HANDLERS[block.name](**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
各セッションはこのループの上に1つのメカニズムを重ねる -- ループ自体は変わらない。
## クイックスタート
```sh
git clone https://github.com/shareAI-lab/learn-claude-code
cd learn-claude-code
pip install -r requirements.txt
cp .env.example .env # .env を編集して ANTHROPIC_API_KEY を入力
python agents/s01_agent_loop.py # ここから開始
python agents/s11_autonomous_agents.py # 完全自律チーム
```
### Web プラットフォーム
インタラクティブな可視化、ステップスルーアニメーション、ソースビューア、各セッションのドキュメント。
```sh
cd web && npm install && npm run dev # http://localhost:3000
```
## 学習パス
```
フェーズ1: ループ フェーズ2: 計画と知識
================== ==============================
s01 エージェントループ [1] s03 TodoWrite [5]
while + stop_reason TodoManager + nag リマインダー
| |
+-> s02 ツール [4] s04 サブエージェント [5]
dispatch map: name->handler 子ごとに新しい messages[]
|
s05 Skills [5]
SKILL.md を tool_result で注入
|
s06 Compact [5]
3層コンテキスト圧縮
フェーズ3: 永続化 フェーズ4: チーム
================== =====================
s07 タスクシステム [8] s09 エージェントチーム [9]
ファイルベース CRUD + 依存グラフ チームメイト + JSONL メールボックス
| |
s08 バックグラウンドタスク [6] s10 チームプロトコル [12]
デーモンスレッド + 通知キュー シャットダウン + プラン承認 FSM
|
s11 自律エージェント [14]
アイドルサイクル + 自動クレーム
[N] = ツール数
```
## プロジェクト構成
```
learn-claude-code/
|
|-- agents/ # Python リファレンス実装 (s01-s11 + 完全版)
|-- docs/{en,zh,ja}/ # メンタルモデル優先のドキュメント (3言語)
|-- web/ # インタラクティブ学習プラットフォーム (Next.js)
|-- skills/ # s05 の Skill ファイル
+-- .github/workflows/ci.yml # CI: 型チェック + ビルド
```
## ドキュメント
メンタルモデル優先: 問題、解決策、ASCII図、最小限のコード。
[English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/)
| セッション | トピック | モットー |
|-----------|---------|---------|
| [s01](./docs/ja/s01-the-agent-loop.md) | エージェントループ | *Bash があれば十分* |
| [s02](./docs/ja/s02-tool-use.md) | ツール | *ループは変わらない* |
| [s03](./docs/ja/s03-todo-write.md) | TodoWrite | *行動する前に計画せよ* |
| [s04](./docs/ja/s04-subagent.md) | サブエージェント | *プロセス分離 = コンテキスト分離* |
| [s05](./docs/ja/s05-skill-loading.md) | Skills | *必要な時にロード、事前にではなく* |
| [s06](./docs/ja/s06-context-compact.md) | Compact | *戦略的忘却* |
| [s07](./docs/ja/s07-task-system.md) | タスクシステム | *状態は圧縮を生き延びる* |
| [s08](./docs/ja/s08-background-tasks.md) | バックグラウンドタスク | *撃ちっ放し* |
| [s09](./docs/ja/s09-agent-teams.md) | エージェントチーム | *追記で送信、排出で読取* |
| [s10](./docs/ja/s10-team-protocols.md) | チームプロトコル | *同じ request_id、2つのプロトコル* |
| [s11](./docs/ja/s11-autonomous-agents.md) | 自律エージェント | *ポーリング、クレーム、作業、繰り返し* |
## ライセンス
MIT
---
**モデルがエージェントだ。私たちの仕事はツールを渡して、邪魔をしないこと。**

168
README-zh.md Normal file
View File

@ -0,0 +1,168 @@
# Learn Claude Code -- 从零构建 AI Agent
[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md)
```
THE AGENT PATTERN
=================
User --> messages[] --> LLM --> response
|
stop_reason == "tool_use"?
/ \
yes no
| |
execute tools return text
append results
loop back -----------------> messages[]
就这些。每个 AI 编程 Agent 都是这个循环。
其他一切都是优化。
```
**11 个递进式课程, 从简单循环到完整的自治团队。**
**每个课程添加一个机制。每个机制有一句格言。**
> **s01**   *"Bash 就够了"* — 一个工具 + 一个循环 = 一个智能体
>
> **s02**   *"循环没有变"* — 加工具就是加 handler, 不是加逻辑
>
> **s03**   *"先计划再行动"* — 可见的计划提升任务完成率
>
> **s04**   *"进程隔离 = 上下文隔离"* — 每个子智能体独立 messages[]
>
> **s05**   *"按需加载, 而非预装"* — 通过 tool_result 注入知识, 而非塞进 system prompt
>
> **s06**   *"策略性遗忘"* — 忘掉旧上下文, 换来无限会话
>
> **s07**   *"状态在压缩后存活"* — 文件持久化的状态不怕上下文压缩
>
> **s08**   *"发射后不管"* — 非阻塞线程 + 通知队列
>
> **s09**   *"追加即发送, 排空即读取"* — 异步邮箱实现持久化队友通信
>
> **s10**   *"同一个 request_id, 两个协议"* — 一个 FSM 模式驱动关机 + 计划审批
>
> **s11**   *"轮询, 认领, 工作, 重复"* — 无需协调者, 智能体自组织
---
## 核心模式
```python
def agent_loop(messages):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM,
messages=messages, tools=TOOLS,
)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = TOOL_HANDLERS[block.name](**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
每个课程在这个循环之上叠加一个机制 -- 循环本身始终不变。
## 快速开始
```sh
git clone https://github.com/shareAI-lab/learn-claude-code
cd learn-claude-code
pip install -r requirements.txt
cp .env.example .env # 编辑 .env 填入你的 ANTHROPIC_API_KEY
python agents/s01_agent_loop.py # 从这里开始
python agents/s11_autonomous_agents.py # 完整自治团队
```
### Web 平台
交互式可视化、分步动画、源码查看器, 以及每个课程的文档。
```sh
cd web && npm install && npm run dev # http://localhost:3000
```
## 学习路径
```
第一阶段: 循环 第二阶段: 规划与知识
================== ==============================
s01 Agent 循环 [1] s03 TodoWrite [5]
while + stop_reason TodoManager + nag 提醒
| |
+-> s02 工具 [4] s04 子智能体 [5]
dispatch map: name->handler 每个子智能体独立 messages[]
|
s05 Skills [5]
SKILL.md 通过 tool_result 注入
|
s06 Compact [5]
三层上下文压缩
第三阶段: 持久化 第四阶段: 团队
================== =====================
s07 任务系统 [8] s09 智能体团队 [9]
文件持久化 CRUD + 依赖图 队友 + JSONL 邮箱
| |
s08 后台任务 [6] s10 团队协议 [12]
守护线程 + 通知队列 关机 + 计划审批 FSM
|
s11 自治智能体 [14]
空闲轮询 + 自动认领
[N] = 工具数量
```
## 项目结构
```
learn-claude-code/
|
|-- agents/ # Python 参考实现 (s01-s11 + 完整版)
|-- docs/{en,zh,ja}/ # 心智模型优先的文档 (3 种语言)
|-- web/ # 交互式学习平台 (Next.js)
|-- skills/ # s05 的 Skill 文件
+-- .github/workflows/ci.yml # CI: 类型检查 + 构建
```
## 文档
心智模型优先: 问题、方案、ASCII 图、最小化代码。
[English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/)
| 课程 | 主题 | 格言 |
|------|------|------|
| [s01](./docs/zh/s01-the-agent-loop.md) | Agent 循环 | *Bash 就够了* |
| [s02](./docs/zh/s02-tool-use.md) | 工具 | *循环没有变* |
| [s03](./docs/zh/s03-todo-write.md) | TodoWrite | *先计划再行动* |
| [s04](./docs/zh/s04-subagent.md) | 子智能体 | *进程隔离 = 上下文隔离* |
| [s05](./docs/zh/s05-skill-loading.md) | Skills | *按需加载, 而非预装* |
| [s06](./docs/zh/s06-context-compact.md) | Compact | *策略性遗忘* |
| [s07](./docs/zh/s07-task-system.md) | 任务系统 | *状态在压缩后存活* |
| [s08](./docs/zh/s08-background-tasks.md) | 后台任务 | *发射后不管* |
| [s09](./docs/zh/s09-agent-teams.md) | 智能体团队 | *追加即发送, 排空即读取* |
| [s10](./docs/zh/s10-team-protocols.md) | 团队协议 | *同一个 request_id, 两个协议* |
| [s11](./docs/zh/s11-autonomous-agents.md) | 自治智能体 | *轮询, 认领, 工作, 重复* |
## 许可证
MIT
---
**模型就是智能体。我们的工作是给它工具, 然后让开。**

168
README.md Normal file
View File

@ -0,0 +1,168 @@
# Learn Claude Code -- Build an AI Agent From Scratch
[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md)
```
THE AGENT PATTERN
=================
User --> messages[] --> LLM --> response
|
stop_reason == "tool_use"?
/ \
yes no
| |
execute tools return text
append results
loop back -----------------> messages[]
That's it. Every AI coding agent is this loop.
Everything else is refinement.
```
**11 progressive sessions, from a simple loop to full autonomous teams.**
**Each session adds one mechanism. Each mechanism has one motto.**
> **s01**   *"Bash is all you need"* — one tool + one loop = an agent
>
> **s02**   *"The loop didn't change"* — adding tools means adding handlers, not logic
>
> **s03**   *"Plan before you act"* — visible plans improve task completion
>
> **s04**   *"Process isolation = context isolation"* — fresh messages[] per subagent
>
> **s05**   *"Load on demand, not upfront"* — inject knowledge via tool_result, not system prompt
>
> **s06**   *"Strategic forgetting"* — forget old context to enable infinite sessions
>
> **s07**   *"State survives /compact"* — file-based state outlives context compression
>
> **s08**   *"Fire and forget"* — non-blocking threads + notification queue
>
> **s09**   *"Append to send, drain to read"* — async mailboxes for persistent teammates
>
> **s10**   *"Same request_id, two protocols"* — one FSM pattern powers shutdown + plan approval
>
> **s11**   *"Poll, claim, work, repeat"* — no coordinator needed, agents self-organize
---
## The Core Pattern
```python
def agent_loop(messages):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM,
messages=messages, tools=TOOLS,
)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = TOOL_HANDLERS[block.name](**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
Every session layers one mechanism on top of this loop -- without changing the loop itself.
## Quick Start
```sh
git clone https://github.com/shareAI-lab/learn-claude-code
cd learn-claude-code
pip install -r requirements.txt
cp .env.example .env # Edit .env with your ANTHROPIC_API_KEY
python agents/s01_agent_loop.py # Start here
python agents/s11_autonomous_agents.py # Full autonomous team
```
### Web Platform
Interactive visualizations, step-through diagrams, source viewer, and documentation.
```sh
cd web && npm install && npm run dev # http://localhost:3000
```
## Learning Path
```
Phase 1: THE LOOP Phase 2: PLANNING & KNOWLEDGE
================== ==============================
s01 The Agent Loop [1] s03 TodoWrite [5]
while + stop_reason TodoManager + nag reminder
| |
+-> s02 Tools [4] s04 Subagents [5]
dispatch map: name->handler fresh messages[] per child
|
s05 Skills [5]
SKILL.md via tool_result
|
s06 Compact [5]
3-layer compression
Phase 3: PERSISTENCE Phase 4: TEAMS
================== =====================
s07 Tasks [8] s09 Agent Teams [9]
file-based CRUD + deps graph teammates + JSONL mailboxes
| |
s08 Background Tasks [6] s10 Team Protocols [12]
daemon threads + notify queue shutdown + plan approval FSM
|
s11 Autonomous Agents [14]
idle cycle + auto-claim
[N] = number of tools
```
## Architecture
```
learn-claude-code/
|
|-- agents/ # Python reference implementations (s01-s11 + full)
|-- docs/{en,zh,ja}/ # Mental-model-first documentation (3 languages)
|-- web/ # Interactive learning platform (Next.js)
|-- skills/ # Skill files for s05
+-- .github/workflows/ci.yml # CI: typecheck + build
```
## Documentation
Mental-model-first: problem, solution, ASCII diagram, minimal code.
Available in [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/).
| Session | Topic | Motto |
|---------|-------|-------|
| [s01](./docs/en/s01-the-agent-loop.md) | The Agent Loop | *Bash is all you need* |
| [s02](./docs/en/s02-tool-use.md) | Tools | *The loop didn't change* |
| [s03](./docs/en/s03-todo-write.md) | TodoWrite | *Plan before you act* |
| [s04](./docs/en/s04-subagent.md) | Subagents | *Process isolation = context isolation* |
| [s05](./docs/en/s05-skill-loading.md) | Skills | *Load on demand, not upfront* |
| [s06](./docs/en/s06-context-compact.md) | Compact | *Strategic forgetting* |
| [s07](./docs/en/s07-task-system.md) | Tasks | *State survives /compact* |
| [s08](./docs/en/s08-background-tasks.md) | Background Tasks | *Fire and forget* |
| [s09](./docs/en/s09-agent-teams.md) | Agent Teams | *Append to send, drain to read* |
| [s10](./docs/en/s10-team-protocols.md) | Team Protocols | *Same request_id, two protocols* |
| [s11](./docs/en/s11-autonomous-agents.md) | Autonomous Agents | *Poll, claim, work, repeat* |
## License
MIT
---
**The model is the agent. Our job is to give it tools and stay out of the way.**

2
agents/__init__.py Normal file
View File

@ -0,0 +1,2 @@
# agents/ - Python teaching agents (s01-s11) + reference agent (s_full)
# Each file is self-contained and runnable: python agents/s01_agent_loop.py

105
agents/s01_agent_loop.py Normal file
View File

@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""
s01_agent_loop.py - The Agent Loop
The entire secret of coding agents in one pattern:
while stop_reason == "tool_use":
response = LLM(messages, tools)
execute tools
append results
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tool |
| prompt | | | | execute |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
(loop continues)
That's it. The ENTIRE agent is a while loop that feeds tool
results back to the model until the model decides to stop.
"""
import os
import subprocess
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain."
TOOLS = [{
"name": "bash",
"description": "Run a shell command.",
"input_schema": {
"type": "object",
"properties": {"command": {"type": "string"}},
"required": ["command"],
},
}]
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=os.getcwd(),
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
# -- The core pattern: a while loop that calls tools until the model stops --
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
# Append assistant turn
messages.append({"role": "assistant", "content": response.content})
# If the model didn't call a tool, we're done
if response.stop_reason != "tool_use":
return
# Execute each tool call, collect results
results = []
for block in response.content:
if block.type == "tool_use":
print(f"\033[33m$ {block.input['command']}\033[0m")
output = run_bash(block.input["command"])
print(output[:200])
results.append({"type": "tool_result", "tool_use_id": block.id,
"content": output})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms01 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
response_content = history[-1]["content"]
if isinstance(response_content, list):
for block in response_content:
if hasattr(block, "text"):
print(block.text)
print()

143
agents/s02_tool_use.py Normal file
View File

@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
s02_tool_use.py - Tools
The agent loop from s01 didn't change. We just added tools to the array
and a dispatch map to route calls.
+----------+ +-------+ +------------------+
| User | ---> | LLM | ---> | Tool Dispatch |
| prompt | | | | { |
+----------+ +---+---+ | bash: run_bash |
^ | read: run_read |
| | write: run_wr |
+----------+ edit: run_edit |
tool_result| } |
+------------------+
Key insight: "The loop didn't change at all. I just added tools."
"""
import os
import subprocess
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain."
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
text = safe_path(path).read_text()
lines = text.splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more lines)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes to {path}"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
fp.write_text(content.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# -- The dispatch map: {tool_name: handler} --
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
]
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
print(f"> {block.name}: {output[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": output})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms02 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

206
agents/s03_todo_write.py Normal file
View File

@ -0,0 +1,206 @@
#!/usr/bin/env python3
"""
s03_todo_write.py - TodoWrite
The model tracks its own progress via a TodoManager. A nag reminder
forces it to keep updating when it forgets.
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tools |
| prompt | | | | + todo |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
|
+-----------+-----------+
| TodoManager state |
| [ ] task A |
| [>] task B <- doing |
| [x] task C |
+-----------------------+
|
if rounds_since_todo >= 3:
inject <reminder>
Key insight: "The agent can track its own progress -- and I can see it."
"""
import os
import subprocess
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.
Prefer tools over prose."""
# -- TodoManager: structured state the LLM writes to --
class TodoManager:
def __init__(self):
self.items = []
def update(self, items: list) -> str:
if len(items) > 20:
raise ValueError("Max 20 todos allowed")
validated = []
in_progress_count = 0
for i, item in enumerate(items):
text = str(item.get("text", "")).strip()
status = str(item.get("status", "pending")).lower()
item_id = str(item.get("id", str(i + 1)))
if not text:
raise ValueError(f"Item {item_id}: text required")
if status not in ("pending", "in_progress", "completed"):
raise ValueError(f"Item {item_id}: invalid status '{status}'")
if status == "in_progress":
in_progress_count += 1
validated.append({"id": item_id, "text": text, "status": status})
if in_progress_count > 1:
raise ValueError("Only one task can be in_progress at a time")
self.items = validated
return self.render()
def render(self) -> str:
if not self.items:
return "No todos."
lines = []
for item in self.items:
marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}[item["status"]]
lines.append(f"{marker} #{item['id']}: {item['text']}")
done = sum(1 for t in self.items if t["status"] == "completed")
lines.append(f"\n({done}/{len(self.items)} completed)")
return "\n".join(lines)
TODO = TodoManager()
# -- Tool implementations --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
fp.write_text(content.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"todo": lambda **kw: TODO.update(kw["items"]),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "todo", "description": "Update task list. Track progress on multi-step tasks.",
"input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "text": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}}, "required": ["id", "text", "status"]}}}, "required": ["items"]}},
]
# -- Agent loop with nag reminder injection --
def agent_loop(messages: list):
rounds_since_todo = 0
while True:
# Nag reminder: if 3+ rounds without a todo update, inject reminder
if rounds_since_todo >= 3 and messages:
last = messages[-1]
if last["role"] == "user" and isinstance(last.get("content"), list):
last["content"].insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"})
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
used_todo = False
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
if block.name == "todo":
used_todo = True
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms03 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

178
agents/s04_subagent.py Normal file
View File

@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
s04_subagent.py - Subagents
Spawn a child agent with fresh messages=[]. The child works in its own
context, sharing the filesystem, then returns only a summary to the parent.
Parent agent Subagent
+------------------+ +------------------+
| messages=[...] | | messages=[] | <-- fresh
| | dispatch | |
| tool: task | ---------->| while tool_use: |
| prompt="..." | | call tools |
| description="" | | append results |
| | summary | |
| result = "..." | <--------- | return last text |
+------------------+ +------------------+
|
Parent context stays clean.
Subagent context is discarded.
Key insight: "Process isolation gives context isolation for free."
"""
import os
import subprocess
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks."
SUBAGENT_SYSTEM = f"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings."
# -- Tool implementations shared by parent and child --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
fp.write_text(content.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
}
# Child gets all base tools except task (no recursive spawning)
CHILD_TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
]
# -- Subagent: fresh context, filtered tools, summary-only return --
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}] # fresh context
for _ in range(30): # safety limit
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]})
sub_messages.append({"role": "user", "content": results})
# Only the final text returns to the parent -- child context is discarded
return "".join(b.text for b in response.content if hasattr(b, "text")) or "(no summary)"
# -- Parent tools: base tools + task dispatcher --
PARENT_TOOLS = CHILD_TOOLS + [
{"name": "task", "description": "Spawn a subagent with fresh context. It shares the filesystem but not conversation history.",
"input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}, "description": {"type": "string", "description": "Short description of the task"}}, "required": ["prompt"]}},
]
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=PARENT_TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
if block.name == "task":
desc = block.input.get("description", "subtask")
print(f"> task ({desc}): {block.input['prompt'][:80]}")
output = run_subagent(block.input["prompt"])
else:
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
print(f" {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms04 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

214
agents/s05_skill_loading.py Normal file
View File

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
s05_skill_loading.py - Skills
Two-layer skill injection that avoids bloating the system prompt:
Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)
Layer 2 (on demand): full skill body in tool_result
System prompt:
+--------------------------------------+
| You are a coding agent. |
| Skills available: |
| - git: Git workflow helpers | <-- Layer 1: metadata only
| - test: Testing best practices |
+--------------------------------------+
When model calls load_skill("git"):
+--------------------------------------+
| tool_result: |
| <skill> |
| Full git workflow instructions... | <-- Layer 2: full body
| Step 1: ... |
| Step 2: ... |
| </skill> |
+--------------------------------------+
Key insight: "Don't put everything in the system prompt. Load on demand."
"""
import os
import re
import subprocess
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SKILLS_DIR = WORKDIR / ".skills"
# -- SkillLoader: parse .skills/*.md files with YAML frontmatter --
class SkillLoader:
def __init__(self, skills_dir: Path):
self.skills_dir = skills_dir
self.skills = {}
self._load_all()
def _load_all(self):
if not self.skills_dir.exists():
return
for f in sorted(self.skills_dir.glob("*.md")):
name = f.stem
text = f.read_text()
meta, body = self._parse_frontmatter(text)
self.skills[name] = {"meta": meta, "body": body, "path": str(f)}
def _parse_frontmatter(self, text: str) -> tuple:
"""Parse YAML frontmatter between --- delimiters."""
match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL)
if not match:
return {}, text
meta = {}
for line in match.group(1).strip().splitlines():
if ":" in line:
key, val = line.split(":", 1)
meta[key.strip()] = val.strip()
return meta, match.group(2).strip()
def get_descriptions(self) -> str:
"""Layer 1: short descriptions for the system prompt."""
if not self.skills:
return "(no skills available)"
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "No description")
tags = skill["meta"].get("tags", "")
line = f" - {name}: {desc}"
if tags:
line += f" [{tags}]"
lines.append(line)
return "\n".join(lines)
def get_content(self, name: str) -> str:
"""Layer 2: full skill body returned in tool_result."""
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}"
return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>"
SKILL_LOADER = SkillLoader(SKILLS_DIR)
# Layer 1: skill metadata injected into system prompt
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use load_skill to access specialized knowledge before tackling unfamiliar topics.
Skills available:
{SKILL_LOADER.get_descriptions()}"""
# -- Tool implementations --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
fp.write_text(content.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "load_skill", "description": "Load specialized knowledge by name.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string", "description": "Skill name to load"}}, "required": ["name"]}},
]
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms05 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

View File

@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
s06_context_compact.py - Compact
Three-layer compression pipeline so the agent can work forever:
Every turn:
+------------------+
| Tool call result |
+------------------+
|
v
[Layer 1: micro_compact] (silent, every turn)
Replace tool_result content older than last 3
with "[Previous: used {tool_name}]"
|
v
[Check: tokens > 50000?]
| |
no yes
| |
v v
continue [Layer 2: auto_compact]
Save full transcript to .transcripts/
Ask LLM to summarize conversation.
Replace all messages with [summary].
|
v
[Layer 3: compact tool]
Model calls compact -> immediate summarization.
Same as auto, triggered manually.
Key insight: "The agent can forget strategically and keep working forever."
"""
import json
import os
import subprocess
import time
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks."
THRESHOLD = 50000
TRANSCRIPT_DIR = WORKDIR / ".transcripts"
KEEP_RECENT = 3
def estimate_tokens(messages: list) -> int:
"""Rough token count: ~4 chars per token."""
return len(str(messages)) // 4
# -- Layer 1: micro_compact - replace old tool results with placeholders --
def micro_compact(messages: list) -> list:
# Collect (msg_index, part_index, tool_result_dict) for all tool_result entries
tool_results = []
for msg_idx, msg in enumerate(messages):
if msg["role"] == "user" and isinstance(msg.get("content"), list):
for part_idx, part in enumerate(msg["content"]):
if isinstance(part, dict) and part.get("type") == "tool_result":
tool_results.append((msg_idx, part_idx, part))
if len(tool_results) <= KEEP_RECENT:
return messages
# Find tool_name for each result by matching tool_use_id in prior assistant messages
tool_name_map = {}
for msg in messages:
if msg["role"] == "assistant":
content = msg.get("content", [])
if isinstance(content, list):
for block in content:
if hasattr(block, "type") and block.type == "tool_use":
tool_name_map[block.id] = block.name
# Clear old results (keep last KEEP_RECENT)
to_clear = tool_results[:-KEEP_RECENT]
for _, _, result in to_clear:
if isinstance(result.get("content"), str) and len(result["content"]) > 100:
tool_id = result.get("tool_use_id", "")
tool_name = tool_name_map.get(tool_id, "unknown")
result["content"] = f"[Previous: used {tool_name}]"
return messages
# -- Layer 2: auto_compact - save transcript, summarize, replace messages --
def auto_compact(messages: list) -> list:
# Save full transcript to disk
TRANSCRIPT_DIR.mkdir(exist_ok=True)
transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
with open(transcript_path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, default=str) + "\n")
print(f"[transcript saved: {transcript_path}]")
# Ask LLM to summarize
conversation_text = json.dumps(messages, default=str)[:80000]
response = client.messages.create(
model=MODEL,
messages=[{"role": "user", "content":
"Summarize this conversation for continuity. Include: "
"1) What was accomplished, 2) Current state, 3) Key decisions made. "
"Be concise but preserve critical details.\n\n" + conversation_text}],
max_tokens=2000,
)
summary = response.content[0].text
# Replace all messages with compressed summary
return [
{"role": "user", "content": f"[Conversation compressed. Transcript: {transcript_path}]\n\n{summary}"},
{"role": "assistant", "content": "Understood. I have the context from the summary. Continuing."},
]
# -- Tool implementations --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
fp.write_text(content.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"compact": lambda **kw: "Manual compression requested.",
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "compact", "description": "Trigger manual conversation compression.",
"input_schema": {"type": "object", "properties": {"focus": {"type": "string", "description": "What to preserve in the summary"}}}},
]
def agent_loop(messages: list):
while True:
# Layer 1: micro_compact before each LLM call
micro_compact(messages)
# Layer 2: auto_compact if token estimate exceeds threshold
if estimate_tokens(messages) > THRESHOLD:
print("[auto_compact triggered]")
messages[:] = auto_compact(messages)
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
manual_compact = False
for block in response.content:
if block.type == "tool_use":
if block.name == "compact":
manual_compact = True
output = "Compressing..."
else:
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
# Layer 3: manual compact triggered by the compact tool
if manual_compact:
print("[manual compact]")
messages[:] = auto_compact(messages)
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms06 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

242
agents/s07_task_system.py Normal file
View File

@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
s07_task_system.py - Tasks
Tasks persist as JSON files in .tasks/ so they survive context compression.
Each task has a dependency graph (blockedBy/blocks).
.tasks/
task_1.json {"id":1, "subject":"...", "status":"completed", ...}
task_2.json {"id":2, "blockedBy":[1], "status":"pending", ...}
task_3.json {"id":3, "blockedBy":[2], "blocks":[], ...}
Dependency resolution:
+----------+ +----------+ +----------+
| task 1 | --> | task 2 | --> | task 3 |
| complete | | blocked | | blocked |
+----------+ +----------+ +----------+
| ^
+--- completing task 1 removes it from task 2's blockedBy
Key insight: "State that survives compression -- because it's outside the conversation."
"""
import json
import os
import subprocess
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
TASKS_DIR = WORKDIR / ".tasks"
SYSTEM = f"You are a coding agent at {WORKDIR}. Use task tools to plan and track work."
# -- TaskManager: CRUD with dependency graph, persisted as JSON files --
class TaskManager:
def __init__(self, tasks_dir: Path):
self.dir = tasks_dir
self.dir.mkdir(exist_ok=True)
self._next_id = self._max_id() + 1
def _max_id(self) -> int:
ids = [int(f.stem.split("_")[1]) for f in self.dir.glob("task_*.json")]
return max(ids) if ids else 0
def _load(self, task_id: int) -> dict:
path = self.dir / f"task_{task_id}.json"
if not path.exists():
raise ValueError(f"Task {task_id} not found")
return json.loads(path.read_text())
def _save(self, task: dict):
path = self.dir / f"task_{task['id']}.json"
path.write_text(json.dumps(task, indent=2))
def create(self, subject: str, description: str = "") -> str:
task = {
"id": self._next_id, "subject": subject, "description": description,
"status": "pending", "blockedBy": [], "blocks": [], "owner": "",
}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
def get(self, task_id: int) -> str:
return json.dumps(self._load(task_id), indent=2)
def update(self, task_id: int, status: str = None,
add_blocked_by: list = None, add_blocks: list = None) -> str:
task = self._load(task_id)
if status:
if status not in ("pending", "in_progress", "completed"):
raise ValueError(f"Invalid status: {status}")
task["status"] = status
# When a task is completed, remove it from all other tasks' blockedBy
if status == "completed":
self._clear_dependency(task_id)
if add_blocked_by:
task["blockedBy"] = list(set(task["blockedBy"] + add_blocked_by))
if add_blocks:
task["blocks"] = list(set(task["blocks"] + add_blocks))
# Bidirectional: also update the blocked tasks' blockedBy lists
for blocked_id in add_blocks:
try:
blocked = self._load(blocked_id)
if task_id not in blocked["blockedBy"]:
blocked["blockedBy"].append(task_id)
self._save(blocked)
except ValueError:
pass
self._save(task)
return json.dumps(task, indent=2)
def _clear_dependency(self, completed_id: int):
"""Remove completed_id from all other tasks' blockedBy lists."""
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
def list_all(self) -> str:
tasks = []
for f in sorted(self.dir.glob("task_*.json")):
tasks.append(json.loads(f.read_text()))
if not tasks:
return "No tasks."
lines = []
for t in tasks:
marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}.get(t["status"], "[?]")
blocked = f" (blocked by: {t['blockedBy']})" if t.get("blockedBy") else ""
lines.append(f"{marker} #{t['id']}: {t['subject']}{blocked}")
return "\n".join(lines)
TASKS = TaskManager(TASKS_DIR)
# -- Base tool implementations --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"task_create": lambda **kw: TASKS.create(kw["subject"], kw.get("description", "")),
"task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status"), kw.get("addBlockedBy"), kw.get("addBlocks")),
"task_list": lambda **kw: TASKS.list_all(),
"task_get": lambda **kw: TASKS.get(kw["task_id"]),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "task_create", "description": "Create a new task.",
"input_schema": {"type": "object", "properties": {"subject": {"type": "string"}, "description": {"type": "string"}}, "required": ["subject"]}},
{"name": "task_update", "description": "Update a task's status or dependencies.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}, "addBlockedBy": {"type": "array", "items": {"type": "integer"}}, "addBlocks": {"type": "array", "items": {"type": "integer"}}}, "required": ["task_id"]}},
{"name": "task_list", "description": "List all tasks with status summary.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "task_get", "description": "Get full details of a task by ID.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
]
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms07 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

View File

@ -0,0 +1,228 @@
#!/usr/bin/env python3
"""
s08_background_tasks.py - Background Tasks
Run commands in background threads. A notification queue is drained
before each LLM call to deliver results.
Main thread Background thread
+-----------------+ +-----------------+
| agent loop | | task executes |
| ... | | ... |
| [LLM call] <---+------- | enqueue(result) |
| ^drain queue | +-----------------+
+-----------------+
Timeline:
Agent ----[spawn A]----[spawn B]----[other work]----
| |
v v
[A runs] [B runs] (parallel)
| |
+-- notification queue --> [results injected]
Key insight: "Fire and forget -- the agent doesn't block while the command runs."
"""
import os
import subprocess
import threading
import uuid
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
SYSTEM = f"You are a coding agent at {WORKDIR}. Use background_run for long-running commands."
# -- BackgroundManager: threaded execution + notification queue --
class BackgroundManager:
def __init__(self):
self.tasks = {} # task_id -> {status, result, command}
self._notification_queue = [] # completed task results
self._lock = threading.Lock()
def run(self, command: str) -> str:
"""Start a background thread, return task_id immediately."""
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {"status": "running", "result": None, "command": command}
thread = threading.Thread(
target=self._execute, args=(task_id, command), daemon=True
)
thread.start()
return f"Background task {task_id} started: {command[:80]}"
def _execute(self, task_id: str, command: str):
"""Thread target: run subprocess, capture output, push to queue."""
try:
r = subprocess.run(
command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=300
)
output = (r.stdout + r.stderr).strip()[:50000]
status = "completed"
except subprocess.TimeoutExpired:
output = "Error: Timeout (300s)"
status = "timeout"
except Exception as e:
output = f"Error: {e}"
status = "error"
self.tasks[task_id]["status"] = status
self.tasks[task_id]["result"] = output or "(no output)"
with self._lock:
self._notification_queue.append({
"task_id": task_id,
"status": status,
"command": command[:80],
"result": (output or "(no output)")[:500],
})
def check(self, task_id: str = None) -> str:
"""Check status of one task or list all."""
if task_id:
t = self.tasks.get(task_id)
if not t:
return f"Error: Unknown task {task_id}"
return f"[{t['status']}] {t['command'][:60]}\n{t.get('result') or '(running)'}"
lines = []
for tid, t in self.tasks.items():
lines.append(f"{tid}: [{t['status']}] {t['command'][:60]}")
return "\n".join(lines) if lines else "No background tasks."
def drain_notifications(self) -> list:
"""Return and clear all pending completion notifications."""
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
BG = BackgroundManager()
# -- Tool implementations --
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"background_run": lambda **kw: BG.run(kw["command"]),
"check_background": lambda **kw: BG.check(kw.get("task_id")),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command (blocking).",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "background_run", "description": "Run command in background thread. Returns task_id immediately.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "check_background", "description": "Check background task status. Omit task_id to list all.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "string"}}}},
]
def agent_loop(messages: list):
while True:
# Drain background notifications and inject as system message before LLM call
notifs = BG.drain_notifications()
if notifs and messages:
notif_text = "\n".join(
f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs
)
messages.append({"role": "user", "content": f"<background-results>\n{notif_text}\n</background-results>"})
messages.append({"role": "assistant", "content": "Noted background results."})
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms08 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
history.append({"role": "user", "content": query})
agent_loop(history)
print()

400
agents/s09_agent_teams.py Normal file
View File

@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
s09_agent_teams.py - Agent Teams
Persistent named agents with file-based JSONL inboxes. Each teammate runs
its own agent loop in a separate thread. Communication via append-only inboxes.
Subagent (s04): spawn -> execute -> return summary -> destroyed
Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown
.team/config.json .team/inbox/
+----------------------------+ +------------------+
| {"team_name": "default", | | alice.jsonl |
| "members": [ | | bob.jsonl |
| {"name":"alice", | | lead.jsonl |
| "role":"coder", | +------------------+
| "status":"idle"} |
| ]} | send_message("alice", "fix bug"):
+----------------------------+ open("alice.jsonl", "a").write(msg)
read_inbox("alice"):
spawn_teammate("alice","coder",...) msgs = [json.loads(l) for l in ...]
| open("alice.jsonl", "w").close()
v return msgs # drain
Thread: alice Thread: bob
+------------------+ +------------------+
| agent_loop | | agent_loop |
| status: working | | status: idle |
| ... runs tools | | ... waits ... |
| status -> idle | | |
+------------------+ +------------------+
5 message types (all declared, not all handled here):
+-------------------------+-----------------------------------+
| message | Normal text message |
| broadcast | Sent to all teammates |
| shutdown_request | Request graceful shutdown (s10) |
| shutdown_response | Approve/reject shutdown (s10) |
| plan_approval_response | Approve/reject plan (s10) |
+-------------------------+-----------------------------------+
Key insight: "Teammates that can talk to each other."
"""
import json
import os
import subprocess
import threading
import time
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
TEAM_DIR = WORKDIR / ".team"
INBOX_DIR = TEAM_DIR / "inbox"
SYSTEM = f"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes."
VALID_MSG_TYPES = {
"message",
"broadcast",
"shutdown_request",
"shutdown_response",
"plan_approval_response",
}
# -- MessageBus: JSONL inbox per teammate --
class MessageBus:
def __init__(self, inbox_dir: Path):
self.dir = inbox_dir
self.dir.mkdir(parents=True, exist_ok=True)
def send(self, sender: str, to: str, content: str,
msg_type: str = "message", extra: dict = None) -> str:
if msg_type not in VALID_MSG_TYPES:
return f"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}"
msg = {
"type": msg_type,
"from": sender,
"content": content,
"timestamp": time.time(),
}
if extra:
msg.update(extra)
inbox_path = self.dir / f"{to}.jsonl"
with open(inbox_path, "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name: str) -> list:
inbox_path = self.dir / f"{name}.jsonl"
if not inbox_path.exists():
return []
messages = []
for line in inbox_path.read_text().strip().splitlines():
if line:
messages.append(json.loads(line))
inbox_path.write_text("")
return messages
def broadcast(self, sender: str, content: str, teammates: list) -> str:
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
BUS = MessageBus(INBOX_DIR)
# -- TeammateManager: persistent named agents with config.json --
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
def _load_config(self) -> dict:
if self.config_path.exists():
return json.loads(self.config_path.read_text())
return {"team_name": "default", "members": []}
def _save_config(self):
self.config_path.write_text(json.dumps(self.config, indent=2))
def _find_member(self, name: str) -> dict:
for m in self.config["members"]:
if m["name"] == name:
return m
return None
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
member["role"] = role
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt),
daemon=True,
)
self.threads[name] = thread
thread.start()
return f"Spawned '{name}' (role: {role})"
def _teammate_loop(self, name: str, role: str, prompt: str):
sys_prompt = (
f"You are '{name}', role: {role}, at {WORKDIR}. "
f"Use send_message to communicate. Complete your task."
)
messages = [{"role": "user", "content": prompt}]
tools = self._teammate_tools()
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
messages.append({"role": "user", "content": json.dumps(msg)})
try:
response = client.messages.create(
model=MODEL,
system=sys_prompt,
messages=messages,
tools=tools,
max_tokens=8000,
)
except Exception:
break
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
output = self._exec(name, block.name, block.input)
print(f" [{name}] {block.name}: {str(output)[:120]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
messages.append({"role": "user", "content": results})
member = self._find_member(name)
if member and member["status"] != "shutdown":
member["status"] = "idle"
self._save_config()
def _exec(self, sender: str, tool_name: str, args: dict) -> str:
# these base tools are unchanged from s02
if tool_name == "bash":
return _run_bash(args["command"])
if tool_name == "read_file":
return _run_read(args["path"])
if tool_name == "write_file":
return _run_write(args["path"], args["content"])
if tool_name == "edit_file":
return _run_edit(args["path"], args["old_text"], args["new_text"])
if tool_name == "send_message":
return BUS.send(sender, args["to"], args["content"], args.get("msg_type", "message"))
if tool_name == "read_inbox":
return json.dumps(BUS.read_inbox(sender), indent=2)
return f"Unknown tool: {tool_name}"
def _teammate_tools(self) -> list:
# these base tools are unchanged from s02
return [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "send_message", "description": "Send message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain your inbox.",
"input_schema": {"type": "object", "properties": {}}},
]
def list_all(self) -> str:
if not self.config["members"]:
return "No teammates."
lines = [f"Team: {self.config['team_name']}"]
for m in self.config["members"]:
lines.append(f" {m['name']} ({m['role']}): {m['status']}")
return "\n".join(lines)
def member_names(self) -> list:
return [m["name"] for m in self.config["members"]]
TEAM = TeammateManager(TEAM_DIR)
# -- Base tool implementations (these base tools are unchanged from s02) --
def _safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def _run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(
command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120,
)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def _run_read(path: str, limit: int = None) -> str:
try:
lines = _safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def _run_write(path: str, content: str) -> str:
try:
fp = _safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def _run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = _safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# -- Lead tool dispatch (9 tools) --
TOOL_HANDLERS = {
"bash": lambda **kw: _run_bash(kw["command"]),
"read_file": lambda **kw: _run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: _run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: _run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"spawn_teammate": lambda **kw: TEAM.spawn(kw["name"], kw["role"], kw["prompt"]),
"list_teammates": lambda **kw: TEAM.list_all(),
"send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")),
"read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2),
"broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()),
}
# these base tools are unchanged from s02
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "spawn_teammate", "description": "Spawn a persistent teammate that runs in its own thread.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}},
{"name": "list_teammates", "description": "List all teammates with name, role, status.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "send_message", "description": "Send a message to a teammate's inbox.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain the lead's inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "broadcast", "description": "Send a message to all teammates.",
"input_schema": {"type": "object", "properties": {"content": {"type": "string"}}, "required": ["content"]}},
]
def agent_loop(messages: list):
while True:
inbox = BUS.read_inbox("lead")
if inbox:
messages.append({
"role": "user",
"content": f"<inbox>{json.dumps(inbox, indent=2)}</inbox>",
})
messages.append({
"role": "assistant",
"content": "Noted inbox messages.",
})
response = client.messages.create(
model=MODEL,
system=SYSTEM,
messages=messages,
tools=TOOLS,
max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms09 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
if query.strip() == "/team":
print(TEAM.list_all())
continue
if query.strip() == "/inbox":
print(json.dumps(BUS.read_inbox("lead"), indent=2))
continue
history.append({"role": "user", "content": query})
agent_loop(history)
print()

View File

@ -0,0 +1,481 @@
#!/usr/bin/env python3
"""
s10_team_protocols.py - Team Protocols
Shutdown protocol and plan approval protocol, both using the same
request_id correlation pattern. Builds on s09's team messaging.
Shutdown FSM: pending -> approved | rejected
Lead Teammate
+---------------------+ +---------------------+
| shutdown_request | | |
| { | -------> | receives request |
| request_id: abc | | decides: approve? |
| } | | |
+---------------------+ +---------------------+
|
+---------------------+ +-------v-------------+
| shutdown_response | <------- | shutdown_response |
| { | | { |
| request_id: abc | | request_id: abc |
| approve: true | | approve: true |
| } | | } |
+---------------------+ +---------------------+
|
v
status -> "shutdown", thread stops
Plan approval FSM: pending -> approved | rejected
Teammate Lead
+---------------------+ +---------------------+
| plan_approval | | |
| submit: {plan:"..."}| -------> | reviews plan text |
+---------------------+ | approve/reject? |
+---------------------+
|
+---------------------+ +-------v-------------+
| plan_approval_resp | <------- | plan_approval |
| {approve: true} | | review: {req_id, |
+---------------------+ | approve: true} |
+---------------------+
Trackers: {request_id: {"target|from": name, "status": "pending|..."}}
Key insight: "Same request_id correlation pattern, two domains."
"""
import json
import os
import subprocess
import threading
import time
import uuid
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
TEAM_DIR = WORKDIR / ".team"
INBOX_DIR = TEAM_DIR / "inbox"
SYSTEM = f"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols."
VALID_MSG_TYPES = {
"message",
"broadcast",
"shutdown_request",
"shutdown_response",
"plan_approval_response",
}
# -- Request trackers: correlate by request_id --
shutdown_requests = {}
plan_requests = {}
_tracker_lock = threading.Lock()
# -- MessageBus: JSONL inbox per teammate --
class MessageBus:
def __init__(self, inbox_dir: Path):
self.dir = inbox_dir
self.dir.mkdir(parents=True, exist_ok=True)
def send(self, sender: str, to: str, content: str,
msg_type: str = "message", extra: dict = None) -> str:
if msg_type not in VALID_MSG_TYPES:
return f"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}"
msg = {
"type": msg_type,
"from": sender,
"content": content,
"timestamp": time.time(),
}
if extra:
msg.update(extra)
inbox_path = self.dir / f"{to}.jsonl"
with open(inbox_path, "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name: str) -> list:
inbox_path = self.dir / f"{name}.jsonl"
if not inbox_path.exists():
return []
messages = []
for line in inbox_path.read_text().strip().splitlines():
if line:
messages.append(json.loads(line))
inbox_path.write_text("")
return messages
def broadcast(self, sender: str, content: str, teammates: list) -> str:
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
BUS = MessageBus(INBOX_DIR)
# -- TeammateManager with shutdown + plan approval --
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
def _load_config(self) -> dict:
if self.config_path.exists():
return json.loads(self.config_path.read_text())
return {"team_name": "default", "members": []}
def _save_config(self):
self.config_path.write_text(json.dumps(self.config, indent=2))
def _find_member(self, name: str) -> dict:
for m in self.config["members"]:
if m["name"] == name:
return m
return None
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
member["role"] = role
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt),
daemon=True,
)
self.threads[name] = thread
thread.start()
return f"Spawned '{name}' (role: {role})"
def _teammate_loop(self, name: str, role: str, prompt: str):
sys_prompt = (
f"You are '{name}', role: {role}, at {WORKDIR}. "
f"Submit plans via plan_approval before major work. "
f"Respond to shutdown_request with shutdown_response."
)
messages = [{"role": "user", "content": prompt}]
tools = self._teammate_tools()
should_exit = False
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
messages.append({"role": "user", "content": json.dumps(msg)})
if should_exit:
break
try:
response = client.messages.create(
model=MODEL,
system=sys_prompt,
messages=messages,
tools=tools,
max_tokens=8000,
)
except Exception:
break
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
output = self._exec(name, block.name, block.input)
print(f" [{name}] {block.name}: {str(output)[:120]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
if block.name == "shutdown_response" and block.input.get("approve"):
should_exit = True
messages.append({"role": "user", "content": results})
member = self._find_member(name)
if member:
member["status"] = "shutdown" if should_exit else "idle"
self._save_config()
def _exec(self, sender: str, tool_name: str, args: dict) -> str:
# these base tools are unchanged from s02
if tool_name == "bash":
return _run_bash(args["command"])
if tool_name == "read_file":
return _run_read(args["path"])
if tool_name == "write_file":
return _run_write(args["path"], args["content"])
if tool_name == "edit_file":
return _run_edit(args["path"], args["old_text"], args["new_text"])
if tool_name == "send_message":
return BUS.send(sender, args["to"], args["content"], args.get("msg_type", "message"))
if tool_name == "read_inbox":
return json.dumps(BUS.read_inbox(sender), indent=2)
if tool_name == "shutdown_response":
req_id = args["request_id"]
approve = args["approve"]
with _tracker_lock:
if req_id in shutdown_requests:
shutdown_requests[req_id]["status"] = "approved" if approve else "rejected"
BUS.send(
sender, "lead", args.get("reason", ""),
"shutdown_response", {"request_id": req_id, "approve": approve},
)
return f"Shutdown {'approved' if approve else 'rejected'}"
if tool_name == "plan_approval":
plan_text = args.get("plan", "")
req_id = str(uuid.uuid4())[:8]
with _tracker_lock:
plan_requests[req_id] = {"from": sender, "plan": plan_text, "status": "pending"}
BUS.send(
sender, "lead", plan_text, "plan_approval_response",
{"request_id": req_id, "plan": plan_text},
)
return f"Plan submitted (request_id={req_id}). Waiting for lead approval."
return f"Unknown tool: {tool_name}"
def _teammate_tools(self) -> list:
# these base tools are unchanged from s02
return [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "send_message", "description": "Send message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain your inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "shutdown_response", "description": "Respond to a shutdown request. Approve to shut down, reject to keep working.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "reason": {"type": "string"}}, "required": ["request_id", "approve"]}},
{"name": "plan_approval", "description": "Submit a plan for lead approval. Provide plan text.",
"input_schema": {"type": "object", "properties": {"plan": {"type": "string"}}, "required": ["plan"]}},
]
def list_all(self) -> str:
if not self.config["members"]:
return "No teammates."
lines = [f"Team: {self.config['team_name']}"]
for m in self.config["members"]:
lines.append(f" {m['name']} ({m['role']}): {m['status']}")
return "\n".join(lines)
def member_names(self) -> list:
return [m["name"] for m in self.config["members"]]
TEAM = TeammateManager(TEAM_DIR)
# -- Base tool implementations (these base tools are unchanged from s02) --
def _safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def _run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(
command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120,
)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def _run_read(path: str, limit: int = None) -> str:
try:
lines = _safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def _run_write(path: str, content: str) -> str:
try:
fp = _safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def _run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = _safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# -- Lead-specific protocol handlers --
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
with _tracker_lock:
shutdown_requests[req_id] = {"target": teammate, "status": "pending"}
BUS.send(
"lead", teammate, "Please shut down gracefully.",
"shutdown_request", {"request_id": req_id},
)
return f"Shutdown request {req_id} sent to '{teammate}' (status: pending)"
def handle_plan_review(request_id: str, approve: bool, feedback: str = "") -> str:
with _tracker_lock:
req = plan_requests.get(request_id)
if not req:
return f"Error: Unknown plan request_id '{request_id}'"
with _tracker_lock:
req["status"] = "approved" if approve else "rejected"
BUS.send(
"lead", req["from"], feedback, "plan_approval_response",
{"request_id": request_id, "approve": approve, "feedback": feedback},
)
return f"Plan {req['status']} for '{req['from']}'"
def _check_shutdown_status(request_id: str) -> str:
with _tracker_lock:
return json.dumps(shutdown_requests.get(request_id, {"error": "not found"}))
# -- Lead tool dispatch (12 tools) --
TOOL_HANDLERS = {
"bash": lambda **kw: _run_bash(kw["command"]),
"read_file": lambda **kw: _run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: _run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: _run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"spawn_teammate": lambda **kw: TEAM.spawn(kw["name"], kw["role"], kw["prompt"]),
"list_teammates": lambda **kw: TEAM.list_all(),
"send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")),
"read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2),
"broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()),
"shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]),
"shutdown_response": lambda **kw: _check_shutdown_status(kw.get("request_id", "")),
"plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")),
}
# these base tools are unchanged from s02
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "spawn_teammate", "description": "Spawn a persistent teammate.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}},
{"name": "list_teammates", "description": "List all teammates.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "send_message", "description": "Send a message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain the lead's inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "broadcast", "description": "Send a message to all teammates.",
"input_schema": {"type": "object", "properties": {"content": {"type": "string"}}, "required": ["content"]}},
{"name": "shutdown_request", "description": "Request a teammate to shut down gracefully. Returns a request_id for tracking.",
"input_schema": {"type": "object", "properties": {"teammate": {"type": "string"}}, "required": ["teammate"]}},
{"name": "shutdown_response", "description": "Check the status of a shutdown request by request_id.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}}, "required": ["request_id"]}},
{"name": "plan_approval", "description": "Approve or reject a teammate's plan. Provide request_id + approve + optional feedback.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "feedback": {"type": "string"}}, "required": ["request_id", "approve"]}},
]
def agent_loop(messages: list):
while True:
inbox = BUS.read_inbox("lead")
if inbox:
messages.append({
"role": "user",
"content": f"<inbox>{json.dumps(inbox, indent=2)}</inbox>",
})
messages.append({
"role": "assistant",
"content": "Noted inbox messages.",
})
response = client.messages.create(
model=MODEL,
system=SYSTEM,
messages=messages,
tools=TOOLS,
max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms10 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
if query.strip() == "/team":
print(TEAM.list_all())
continue
if query.strip() == "/inbox":
print(json.dumps(BUS.read_inbox("lead"), indent=2))
continue
history.append({"role": "user", "content": query})
agent_loop(history)
print()

View File

@ -0,0 +1,573 @@
#!/usr/bin/env python3
"""
s11_autonomous_agents.py - Autonomous Agents
Idle cycle with task board polling, auto-claiming unclaimed tasks, and
identity re-injection after context compression. Builds on s10's protocols.
Teammate lifecycle:
+-------+
| spawn |
+---+---+
|
v
+-------+ tool_use +-------+
| WORK | <----------- | LLM |
+---+---+ +-------+
|
| stop_reason != tool_use
v
+--------+
| IDLE | poll every 5s for up to 60s
+---+----+
|
+---> check inbox -> message? -> resume WORK
|
+---> scan .tasks/ -> unclaimed? -> claim -> resume WORK
|
+---> timeout (60s) -> shutdown
Identity re-injection after compression:
messages = [identity_block, ...remaining...]
"You are 'coder', role: backend, team: my-team"
Key insight: "The agent finds work itself."
"""
import json
import os
import subprocess
import threading
import time
import uuid
from pathlib import Path
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
TEAM_DIR = WORKDIR / ".team"
INBOX_DIR = TEAM_DIR / "inbox"
TASKS_DIR = WORKDIR / ".tasks"
POLL_INTERVAL = 5
IDLE_TIMEOUT = 60
SYSTEM = f"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves."
VALID_MSG_TYPES = {
"message",
"broadcast",
"shutdown_request",
"shutdown_response",
"plan_approval_response",
}
# -- Request trackers --
shutdown_requests = {}
plan_requests = {}
_tracker_lock = threading.Lock()
_claim_lock = threading.Lock()
# -- MessageBus: JSONL inbox per teammate --
class MessageBus:
def __init__(self, inbox_dir: Path):
self.dir = inbox_dir
self.dir.mkdir(parents=True, exist_ok=True)
def send(self, sender: str, to: str, content: str,
msg_type: str = "message", extra: dict = None) -> str:
if msg_type not in VALID_MSG_TYPES:
return f"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}"
msg = {
"type": msg_type,
"from": sender,
"content": content,
"timestamp": time.time(),
}
if extra:
msg.update(extra)
inbox_path = self.dir / f"{to}.jsonl"
with open(inbox_path, "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name: str) -> list:
inbox_path = self.dir / f"{name}.jsonl"
if not inbox_path.exists():
return []
messages = []
for line in inbox_path.read_text().strip().splitlines():
if line:
messages.append(json.loads(line))
inbox_path.write_text("")
return messages
def broadcast(self, sender: str, content: str, teammates: list) -> str:
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
BUS = MessageBus(INBOX_DIR)
# -- Task board scanning --
def scan_unclaimed_tasks() -> list:
TASKS_DIR.mkdir(exist_ok=True)
unclaimed = []
for f in sorted(TASKS_DIR.glob("task_*.json")):
task = json.loads(f.read_text())
if (task.get("status") == "pending"
and not task.get("owner")
and not task.get("blockedBy")):
unclaimed.append(task)
return unclaimed
def claim_task(task_id: int, owner: str) -> str:
with _claim_lock:
path = TASKS_DIR / f"task_{task_id}.json"
if not path.exists():
return f"Error: Task {task_id} not found"
task = json.loads(path.read_text())
task["owner"] = owner
task["status"] = "in_progress"
path.write_text(json.dumps(task, indent=2))
return f"Claimed task #{task_id} for {owner}"
# -- Identity re-injection after compression --
def make_identity_block(name: str, role: str, team_name: str) -> dict:
return {
"role": "user",
"content": f"<identity>You are '{name}', role: {role}, team: {team_name}. Continue your work.</identity>",
}
# -- Autonomous TeammateManager --
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
def _load_config(self) -> dict:
if self.config_path.exists():
return json.loads(self.config_path.read_text())
return {"team_name": "default", "members": []}
def _save_config(self):
self.config_path.write_text(json.dumps(self.config, indent=2))
def _find_member(self, name: str) -> dict:
for m in self.config["members"]:
if m["name"] == name:
return m
return None
def _set_status(self, name: str, status: str):
member = self._find_member(name)
if member:
member["status"] = status
self._save_config()
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
member["role"] = role
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._loop,
args=(name, role, prompt),
daemon=True,
)
self.threads[name] = thread
thread.start()
return f"Spawned '{name}' (role: {role})"
def _loop(self, name: str, role: str, prompt: str):
team_name = self.config["team_name"]
sys_prompt = (
f"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. "
f"Use idle tool when you have no more work. You will auto-claim new tasks."
)
messages = [{"role": "user", "content": prompt}]
tools = self._teammate_tools()
while True:
# -- WORK PHASE: standard agent loop --
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append({"role": "user", "content": json.dumps(msg)})
try:
response = client.messages.create(
model=MODEL,
system=sys_prompt,
messages=messages,
tools=tools,
max_tokens=8000,
)
except Exception:
self._set_status(name, "idle")
return
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
break
results = []
idle_requested = False
for block in response.content:
if block.type == "tool_use":
if block.name == "idle":
idle_requested = True
output = "Entering idle phase. Will poll for new tasks."
else:
output = self._exec(name, block.name, block.input)
print(f" [{name}] {block.name}: {str(output)[:120]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
messages.append({"role": "user", "content": results})
if idle_requested:
break
# -- IDLE PHASE: poll for inbox messages and unclaimed tasks --
self._set_status(name, "idle")
resume = False
polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)
for _ in range(polls):
time.sleep(POLL_INTERVAL)
inbox = BUS.read_inbox(name)
if inbox:
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append({"role": "user", "content": json.dumps(msg)})
resume = True
break
unclaimed = scan_unclaimed_tasks()
if unclaimed:
task = unclaimed[0]
claim_task(task["id"], name)
task_prompt = (
f"<auto-claimed>Task #{task['id']}: {task['subject']}\n"
f"{task.get('description', '')}</auto-claimed>"
)
if len(messages) <= 3:
messages.insert(0, make_identity_block(name, role, team_name))
messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."})
messages.append({"role": "user", "content": task_prompt})
messages.append({"role": "assistant", "content": f"Claimed task #{task['id']}. Working on it."})
resume = True
break
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
def _exec(self, sender: str, tool_name: str, args: dict) -> str:
# these base tools are unchanged from s02
if tool_name == "bash":
return _run_bash(args["command"])
if tool_name == "read_file":
return _run_read(args["path"])
if tool_name == "write_file":
return _run_write(args["path"], args["content"])
if tool_name == "edit_file":
return _run_edit(args["path"], args["old_text"], args["new_text"])
if tool_name == "send_message":
return BUS.send(sender, args["to"], args["content"], args.get("msg_type", "message"))
if tool_name == "read_inbox":
return json.dumps(BUS.read_inbox(sender), indent=2)
if tool_name == "shutdown_response":
req_id = args["request_id"]
with _tracker_lock:
if req_id in shutdown_requests:
shutdown_requests[req_id]["status"] = "approved" if args["approve"] else "rejected"
BUS.send(
sender, "lead", args.get("reason", ""),
"shutdown_response", {"request_id": req_id, "approve": args["approve"]},
)
return f"Shutdown {'approved' if args['approve'] else 'rejected'}"
if tool_name == "plan_approval":
plan_text = args.get("plan", "")
req_id = str(uuid.uuid4())[:8]
with _tracker_lock:
plan_requests[req_id] = {"from": sender, "plan": plan_text, "status": "pending"}
BUS.send(
sender, "lead", plan_text, "plan_approval_response",
{"request_id": req_id, "plan": plan_text},
)
return f"Plan submitted (request_id={req_id}). Waiting for approval."
if tool_name == "claim_task":
return claim_task(args["task_id"], sender)
return f"Unknown tool: {tool_name}"
def _teammate_tools(self) -> list:
# these base tools are unchanged from s02
return [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "send_message", "description": "Send message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain your inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "shutdown_response", "description": "Respond to a shutdown request.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "reason": {"type": "string"}}, "required": ["request_id", "approve"]}},
{"name": "plan_approval", "description": "Submit a plan for lead approval.",
"input_schema": {"type": "object", "properties": {"plan": {"type": "string"}}, "required": ["plan"]}},
{"name": "idle", "description": "Signal that you have no more work. Enters idle polling phase.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "claim_task", "description": "Claim a task from the task board by ID.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
]
def list_all(self) -> str:
if not self.config["members"]:
return "No teammates."
lines = [f"Team: {self.config['team_name']}"]
for m in self.config["members"]:
lines.append(f" {m['name']} ({m['role']}): {m['status']}")
return "\n".join(lines)
def member_names(self) -> list:
return [m["name"] for m in self.config["members"]]
TEAM = TeammateManager(TEAM_DIR)
# -- Base tool implementations (these base tools are unchanged from s02) --
def _safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def _run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(
command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120,
)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def _run_read(path: str, limit: int = None) -> str:
try:
lines = _safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def _run_write(path: str, content: str) -> str:
try:
fp = _safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes"
except Exception as e:
return f"Error: {e}"
def _run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = _safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# -- Lead-specific protocol handlers --
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
with _tracker_lock:
shutdown_requests[req_id] = {"target": teammate, "status": "pending"}
BUS.send(
"lead", teammate, "Please shut down gracefully.",
"shutdown_request", {"request_id": req_id},
)
return f"Shutdown request {req_id} sent to '{teammate}'"
def handle_plan_review(request_id: str, approve: bool, feedback: str = "") -> str:
with _tracker_lock:
req = plan_requests.get(request_id)
if not req:
return f"Error: Unknown plan request_id '{request_id}'"
with _tracker_lock:
req["status"] = "approved" if approve else "rejected"
BUS.send(
"lead", req["from"], feedback, "plan_approval_response",
{"request_id": request_id, "approve": approve, "feedback": feedback},
)
return f"Plan {req['status']} for '{req['from']}'"
def _check_shutdown_status(request_id: str) -> str:
with _tracker_lock:
return json.dumps(shutdown_requests.get(request_id, {"error": "not found"}))
# -- Lead tool dispatch (14 tools) --
TOOL_HANDLERS = {
"bash": lambda **kw: _run_bash(kw["command"]),
"read_file": lambda **kw: _run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: _run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: _run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"spawn_teammate": lambda **kw: TEAM.spawn(kw["name"], kw["role"], kw["prompt"]),
"list_teammates": lambda **kw: TEAM.list_all(),
"send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")),
"read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2),
"broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()),
"shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]),
"shutdown_response": lambda **kw: _check_shutdown_status(kw.get("request_id", "")),
"plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")),
"idle": lambda **kw: "Lead does not idle.",
"claim_task": lambda **kw: claim_task(kw["task_id"], "lead"),
}
# these base tools are unchanged from s02
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "spawn_teammate", "description": "Spawn an autonomous teammate.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}},
{"name": "list_teammates", "description": "List all teammates.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "send_message", "description": "Send a message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain the lead's inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "broadcast", "description": "Send a message to all teammates.",
"input_schema": {"type": "object", "properties": {"content": {"type": "string"}}, "required": ["content"]}},
{"name": "shutdown_request", "description": "Request a teammate to shut down.",
"input_schema": {"type": "object", "properties": {"teammate": {"type": "string"}}, "required": ["teammate"]}},
{"name": "shutdown_response", "description": "Check shutdown request status.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}}, "required": ["request_id"]}},
{"name": "plan_approval", "description": "Approve or reject a teammate's plan.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "feedback": {"type": "string"}}, "required": ["request_id", "approve"]}},
{"name": "idle", "description": "Enter idle state (for lead -- rarely used).",
"input_schema": {"type": "object", "properties": {}}},
{"name": "claim_task", "description": "Claim a task from the board by ID.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
]
def agent_loop(messages: list):
while True:
inbox = BUS.read_inbox("lead")
if inbox:
messages.append({
"role": "user",
"content": f"<inbox>{json.dumps(inbox, indent=2)}</inbox>",
})
messages.append({
"role": "assistant",
"content": "Noted inbox messages.",
})
response = client.messages.create(
model=MODEL,
system=SYSTEM,
messages=messages,
tools=TOOLS,
max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
messages.append({"role": "user", "content": results})
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms11 >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
if query.strip() == "/team":
print(TEAM.list_all())
continue
if query.strip() == "/inbox":
print(json.dumps(BUS.read_inbox("lead"), indent=2))
continue
if query.strip() == "/tasks":
TASKS_DIR.mkdir(exist_ok=True)
for f in sorted(TASKS_DIR.glob("task_*.json")):
t = json.loads(f.read_text())
marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}.get(t["status"], "[?]")
owner = f" @{t['owner']}" if t.get("owner") else ""
print(f" {marker} #{t['id']}: {t['subject']}{owner}")
continue
history.append({"role": "user", "content": query})
agent_loop(history)
print()

732
agents/s_full.py Normal file
View File

@ -0,0 +1,732 @@
#!/usr/bin/env python3
"""
s_full.py - Full Reference Agent
Capstone implementation combining every mechanism from s01-s11.
NOT a teaching session -- this is the "put it all together" reference.
+------------------------------------------------------------------+
| FULL AGENT |
| |
| System prompt (s05 skills, s03 todo nag) |
| |
| Before each LLM call: |
| +--------------------+ +------------------+ +--------------+ |
| | Microcompact (s06) | | Drain bg (s08) | | Check inbox | |
| | Auto-compact (s06) | | notifications | | (s09) | |
| +--------------------+ +------------------+ +--------------+ |
| |
| Tool dispatch (s02 pattern): |
| +--------+----------+----------+---------+-----------+ |
| | bash | read | write | edit | TodoWrite | |
| | task | load_sk | compress | bg_run | bg_check | |
| | t_crt | t_get | t_upd | t_list | spawn_tm | |
| | list_tm| send_msg | rd_inbox | bcast | shutdown | |
| | plan | idle | claim | | | |
| +--------+----------+----------+---------+-----------+ |
| |
| Subagent (s04): spawn -> work -> return summary |
| Teammate (s09): spawn -> work -> idle -> auto-claim (s11) |
| Shutdown (s10): request_id handshake |
| Plan gate (s10): submit -> approve/reject |
+------------------------------------------------------------------+
REPL commands: /compact /tasks /team /inbox
"""
import json
import os
import re
import subprocess
import threading
import time
import uuid
from pathlib import Path
from queue import Queue
from anthropic import Anthropic
from dotenv import load_dotenv
load_dotenv(override=True)
if os.getenv("ANTHROPIC_BASE_URL"):
os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
WORKDIR = Path.cwd()
client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL"))
MODEL = os.environ["MODEL_ID"]
TEAM_DIR = WORKDIR / ".team"
INBOX_DIR = TEAM_DIR / "inbox"
TASKS_DIR = WORKDIR / ".tasks"
SKILLS_DIR = WORKDIR / "skills"
TRANSCRIPT_DIR = WORKDIR / ".transcripts"
TOKEN_THRESHOLD = 100000
POLL_INTERVAL = 5
IDLE_TIMEOUT = 60
VALID_MSG_TYPES = {"message", "broadcast", "shutdown_request",
"shutdown_response", "plan_approval_response"}
# === SECTION: base_tools ===
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=120)
out = (r.stdout + r.stderr).strip()
return out[:50000] if out else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Timeout (120s)"
def run_read(path: str, limit: int = None) -> str:
try:
lines = safe_path(path).read_text().splitlines()
if limit and limit < len(lines):
lines = lines[:limit] + [f"... ({len(lines) - limit} more)"]
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write(path: str, content: str) -> str:
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes to {path}"
except Exception as e:
return f"Error: {e}"
def run_edit(path: str, old_text: str, new_text: str) -> str:
try:
fp = safe_path(path)
c = fp.read_text()
if old_text not in c:
return f"Error: Text not found in {path}"
fp.write_text(c.replace(old_text, new_text, 1))
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# === SECTION: todos (s03) ===
class TodoManager:
def __init__(self):
self.items = []
def update(self, items: list) -> str:
validated, ip = [], 0
for i, item in enumerate(items):
content = str(item.get("content", "")).strip()
status = str(item.get("status", "pending")).lower()
af = str(item.get("activeForm", "")).strip()
if not content: raise ValueError(f"Item {i}: content required")
if status not in ("pending", "in_progress", "completed"):
raise ValueError(f"Item {i}: invalid status '{status}'")
if not af: raise ValueError(f"Item {i}: activeForm required")
if status == "in_progress": ip += 1
validated.append({"content": content, "status": status, "activeForm": af})
if len(validated) > 20: raise ValueError("Max 20 todos")
if ip > 1: raise ValueError("Only one in_progress allowed")
self.items = validated
return self.render()
def render(self) -> str:
if not self.items: return "No todos."
lines = []
for item in self.items:
m = {"completed": "[x]", "in_progress": "[>]", "pending": "[ ]"}.get(item["status"], "[?]")
suffix = f" <- {item['activeForm']}" if item["status"] == "in_progress" else ""
lines.append(f"{m} {item['content']}{suffix}")
done = sum(1 for t in self.items if t["status"] == "completed")
lines.append(f"\n({done}/{len(self.items)} completed)")
return "\n".join(lines)
# === SECTION: subagent (s04) ===
def run_subagent(prompt: str, agent_type: str = "Explore") -> str:
sub_tools = [
{"name": "bash", "description": "Run command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
]
if agent_type != "Explore":
sub_tools += [
{"name": "write_file", "description": "Write file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Edit file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
]
sub_handlers = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"]),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
}
sub_msgs = [{"role": "user", "content": prompt}]
resp = None
for _ in range(30):
resp = client.messages.create(model=MODEL, messages=sub_msgs, tools=sub_tools, max_tokens=8000)
sub_msgs.append({"role": "assistant", "content": resp.content})
if resp.stop_reason != "tool_use":
break
results = []
for b in resp.content:
if b.type == "tool_use":
h = sub_handlers.get(b.name, lambda **kw: "Unknown tool")
results.append({"type": "tool_result", "tool_use_id": b.id, "content": str(h(**b.input))[:50000]})
sub_msgs.append({"role": "user", "content": results})
if resp:
return "".join(b.text for b in resp.content if hasattr(b, "text")) or "(no summary)"
return "(subagent failed)"
# === SECTION: skills (s05) ===
class SkillLoader:
def __init__(self, skills_dir: Path):
self.skills = {}
if skills_dir.exists():
for f in sorted(skills_dir.glob("*.md")):
text = f.read_text()
match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL)
meta, body = {}, text
if match:
for line in match.group(1).strip().splitlines():
if ":" in line:
k, v = line.split(":", 1)
meta[k.strip()] = v.strip()
body = match.group(2).strip()
self.skills[f.stem] = {"meta": meta, "body": body}
def descriptions(self) -> str:
if not self.skills: return "(no skills)"
return "\n".join(f" - {n}: {s['meta'].get('description', '-')}" for n, s in self.skills.items())
def load(self, name: str) -> str:
s = self.skills.get(name)
if not s: return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}"
return f"<skill name=\"{name}\">\n{s['body']}\n</skill>"
# === SECTION: compression (s06) ===
def estimate_tokens(messages: list) -> int:
return len(json.dumps(messages, default=str)) // 4
def microcompact(messages: list):
indices = []
for i, msg in enumerate(messages):
if msg["role"] == "user" and isinstance(msg.get("content"), list):
for part in msg["content"]:
if isinstance(part, dict) and part.get("type") == "tool_result":
indices.append(part)
if len(indices) <= 3:
return
for part in indices[:-3]:
if isinstance(part.get("content"), str) and len(part["content"]) > 100:
part["content"] = "[cleared]"
def auto_compact(messages: list) -> list:
TRANSCRIPT_DIR.mkdir(exist_ok=True)
path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
with open(path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, default=str) + "\n")
conv_text = json.dumps(messages, default=str)[:80000]
resp = client.messages.create(
model=MODEL,
messages=[{"role": "user", "content": f"Summarize for continuity:\n{conv_text}"}],
max_tokens=2000,
)
summary = resp.content[0].text
return [
{"role": "user", "content": f"[Compressed. Transcript: {path}]\n{summary}"},
{"role": "assistant", "content": "Understood. Continuing with summary context."},
]
# === SECTION: file_tasks (s07) ===
class TaskManager:
def __init__(self):
TASKS_DIR.mkdir(exist_ok=True)
def _next_id(self) -> int:
ids = [int(f.stem.split("_")[1]) for f in TASKS_DIR.glob("task_*.json")]
return max(ids, default=0) + 1
def _load(self, tid: int) -> dict:
p = TASKS_DIR / f"task_{tid}.json"
if not p.exists(): raise ValueError(f"Task {tid} not found")
return json.loads(p.read_text())
def _save(self, task: dict):
(TASKS_DIR / f"task_{task['id']}.json").write_text(json.dumps(task, indent=2))
def create(self, subject: str, description: str = "") -> str:
task = {"id": self._next_id(), "subject": subject, "description": description,
"status": "pending", "owner": None, "blockedBy": [], "blocks": []}
self._save(task)
return json.dumps(task, indent=2)
def get(self, tid: int) -> str:
return json.dumps(self._load(tid), indent=2)
def update(self, tid: int, status: str = None,
add_blocked_by: list = None, add_blocks: list = None) -> str:
task = self._load(tid)
if status:
task["status"] = status
if status == "completed":
for f in TASKS_DIR.glob("task_*.json"):
t = json.loads(f.read_text())
if tid in t.get("blockedBy", []):
t["blockedBy"].remove(tid)
self._save(t)
if status == "deleted":
(TASKS_DIR / f"task_{tid}.json").unlink(missing_ok=True)
return f"Task {tid} deleted"
if add_blocked_by:
task["blockedBy"] = list(set(task["blockedBy"] + add_blocked_by))
if add_blocks:
task["blocks"] = list(set(task["blocks"] + add_blocks))
self._save(task)
return json.dumps(task, indent=2)
def list_all(self) -> str:
tasks = [json.loads(f.read_text()) for f in sorted(TASKS_DIR.glob("task_*.json"))]
if not tasks: return "No tasks."
lines = []
for t in tasks:
m = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}.get(t["status"], "[?]")
owner = f" @{t['owner']}" if t.get("owner") else ""
blocked = f" (blocked by: {t['blockedBy']})" if t.get("blockedBy") else ""
lines.append(f"{m} #{t['id']}: {t['subject']}{owner}{blocked}")
return "\n".join(lines)
def claim(self, tid: int, owner: str) -> str:
task = self._load(tid)
task["owner"] = owner
task["status"] = "in_progress"
self._save(task)
return f"Claimed task #{tid} for {owner}"
# === SECTION: background (s08) ===
class BackgroundManager:
def __init__(self):
self.tasks = {}
self.notifications = Queue()
def run(self, command: str, timeout: int = 120) -> str:
tid = str(uuid.uuid4())[:8]
self.tasks[tid] = {"status": "running", "command": command, "result": None}
threading.Thread(target=self._exec, args=(tid, command, timeout), daemon=True).start()
return f"Background task {tid} started: {command[:80]}"
def _exec(self, tid: str, command: str, timeout: int):
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=timeout)
output = (r.stdout + r.stderr).strip()[:50000]
self.tasks[tid].update({"status": "completed", "result": output or "(no output)"})
except Exception as e:
self.tasks[tid].update({"status": "error", "result": str(e)})
self.notifications.put({"task_id": tid, "status": self.tasks[tid]["status"],
"result": self.tasks[tid]["result"][:500]})
def check(self, tid: str = None) -> str:
if tid:
t = self.tasks.get(tid)
return f"[{t['status']}] {t.get('result', '(running)')}" if t else f"Unknown: {tid}"
return "\n".join(f"{k}: [{v['status']}] {v['command'][:60]}" for k, v in self.tasks.items()) or "No bg tasks."
def drain(self) -> list:
notifs = []
while not self.notifications.empty():
notifs.append(self.notifications.get_nowait())
return notifs
# === SECTION: messaging (s09) ===
class MessageBus:
def __init__(self):
INBOX_DIR.mkdir(parents=True, exist_ok=True)
def send(self, sender: str, to: str, content: str,
msg_type: str = "message", extra: dict = None) -> str:
msg = {"type": msg_type, "from": sender, "content": content,
"timestamp": time.time()}
if extra: msg.update(extra)
with open(INBOX_DIR / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name: str) -> list:
path = INBOX_DIR / f"{name}.jsonl"
if not path.exists(): return []
msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]
path.write_text("")
return msgs
def broadcast(self, sender: str, content: str, names: list) -> str:
count = 0
for n in names:
if n != sender:
self.send(sender, n, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
# === SECTION: shutdown + plan tracking (s10) ===
shutdown_requests = {}
plan_requests = {}
# === SECTION: team (s09/s11) ===
class TeammateManager:
def __init__(self, bus: MessageBus, task_mgr: TaskManager):
TEAM_DIR.mkdir(exist_ok=True)
self.bus = bus
self.task_mgr = task_mgr
self.config_path = TEAM_DIR / "config.json"
self.config = self._load()
self.threads = {}
def _load(self) -> dict:
if self.config_path.exists():
return json.loads(self.config_path.read_text())
return {"team_name": "default", "members": []}
def _save(self):
self.config_path.write_text(json.dumps(self.config, indent=2))
def _find(self, name: str) -> dict:
for m in self.config["members"]:
if m["name"] == name: return m
return None
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
member["role"] = role
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save()
threading.Thread(target=self._loop, args=(name, role, prompt), daemon=True).start()
return f"Spawned '{name}' (role: {role})"
def _set_status(self, name: str, status: str):
member = self._find(name)
if member:
member["status"] = status
self._save()
def _loop(self, name: str, role: str, prompt: str):
team_name = self.config["team_name"]
sys_prompt = (f"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. "
f"Use idle when done with current work. You may auto-claim tasks.")
messages = [{"role": "user", "content": prompt}]
tools = [
{"name": "bash", "description": "Run command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Edit file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "send_message", "description": "Send message.", "input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}}, "required": ["to", "content"]}},
{"name": "idle", "description": "Signal no more work.", "input_schema": {"type": "object", "properties": {}}},
{"name": "claim_task", "description": "Claim task by ID.", "input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
]
while True:
# -- WORK PHASE --
for _ in range(50):
inbox = self.bus.read_inbox(name)
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append({"role": "user", "content": json.dumps(msg)})
try:
response = client.messages.create(
model=MODEL, system=sys_prompt, messages=messages,
tools=tools, max_tokens=8000)
except Exception:
self._set_status(name, "shutdown")
return
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
break
results = []
idle_requested = False
for block in response.content:
if block.type == "tool_use":
if block.name == "idle":
idle_requested = True
output = "Entering idle phase."
elif block.name == "claim_task":
output = self.task_mgr.claim(block.input["task_id"], name)
elif block.name == "send_message":
output = self.bus.send(name, block.input["to"], block.input["content"])
else:
dispatch = {"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"]),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"])}
output = dispatch.get(block.name, lambda **kw: "Unknown")(**block.input)
print(f" [{name}] {block.name}: {str(output)[:120]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
messages.append({"role": "user", "content": results})
if idle_requested:
break
# -- IDLE PHASE: poll for messages and unclaimed tasks --
self._set_status(name, "idle")
resume = False
for _ in range(IDLE_TIMEOUT // max(POLL_INTERVAL, 1)):
time.sleep(POLL_INTERVAL)
inbox = self.bus.read_inbox(name)
if inbox:
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append({"role": "user", "content": json.dumps(msg)})
resume = True
break
unclaimed = []
for f in sorted(TASKS_DIR.glob("task_*.json")):
t = json.loads(f.read_text())
if t.get("status") == "pending" and not t.get("owner") and not t.get("blockedBy"):
unclaimed.append(t)
if unclaimed:
task = unclaimed[0]
self.task_mgr.claim(task["id"], name)
# Identity re-injection for compressed contexts
if len(messages) <= 3:
messages.insert(0, {"role": "user", "content":
f"<identity>You are '{name}', role: {role}, team: {team_name}.</identity>"})
messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."})
messages.append({"role": "user", "content":
f"<auto-claimed>Task #{task['id']}: {task['subject']}\n{task.get('description', '')}</auto-claimed>"})
messages.append({"role": "assistant", "content": f"Claimed task #{task['id']}. Working on it."})
resume = True
break
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
def list_all(self) -> str:
if not self.config["members"]: return "No teammates."
lines = [f"Team: {self.config['team_name']}"]
for m in self.config["members"]:
lines.append(f" {m['name']} ({m['role']}): {m['status']}")
return "\n".join(lines)
def member_names(self) -> list:
return [m["name"] for m in self.config["members"]]
# === SECTION: global_instances ===
TODO = TodoManager()
SKILLS = SkillLoader(SKILLS_DIR)
TASK_MGR = TaskManager()
BG = BackgroundManager()
BUS = MessageBus()
TEAM = TeammateManager(BUS, TASK_MGR)
# === SECTION: system_prompt ===
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use tools to solve tasks. Use TodoWrite for multi-step work.
Use task for subagent delegation. Use load_skill for specialized knowledge.
Skills available:
{SKILLS.descriptions()}"""
# === SECTION: shutdown_protocol (s10) ===
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {"target": teammate, "status": "pending"}
BUS.send("lead", teammate, "Please shut down.", "shutdown_request", {"request_id": req_id})
return f"Shutdown request {req_id} sent to '{teammate}'"
# === SECTION: plan_approval (s10) ===
def handle_plan_review(request_id: str, approve: bool, feedback: str = "") -> str:
req = plan_requests.get(request_id)
if not req: return f"Error: Unknown plan request_id '{request_id}'"
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback, "plan_approval_response",
{"request_id": request_id, "approve": approve, "feedback": feedback})
return f"Plan {req['status']} for '{req['from']}'"
# === SECTION: tool_dispatch (s02) ===
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
"TodoWrite": lambda **kw: TODO.update(kw["items"]),
"task": lambda **kw: run_subagent(kw["prompt"], kw.get("agent_type", "Explore")),
"load_skill": lambda **kw: SKILLS.load(kw["name"]),
"compress": lambda **kw: "Compressing...",
"background_run": lambda **kw: BG.run(kw["command"], kw.get("timeout", 120)),
"check_background": lambda **kw: BG.check(kw.get("task_id")),
"task_create": lambda **kw: TASK_MGR.create(kw["subject"], kw.get("description", "")),
"task_get": lambda **kw: TASK_MGR.get(kw["task_id"]),
"task_update": lambda **kw: TASK_MGR.update(kw["task_id"], kw.get("status"), kw.get("add_blocked_by"), kw.get("add_blocks")),
"task_list": lambda **kw: TASK_MGR.list_all(),
"spawn_teammate": lambda **kw: TEAM.spawn(kw["name"], kw["role"], kw["prompt"]),
"list_teammates": lambda **kw: TEAM.list_all(),
"send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")),
"read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2),
"broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()),
"shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]),
"plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")),
"idle": lambda **kw: "Lead does not idle.",
"claim_task": lambda **kw: TASK_MGR.claim(kw["task_id"], "lead"),
}
TOOLS = [
{"name": "bash", "description": "Run a shell command.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
{"name": "read_file", "description": "Read file contents.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}},
{"name": "write_file", "description": "Write content to file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
{"name": "edit_file", "description": "Replace exact text in file.",
"input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
{"name": "TodoWrite", "description": "Update task tracking list.",
"input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"content": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}, "activeForm": {"type": "string"}}, "required": ["content", "status", "activeForm"]}}}, "required": ["items"]}},
{"name": "task", "description": "Spawn a subagent for isolated exploration or work.",
"input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}, "agent_type": {"type": "string", "enum": ["Explore", "general-purpose"]}}, "required": ["prompt"]}},
{"name": "load_skill", "description": "Load specialized knowledge by name.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}},
{"name": "compress", "description": "Manually compress conversation context.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "background_run", "description": "Run command in background thread.",
"input_schema": {"type": "object", "properties": {"command": {"type": "string"}, "timeout": {"type": "integer"}}, "required": ["command"]}},
{"name": "check_background", "description": "Check background task status.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "string"}}}},
{"name": "task_create", "description": "Create a persistent file task.",
"input_schema": {"type": "object", "properties": {"subject": {"type": "string"}, "description": {"type": "string"}}, "required": ["subject"]}},
{"name": "task_get", "description": "Get task details by ID.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
{"name": "task_update", "description": "Update task status or dependencies.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed", "deleted"]}, "add_blocked_by": {"type": "array", "items": {"type": "integer"}}, "add_blocks": {"type": "array", "items": {"type": "integer"}}}, "required": ["task_id"]}},
{"name": "task_list", "description": "List all tasks.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "spawn_teammate", "description": "Spawn a persistent autonomous teammate.",
"input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}},
{"name": "list_teammates", "description": "List all teammates.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "send_message", "description": "Send a message to a teammate.",
"input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}},
{"name": "read_inbox", "description": "Read and drain the lead's inbox.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "broadcast", "description": "Send message to all teammates.",
"input_schema": {"type": "object", "properties": {"content": {"type": "string"}}, "required": ["content"]}},
{"name": "shutdown_request", "description": "Request a teammate to shut down.",
"input_schema": {"type": "object", "properties": {"teammate": {"type": "string"}}, "required": ["teammate"]}},
{"name": "plan_approval", "description": "Approve or reject a teammate's plan.",
"input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "feedback": {"type": "string"}}, "required": ["request_id", "approve"]}},
{"name": "idle", "description": "Enter idle state.",
"input_schema": {"type": "object", "properties": {}}},
{"name": "claim_task", "description": "Claim a task from the board.",
"input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
]
# === SECTION: agent_loop ===
def agent_loop(messages: list):
rounds_without_todo = 0
while True:
# s06: compression pipeline
microcompact(messages)
if estimate_tokens(messages) > TOKEN_THRESHOLD:
print("[auto-compact triggered]")
messages[:] = auto_compact(messages)
# s08: drain background notifications
notifs = BG.drain()
if notifs:
txt = "\n".join(f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs)
messages.append({"role": "user", "content": f"<background-results>\n{txt}\n</background-results>"})
messages.append({"role": "assistant", "content": "Noted background results."})
# s10: check lead inbox
inbox = BUS.read_inbox("lead")
if inbox:
messages.append({"role": "user", "content": f"<inbox>{json.dumps(inbox, indent=2)}</inbox>"})
messages.append({"role": "assistant", "content": "Noted inbox messages."})
# LLM call
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
# Tool execution
results = []
used_todo = False
manual_compress = False
for block in response.content:
if block.type == "tool_use":
if block.name == "compress":
manual_compress = True
handler = TOOL_HANDLERS.get(block.name)
try:
output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
except Exception as e:
output = f"Error: {e}"
print(f"> {block.name}: {str(output)[:200]}")
results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
if block.name == "TodoWrite":
used_todo = True
# s03: nag reminder
rounds_without_todo = 0 if used_todo else rounds_without_todo + 1
if rounds_without_todo >= 3:
results.insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"})
messages.append({"role": "user", "content": results})
# s06: manual compress
if manual_compress:
print("[manual compact]")
messages[:] = auto_compact(messages)
# === SECTION: repl ===
if __name__ == "__main__":
history = []
while True:
try:
query = input("\033[36ms_full >> \033[0m")
except (EOFError, KeyboardInterrupt):
break
if query.strip().lower() in ("q", "exit", ""):
break
if query.strip() == "/compact":
if history:
print("[manual compact via /compact]")
history[:] = auto_compact(history)
continue
if query.strip() == "/tasks":
print(TASK_MGR.list_all())
continue
if query.strip() == "/team":
print(TEAM.list_all())
continue
if query.strip() == "/inbox":
print(json.dumps(BUS.read_inbox("lead"), indent=2))
continue
history.append({"role": "user", "content": query})
agent_loop(history)
print()

View File

@ -0,0 +1,143 @@
# s01: The Agent Loop
> The entire secret of AI coding agents is a while loop that feeds tool results back to the model until the model decides to stop.
## The Problem
Why can't a language model just answer a coding question? Because coding
requires _interaction with the real world_. The model needs to read files,
run tests, check errors, and iterate. A single prompt-response pair cannot
do this.
Without the agent loop, you would have to copy-paste outputs back into the
model yourself. The user becomes the loop. The agent loop automates this:
call the model, execute whatever tools it asks for, feed the results back,
repeat until the model says "I'm done."
Consider a simple task: "Create a Python file that prints hello." The model
needs to (1) decide to write a file, (2) write it, (3) verify it works.
That is three tool calls minimum. Without a loop, each one requires manual
human intervention.
## The Solution
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tool |
| prompt | | | | execute |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
(loop continues)
The loop terminates when stop_reason != "tool_use".
That single condition is the entire control flow.
```
## How It Works
1. The user provides a prompt. It becomes the first message.
```python
history.append({"role": "user", "content": query})
```
2. The messages array is sent to the LLM along with the tool definitions.
```python
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
```
3. The assistant response is appended to messages.
```python
messages.append({"role": "assistant", "content": response.content})
```
4. We check the stop reason. If the model did not call a tool, the loop
ends. This is the only exit condition.
```python
if response.stop_reason != "tool_use":
return
```
5. For each tool_use block in the response, execute the tool (bash in this
session) and collect results.
```python
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
6. The results are appended as a user message, and the loop continues.
```python
messages.append({"role": "user", "content": results})
```
## Key Code
The minimum viable agent -- the entire pattern in under 30 lines
(from `agents/s01_agent_loop.py`, lines 66-86):
```python
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## What Changed
This is session 1 -- the starting point. There is no prior session.
| Component | Before | After |
|---------------|------------|--------------------------------|
| Agent loop | (none) | `while True` + stop_reason |
| Tools | (none) | `bash` (one tool) |
| Messages | (none) | Accumulating list |
| Control flow | (none) | `stop_reason != "tool_use"` |
## Design Rationale
This loop is the universal foundation of all LLM-based agents. Production implementations add error handling, token counting, streaming, and retry logic, but the fundamental structure is unchanged. The simplicity is the point: one exit condition (`stop_reason != "tool_use"`) controls the entire flow. Everything else in this course -- tools, planning, compression, teams -- layers on top of this loop without modifying it. Understanding this loop means understanding every agent.
## Try It
```sh
cd learn-claude-code
python agents/s01_agent_loop.py
```
Example prompts to try:
1. `Create a file called hello.py that prints "Hello, World!"`
2. `List all Python files in this directory`
3. `What is the current git branch?`
4. `Create a directory called test_output and write 3 files in it`

151
docs/en/s02-tool-use.md Normal file
View File

@ -0,0 +1,151 @@
# s02: Tools
> A dispatch map routes tool calls to handler functions -- the loop itself does not change at all.
## The Problem
With only `bash`, the agent shells out for everything: reading files,
writing files, editing files. This works but is fragile. `cat` output
gets truncated unpredictably. `sed` replacements fail on special
characters. The model wastes tokens constructing shell pipelines when
a direct function call would be simpler.
More importantly, bash is a security surface. Every bash call can do
anything the shell can do. With dedicated tools like `read_file` and
`write_file`, you can enforce path sandboxing and block dangerous
patterns at the tool level rather than hoping the model avoids them.
The insight is that adding tools does not require changing the loop.
The loop from s01 stays identical. You add entries to the tools array,
add handler functions, and wire them together with a dispatch map.
## The Solution
```
+----------+ +-------+ +------------------+
| User | ---> | LLM | ---> | Tool Dispatch |
| prompt | | | | { |
+----------+ +---+---+ | bash: run_bash |
^ | read: run_read |
| | write: run_wr |
+----------+ edit: run_edit |
tool_result| } |
+------------------+
The dispatch map is a dict: {tool_name: handler_function}
One lookup replaces any if/elif chain.
```
## How It Works
1. Define handler functions for each tool. Each takes keyword arguments
matching the tool's input_schema and returns a string result.
```python
def run_read(path: str, limit: int = None) -> str:
text = safe_path(path).read_text()
lines = text.splitlines()
if limit and limit < len(lines):
lines = lines[:limit]
return "\n".join(lines)[:50000]
```
2. Create the dispatch map linking tool names to handlers.
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
```
3. In the agent loop, look up the handler by name instead of hardcoding.
```python
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
4. Path sandboxing prevents the model from escaping the workspace.
```python
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
```
## Key Code
The dispatch pattern (from `agents/s02_tool_use.py`, lines 93-129):
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler \
else f"Unknown tool: {block.name}"
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## What Changed From s01
| Component | Before (s01) | After (s02) |
|----------------|--------------------|----------------------------|
| Tools | 1 (bash only) | 4 (bash, read, write, edit)|
| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |
| Path safety | None | `safe_path()` sandbox |
| Agent loop | Unchanged | Unchanged |
## Design Rationale
The dispatch map pattern scales linearly -- adding a tool means adding one handler and one schema entry. The loop never changes. This separation of concerns (loop vs handlers) is why agent frameworks can support dozens of tools without increasing control flow complexity. The pattern also enables independent testing of each handler in isolation, since handlers are pure functions with no coupling to the loop. Any agent that outgrows a dispatch map has a design problem, not a scaling problem.
## Try It
```sh
cd learn-claude-code
python agents/s02_tool_use.py
```
Example prompts to try:
1. `Read the file requirements.txt`
2. `Create a file called greet.py with a greet(name) function`
3. `Edit greet.py to add a docstring to the function`
4. `Read greet.py to verify the edit worked`
5. `Run the greet function with bash: python -c "from greet import greet; greet('World')"`

170
docs/en/s03-todo-write.md Normal file
View File

@ -0,0 +1,170 @@
# s03: TodoWrite
> A TodoManager lets the agent track its own progress, and a nag reminder injection forces it to keep updating when it forgets.
## The Problem
When an agent works on a multi-step task, it often loses track of what it
has done and what remains. Without explicit planning, the model might repeat
work, skip steps, or wander off on tangents. The user has no visibility
into the agent's internal plan.
This is worse than it sounds. Long conversations cause the model to "drift"
-- the system prompt fades in influence as the context window fills with
tool results. A 10-step refactoring task might complete steps 1-3, then
the model starts improvising because it forgot steps 4-10 existed.
The solution is structured state: a TodoManager that the model writes to
explicitly. The model creates a plan, marks items in_progress as it works,
and marks them completed when done. A nag reminder injects a nudge if the
model goes 3+ rounds without updating its todos.
Teaching simplification: the nag threshold of 3 rounds is set low for
teaching visibility. Production agents typically use a higher threshold
around 10 to avoid excessive prompting.
## The Solution
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tools |
| prompt | | | | + todo |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
|
+-----------+-----------+
| TodoManager state |
| [ ] task A |
| [>] task B <- doing |
| [x] task C |
+-----------------------+
|
if rounds_since_todo >= 3:
inject <reminder> into tool_result
```
## How It Works
1. The TodoManager validates and stores a list of items with statuses.
Only one item can be `in_progress` at a time.
```python
class TodoManager:
def __init__(self):
self.items = []
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one task can be in_progress")
self.items = validated
return self.render()
```
2. The `todo` tool is added to the dispatch map like any other tool.
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
# ...other tools...
"todo": lambda **kw: TODO.update(kw["items"]),
}
```
3. The nag reminder injects a `<reminder>` tag into the tool_result
messages when the model goes 3+ rounds without calling `todo`.
```python
def agent_loop(messages: list):
rounds_since_todo = 0
while True:
if rounds_since_todo >= 3 and messages:
last = messages[-1]
if (last["role"] == "user"
and isinstance(last.get("content"), list)):
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
# ... rest of loop ...
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
```
4. The system prompt instructs the model to use todos for planning.
```python
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use the todo tool to plan multi-step tasks.
Mark in_progress before starting, completed when done.
Prefer tools over prose."""
```
## Key Code
The TodoManager and nag injection (from `agents/s03_todo_write.py`,
lines 51-85 and 158-187):
```python
class TodoManager:
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one in_progress")
self.items = validated
return self.render()
# In agent_loop:
if rounds_since_todo >= 3:
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
```
## What Changed From s02
| Component | Before (s02) | After (s03) |
|----------------|------------------|--------------------------|
| Tools | 4 | 5 (+todo) |
| Planning | None | TodoManager with statuses|
| Nag injection | None | `<reminder>` after 3 rounds|
| Agent loop | Simple dispatch | + rounds_since_todo counter|
## Design Rationale
Visible plans improve task completion because the model can self-monitor progress. The nag mechanism creates accountability -- without it, the model may abandon plans mid-execution as conversation context grows and earlier instructions fade. The "one in_progress at a time" constraint enforces sequential focus, preventing context-switching overhead that degrades output quality. This pattern works because it externalizes the model's working memory into structured state that survives attention drift.
## Try It
```sh
cd learn-claude-code
python agents/s03_todo_write.py
```
Example prompts to try:
1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`
2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`
3. `Review all Python files and fix any style issues`

157
docs/en/s04-subagent.md Normal file
View File

@ -0,0 +1,157 @@
# s04: Subagents
> A subagent runs with a fresh messages list, shares the filesystem with the parent, and returns only a summary -- keeping the parent context clean.
## The Problem
As the agent works, its messages array grows. Every tool call, every file
read, every bash output accumulates. After 20-30 tool calls, the context
window is crowded with irrelevant history. Reading a 500-line file to
answer a quick question permanently adds 500 lines to the context.
This is particularly bad for exploratory tasks. "What testing framework
does this project use?" might require reading 5 files, but the parent
agent does not need all 5 file contents in its history -- it just needs
the answer: "pytest with conftest.py configuration."
The solution is process isolation: spawn a child agent with `messages=[]`.
The child explores, reads files, runs commands. When it finishes, only its
final text response returns to the parent. The child's entire message
history is discarded.
## The Solution
```
Parent agent Subagent
+------------------+ +------------------+
| messages=[...] | | messages=[] | <-- fresh
| | dispatch | |
| tool: task | ---------->| while tool_use: |
| prompt="..." | | call tools |
| | summary | append results |
| result = "..." | <--------- | return last text |
+------------------+ +------------------+
|
Parent context stays clean.
Subagent context is discarded.
```
## How It Works
1. The parent agent gets a `task` tool that triggers subagent spawning.
The child gets all base tools except `task` (no recursive spawning).
```python
PARENT_TOOLS = CHILD_TOOLS + [
{"name": "task",
"description": "Spawn a subagent with fresh context.",
"input_schema": {
"type": "object",
"properties": {
"prompt": {"type": "string"},
"description": {"type": "string"},
},
"required": ["prompt"],
}},
]
```
2. The subagent starts with a fresh messages list containing only
the delegated prompt. It shares the same filesystem.
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30): # safety limit
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({
"role": "assistant", "content": response.content
})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
```
3. Only the final text returns to the parent. The child's 30+ tool
call history is discarded.
```python
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
4. The parent receives this summary as a normal tool_result.
```python
if block.name == "task":
output = run_subagent(block.input["prompt"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
```
## Key Code
The subagent function (from `agents/s04_subagent.py`,
lines 110-128):
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30):
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({"type": "tool_result",
"tool_use_id": block.id,
"content": str(output)[:50000]})
sub_messages.append({"role": "user", "content": results})
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
## What Changed From s03
| Component | Before (s03) | After (s04) |
|----------------|------------------|---------------------------|
| Tools | 5 | 5 (base) + task (parent) |
| Context | Single shared | Parent + child isolation |
| Subagent | None | `run_subagent()` function |
| Return value | N/A | Summary text only |
| Todo system | TodoManager | Removed (not needed here) |
## Design Rationale
Process isolation gives context isolation for free. A fresh `messages[]` means the subagent cannot be confused by the parent's conversation history. The tradeoff is communication overhead -- results must be compressed back to the parent, losing detail. This is the same tradeoff as OS process isolation: safety and cleanliness in exchange for serialization cost. Limiting subagent depth (no recursive spawning) prevents unbounded resource consumption, and a max iteration count ensures runaway children terminate.
## Try It
```sh
cd learn-claude-code
python agents/s04_subagent.py
```
Example prompts to try:
1. `Use a subtask to find what testing framework this project uses`
2. `Delegate: read all .py files and summarize what each one does`
3. `Use a task to create a new module, then verify it from here`

View File

@ -0,0 +1,165 @@
# s05: Skills
> Two-layer skill injection avoids system prompt bloat by putting skill names in the system prompt (cheap) and full skill bodies in tool_result (on demand).
## The Problem
You want the agent to follow specific workflows for different domains:
git conventions, testing patterns, code review checklists. The naive
approach is to put everything in the system prompt. But the system prompt
has limited effective attention -- too much text and the model starts
ignoring parts of it.
If you have 10 skills at 2000 tokens each, that is 20,000 tokens of system
prompt. The model pays attention to the beginning and end but skims the
middle. Worse, most of those skills are irrelevant to any given task. A
file editing task does not need the git workflow instructions.
The two-layer approach solves this: Layer 1 puts short skill descriptions
in the system prompt (~100 tokens per skill). Layer 2 loads the full skill
body into a tool_result only when the model calls `load_skill`. The model
learns what skills exist (cheap) and loads them on demand (only when
relevant).
## The Solution
```
System prompt (Layer 1 -- always present):
+--------------------------------------+
| You are a coding agent. |
| Skills available: |
| - git: Git workflow helpers | ~100 tokens/skill
| - test: Testing best practices |
+--------------------------------------+
When model calls load_skill("git"):
+--------------------------------------+
| tool_result (Layer 2 -- on demand): |
| <skill name="git"> |
| Full git workflow instructions... | ~2000 tokens
| Step 1: ... |
| Step 2: ... |
| </skill> |
+--------------------------------------+
```
## How It Works
1. Skill files live in `.skills/` as Markdown with YAML frontmatter.
```
.skills/
git.md # ---\n description: Git workflow\n ---\n ...
test.md # ---\n description: Testing patterns\n ---\n ...
```
2. The SkillLoader parses frontmatter and separates metadata from body.
```python
class SkillLoader:
def _parse_frontmatter(self, text: str) -> tuple:
match = re.match(
r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL
)
if not match:
return {}, text
meta = {}
for line in match.group(1).strip().splitlines():
if ":" in line:
key, val = line.split(":", 1)
meta[key.strip()] = val.strip()
return meta, match.group(2).strip()
```
3. Layer 1: `get_descriptions()` returns short lines for the system prompt.
```python
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "No description")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Skills available:
{SKILL_LOADER.get_descriptions()}"""
```
4. Layer 2: `get_content()` returns the full body wrapped in `<skill>` tags.
```python
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>"
```
5. The `load_skill` tool is just another entry in the dispatch map.
```python
TOOL_HANDLERS = {
# ...base tools...
"load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]),
}
```
## Key Code
The SkillLoader class (from `agents/s05_skill_loading.py`,
lines 51-97):
```python
class SkillLoader:
def __init__(self, skills_dir: Path):
self.skills = {}
for f in sorted(skills_dir.glob("*.md")):
text = f.read_text()
meta, body = self._parse_frontmatter(text)
self.skills[f.stem] = {
"meta": meta, "body": body
}
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return (f"<skill name=\"{name}\">\n"
f"{skill['body']}\n</skill>")
```
## What Changed From s04
| Component | Before (s04) | After (s05) |
|----------------|------------------|----------------------------|
| Tools | 5 (base + task) | 5 (base + load_skill) |
| System prompt | Static string | + skill descriptions |
| Knowledge | None | .skills/*.md files |
| Injection | None | Two-layer (system + result)|
| Subagent | `run_subagent()` | Removed (different focus) |
## Design Rationale
Two-layer injection solves the attention budget problem. Putting all skill content in the system prompt wastes tokens on unused skills. Layer 1 (compact summaries) costs roughly 120 tokens total. Layer 2 (full content) loads on demand via tool_result. This scales to dozens of skills without degrading model attention quality. The key insight is that the model only needs to know what skills exist (cheap) to decide when to load one (expensive). This is the same lazy-loading principle used in software module systems.
## Try It
```sh
cd learn-claude-code
python agents/s05_skill_loading.py
```
Example prompts to try:
1. `What skills are available?`
2. `Load the agent-builder skill and follow its instructions`
3. `I need to do a code review -- load the relevant skill first`
4. `Build an MCP server using the mcp-builder skill`

View File

@ -0,0 +1,183 @@
# s06: Compact
> A three-layer compression pipeline lets the agent work indefinitely by strategically forgetting old tool results, auto-summarizing when tokens exceed a threshold, and allowing manual compression on demand.
## The Problem
The context window is finite. After enough tool calls, the messages array
exceeds the model's context limit and the API call fails. Even before
hitting the hard limit, performance degrades: the model becomes slower,
less accurate, and starts ignoring earlier messages.
A 200,000 token context window sounds large, but a single `read_file` on
a 1000-line source file consumes ~4000 tokens. After reading 30 files and
running 20 bash commands, you are at 100,000+ tokens. The agent cannot
work on large codebases without some form of compression.
The three-layer pipeline addresses this with increasing aggressiveness:
Layer 1 (micro-compact) silently replaces old tool results every turn.
Layer 2 (auto-compact) triggers a full summarization when tokens exceed
a threshold. Layer 3 (manual compact) lets the model trigger compression
itself.
Teaching simplification: the token estimation here uses a rough
characters/4 heuristic. Production systems use proper tokenizer
libraries for accurate counts.
## The Solution
```
Every turn:
+------------------+
| Tool call result |
+------------------+
|
v
[Layer 1: micro_compact] (silent, every turn)
Replace tool_result > 3 turns old
with "[Previous: used {tool_name}]"
|
v
[Check: tokens > 50000?]
| |
no yes
| |
v v
continue [Layer 2: auto_compact]
Save transcript to .transcripts/
LLM summarizes conversation.
Replace all messages with [summary].
|
v
[Layer 3: compact tool]
Model calls compact explicitly.
Same summarization as auto_compact.
```
## How It Works
1. **Layer 1 -- micro_compact**: Before each LLM call, find all
tool_result entries older than the last 3 and replace their content.
```python
def micro_compact(messages: list) -> list:
tool_results = []
for i, msg in enumerate(messages):
if msg["role"] == "user" and isinstance(msg.get("content"), list):
for j, part in enumerate(msg["content"]):
if isinstance(part, dict) and part.get("type") == "tool_result":
tool_results.append((i, j, part))
if len(tool_results) <= KEEP_RECENT:
return messages
to_clear = tool_results[:-KEEP_RECENT]
for _, _, part in to_clear:
if len(part.get("content", "")) > 100:
tool_id = part.get("tool_use_id", "")
tool_name = tool_name_map.get(tool_id, "unknown")
part["content"] = f"[Previous: used {tool_name}]"
return messages
```
2. **Layer 2 -- auto_compact**: When estimated tokens exceed 50,000,
save the full transcript and ask the LLM to summarize.
```python
def auto_compact(messages: list) -> list:
TRANSCRIPT_DIR.mkdir(exist_ok=True)
transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
with open(transcript_path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, default=str) + "\n")
response = client.messages.create(
model=MODEL,
messages=[{"role": "user", "content":
"Summarize this conversation for continuity..."
+ json.dumps(messages, default=str)[:80000]}],
max_tokens=2000,
)
summary = response.content[0].text
return [
{"role": "user", "content": f"[Compressed]\n\n{summary}"},
{"role": "assistant", "content": "Understood. Continuing."},
]
```
3. **Layer 3 -- manual compact**: The `compact` tool triggers the same
summarization on demand.
```python
if manual_compact:
messages[:] = auto_compact(messages)
```
4. The agent loop integrates all three layers.
```python
def agent_loop(messages: list):
while True:
micro_compact(messages)
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages)
response = client.messages.create(...)
# ... tool execution ...
if manual_compact:
messages[:] = auto_compact(messages)
```
## Key Code
The three-layer pipeline (from `agents/s06_context_compact.py`,
lines 67-93 and 189-223):
```python
THRESHOLD = 50000
KEEP_RECENT = 3
def micro_compact(messages):
# Replace old tool results with placeholders
...
def auto_compact(messages):
# Save transcript, LLM summarize, replace messages
...
def agent_loop(messages):
while True:
micro_compact(messages) # Layer 1
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages) # Layer 2
response = client.messages.create(...)
# ...
if manual_compact:
messages[:] = auto_compact(messages) # Layer 3
```
## What Changed From s05
| Component | Before (s05) | After (s06) |
|----------------|------------------|----------------------------|
| Tools | 5 | 5 (base + compact) |
| Context mgmt | None | Three-layer compression |
| Micro-compact | None | Old results -> placeholders|
| Auto-compact | None | Token threshold trigger |
| Manual compact | None | `compact` tool |
| Transcripts | None | Saved to .transcripts/ |
| Skills | load_skill | Removed (different focus) |
## Design Rationale
Context windows are finite, but agent sessions can be infinite. Three compression layers solve this at different granularities: micro-compact (replace old tool outputs), auto-compact (LLM summarizes when approaching limit), and manual compact (user-triggered). The key insight is that forgetting is a feature, not a bug -- it enables unbounded sessions. Transcripts preserve the full history on disk so nothing is truly lost, just moved out of the active context. The layered approach lets each layer operate independently at its own granularity, from silent per-turn cleanup to full conversation reset.
## Try It
```sh
cd learn-claude-code
python agents/s06_context_compact.py
```
Example prompts to try:
1. `Read every Python file in the agents/ directory one by one`
(watch micro-compact replace old results)
2. `Keep reading files until compression triggers automatically`
3. `Use the compact tool to manually compress the conversation`

172
docs/en/s07-task-system.md Normal file
View File

@ -0,0 +1,172 @@
# s07: Tasks
> Tasks persist as JSON files on the filesystem with a dependency graph, so they survive context compression and can be shared across agents.
## The Problem
In-memory state like TodoManager (s03) is lost when the context is
compressed (s06). After auto_compact replaces messages with a summary,
the todo list is gone. The agent has to reconstruct it from the summary
text, which is lossy and error-prone.
This is the critical s06-to-s07 bridge: TodoManager items die with
compression; file-based tasks don't. Moving state to the filesystem
makes it compression-proof.
More fundamentally, in-memory state is invisible to other agents.
When we eventually build teams (s09+), teammates need a shared task
board. In-memory data structures are process-local.
The solution is to persist tasks as JSON files in `.tasks/`. Each task
is a separate file with an ID, subject, status, and dependency graph.
Completing task 1 automatically unblocks task 2 if task 2 has
`blockedBy: [1]`. The file system becomes the source of truth.
## The Solution
```
.tasks/
task_1.json {"id":1, "status":"completed", ...}
task_2.json {"id":2, "blockedBy":[1], "status":"pending"}
task_3.json {"id":3, "blockedBy":[2], "status":"pending"}
Dependency resolution:
+----------+ +----------+ +----------+
| task 1 | --> | task 2 | --> | task 3 |
| complete | | blocked | | blocked |
+----------+ +----------+ +----------+
| ^
+--- completing task 1 removes it from
task 2's blockedBy list
```
## How It Works
1. The TaskManager provides CRUD operations. Each task is a JSON file.
```python
class TaskManager:
def create(self, subject: str, description: str = "") -> str:
task = {
"id": self._next_id,
"subject": subject,
"description": description,
"status": "pending",
"blockedBy": [],
"blocks": [],
"owner": "",
}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
```
2. When a task is marked completed, `_clear_dependency` removes its ID
from all other tasks' `blockedBy` lists.
```python
def _clear_dependency(self, completed_id: int):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
3. The `update` method handles status changes and bidirectional dependency
wiring.
```python
def update(self, task_id, status=None,
add_blocked_by=None, add_blocks=None):
task = self._load(task_id)
if status:
task["status"] = status
if status == "completed":
self._clear_dependency(task_id)
if add_blocks:
task["blocks"] = list(set(task["blocks"] + add_blocks))
for blocked_id in add_blocks:
blocked = self._load(blocked_id)
if task_id not in blocked["blockedBy"]:
blocked["blockedBy"].append(task_id)
self._save(blocked)
self._save(task)
```
4. Four task tools are added to the dispatch map.
```python
TOOL_HANDLERS = {
# ...base tools...
"task_create": lambda **kw: TASKS.create(kw["subject"]),
"task_update": lambda **kw: TASKS.update(kw["task_id"],
kw.get("status")),
"task_list": lambda **kw: TASKS.list_all(),
"task_get": lambda **kw: TASKS.get(kw["task_id"]),
}
```
## Key Code
The TaskManager with dependency graph (from `agents/s07_task_system.py`,
lines 46-123):
```python
class TaskManager:
def __init__(self, tasks_dir: Path):
self.dir = tasks_dir
self.dir.mkdir(exist_ok=True)
self._next_id = self._max_id() + 1
def _load(self, task_id: int) -> dict:
path = self.dir / f"task_{task_id}.json"
return json.loads(path.read_text())
def _save(self, task: dict):
path = self.dir / f"task_{task['id']}.json"
path.write_text(json.dumps(task, indent=2))
def create(self, subject, description=""):
task = {"id": self._next_id, "subject": subject,
"status": "pending", "blockedBy": [],
"blocks": [], "owner": ""}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
def _clear_dependency(self, completed_id):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
## What Changed From s06
| Component | Before (s06) | After (s07) |
|----------------|------------------|----------------------------|
| Tools | 5 | 8 (+task_create/update/list/get)|
| State storage | In-memory only | JSON files in .tasks/ |
| Dependencies | None | blockedBy + blocks graph |
| Compression | Three-layer | Removed (different focus) |
| Persistence | Lost on compact | Survives compression |
## Design Rationale
File-based state survives context compression. When the agent's conversation is compacted, in-memory state is lost, but tasks written to disk persist. The dependency graph ensures correct execution order even after context loss. This is the bridge between ephemeral conversation and persistent work -- the agent can forget conversation details but always has the task board to remind it what needs doing. The filesystem as source of truth also enables future multi-agent sharing, since any process can read the same JSON files.
## Try It
```sh
cd learn-claude-code
python agents/s07_task_system.py
```
Example prompts to try:
1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.`
2. `List all tasks and show the dependency graph`
3. `Complete task 1 and then list tasks to see task 2 unblocked`
4. `Create a task board for refactoring: parse -> transform -> emit -> test`

View File

@ -0,0 +1,188 @@
# s08: Background Tasks
> A BackgroundManager runs commands in separate threads and drains a notification queue before each LLM call, so the agent never blocks on long-running operations.
## The Problem
Some commands take minutes: `npm install`, `pytest`, `docker build`. With
a blocking agent loop, the model sits idle waiting for the subprocess to
finish. It cannot do anything else. If the user asked "install dependencies
and while that runs, create the config file," the agent would install
first, _then_ create the config -- sequentially, not in parallel.
The agent needs concurrency. Not full multi-threading of the agent loop
itself, but the ability to fire off a long command and continue working
while it runs. When the command finishes, its result should appear
naturally in the conversation.
The solution is a BackgroundManager that runs commands in daemon threads
and collects results in a notification queue. Before each LLM call, the
queue is drained and results are injected into the messages.
## The Solution
```
Main thread Background thread
+-----------------+ +-----------------+
| agent loop | | task executes |
| ... | | ... |
| [LLM call] <---+------- | enqueue(result) |
| ^drain queue | +-----------------+
+-----------------+
Timeline:
Agent --[spawn A]--[spawn B]--[other work]----
| |
v v
[A runs] [B runs] (parallel)
| |
+-- notification queue --+
|
[results injected before
next LLM call]
```
## How It Works
1. The BackgroundManager tracks tasks and maintains a thread-safe
notification queue.
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
```
2. `run()` starts a daemon thread and returns a task_id immediately.
```python
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {
"status": "running",
"result": None,
"command": command,
}
thread = threading.Thread(
target=self._execute,
args=(task_id, command),
daemon=True,
)
thread.start()
return f"Background task {task_id} started"
```
3. The thread target `_execute` runs the subprocess and pushes
results to the notification queue.
```python
def _execute(self, task_id: str, command: str):
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=300)
output = (r.stdout + r.stderr).strip()[:50000]
status = "completed"
except subprocess.TimeoutExpired:
output = "Error: Timeout (300s)"
status = "timeout"
self.tasks[task_id]["status"] = status
self.tasks[task_id]["result"] = output
with self._lock:
self._notification_queue.append({
"task_id": task_id,
"status": status,
"result": output[:500],
})
```
4. `drain_notifications()` returns and clears pending results.
```python
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
5. The agent loop drains notifications before each LLM call.
```python
def agent_loop(messages: list):
while True:
notifs = BG.drain_notifications()
if notifs and messages:
notif_text = "\n".join(
f"[bg:{n['task_id']}] {n['status']}: "
f"{n['result']}" for n in notifs
)
messages.append({"role": "user",
"content": f"<background-results>"
f"\n{notif_text}\n"
f"</background-results>"})
messages.append({"role": "assistant",
"content": "Noted background results."})
response = client.messages.create(...)
```
## Key Code
The BackgroundManager (from `agents/s08_background_tasks.py`, lines 49-107):
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {"status": "running",
"result": None,
"command": command}
thread = threading.Thread(
target=self._execute,
args=(task_id, command), daemon=True)
thread.start()
return f"Background task {task_id} started"
def _execute(self, task_id, command):
# run subprocess, push to queue
...
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
## What Changed From s07
| Component | Before (s07) | After (s08) |
|----------------|------------------|----------------------------|
| Tools | 8 | 6 (base + background_run + check)|
| Execution | Blocking only | Blocking + background threads|
| Notification | None | Queue drained per loop |
| Concurrency | None | Daemon threads |
| Task system | File-based CRUD | Removed (different focus) |
## Design Rationale
The agent loop is inherently single-threaded (one LLM call at a time). Background threads break this constraint for I/O-bound work (tests, builds, installs). The notification queue pattern ("drain before next LLM call") ensures results arrive at natural conversation breakpoints rather than interrupting the model's reasoning mid-thought. This is a minimal concurrency model: the agent loop stays single-threaded and deterministic, while only the I/O-bound subprocess execution is parallelized.
## Try It
```sh
cd learn-claude-code
python agents/s08_background_tasks.py
```
Example prompts to try:
1. `Run "sleep 5 && echo done" in the background, then create a file while it runs`
2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.`
3. `Run pytest in the background and keep working on other things`

233
docs/en/s09-agent-teams.md Normal file
View File

@ -0,0 +1,233 @@
# s09: Agent Teams
> Persistent teammates with JSONL inboxes turn isolated agents into a communicating team -- spawn, message, broadcast, and drain.
## The Problem
Subagents (s04) are disposable: spawn, work, return summary, die. They
have no identity, no memory between invocations, and no way to receive
follow-up instructions. Background tasks (s08) run shell commands but
cannot make LLM-guided decisions or communicate findings.
For real teamwork you need three things: (1) persistent agents that
survive beyond a single prompt, (2) identity and lifecycle management,
and (3) a communication channel between agents. Without messaging, even
persistent teammates are deaf and mute -- they can work in parallel but
never coordinate.
The solution combines a TeammateManager for spawning persistent named
agents with a MessageBus using JSONL inbox files. Each teammate runs
its own agent loop in a thread, checks its inbox before every LLM call,
and can send messages to any other teammate or the lead.
Note on the s06-to-s07 bridge: TodoManager items from s03 die with
compression (s06). File-based tasks (s07) survive compression because
they live on disk. Teams build on this same principle -- config.json and
inbox files persist outside the context window.
## The Solution
```
Teammate lifecycle:
spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN
Communication:
.team/
config.json <- team roster + statuses
inbox/
alice.jsonl <- append-only, drain-on-read
bob.jsonl
lead.jsonl
+--------+ send("alice","bob","...") +--------+
| alice | -----------------------------> | bob |
| loop | bob.jsonl << {json_line} | loop |
+--------+ +--------+
^ |
| BUS.read_inbox("alice") |
+---- alice.jsonl -> read + drain ---------+
5 message types:
+-------------------------+------------------------------+
| message | Normal text between agents |
| broadcast | Sent to all teammates |
| shutdown_request | Request graceful shutdown |
| shutdown_response | Approve/reject shutdown |
| plan_approval_response | Approve/reject plan |
+-------------------------+------------------------------+
```
## How It Works
1. The TeammateManager maintains config.json with the team roster.
Each member has a name, role, and status.
```python
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
```
2. `spawn()` creates a teammate and starts its agent loop in a thread.
Re-spawning an idle teammate reactivates it.
```python
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
self.threads[name] = thread
thread.start()
return f"Spawned teammate '{name}' (role: {role})"
```
3. The MessageBus handles JSONL inbox files. `send()` appends a JSON
line; `read_inbox()` reads all lines and drains the file.
```python
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content,
"timestamp": time.time()}
if extra:
msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists():
return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("") # drain
return json.dumps(msgs, indent=2)
```
4. Each teammate checks its inbox before every LLM call and injects
received messages into the conversation context.
```python
def _teammate_loop(self, name, role, prompt):
sys_prompt = f"You are '{name}', role: {role}, at {WORKDIR}."
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
if inbox != "[]":
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
messages.append({"role": "assistant",
"content": "Noted inbox messages."})
response = client.messages.create(
model=MODEL, system=sys_prompt,
messages=messages, tools=TOOLS)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
self._find_member(name)["status"] = "idle"
self._save_config()
```
5. `broadcast()` sends the same message to all teammates except the
sender.
```python
def broadcast(self, sender, content, teammates):
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
```
## Key Code
The TeammateManager + MessageBus core (from `agents/s09_agent_teams.py`):
```python
class TeammateManager:
def spawn(self, name, role, prompt):
member = self._find_member(name) or {
"name": name, "role": role, "status": "working"
}
member["status"] = "working"
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
thread.start()
return f"Spawned '{name}'"
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content, "timestamp": time.time()}
if extra: msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists(): return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("")
return json.dumps(msgs, indent=2)
```
## What Changed From s08
| Component | Before (s08) | After (s09) |
|----------------|------------------|----------------------------|
| Tools | 6 | 9 (+spawn/send/read_inbox) |
| Agents | Single | Lead + N teammates |
| Persistence | None | config.json + JSONL inboxes|
| Threads | Background cmds | Full agent loops per thread|
| Lifecycle | Fire-and-forget | idle -> working -> idle |
| Communication | None | 5 message types + broadcast|
Teaching simplification: this implementation does not use lock files
for inbox access. In production, concurrent append from multiple writers
would need file locking or atomic rename. The single-writer-per-inbox
pattern used here is safe for the teaching scenario.
## Design Rationale
File-based mailboxes (append-only JSONL) provide concurrency-safe inter-agent communication. Append is atomic on most filesystems, avoiding lock contention. The "drain on read" pattern (read all, truncate) gives batch delivery. This is simpler and more robust than shared memory or socket-based IPC for agent coordination. The tradeoff is latency -- messages are only seen at the next poll -- but for LLM-driven agents where each turn takes seconds, polling latency is negligible compared to inference time.
## Try It
```sh
cd learn-claude-code
python agents/s09_agent_teams.py
```
Example prompts to try:
1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`
2. `Broadcast "status update: phase 1 complete" to all teammates`
3. `Check the lead inbox for any messages`
4. Type `/team` to see the team roster with statuses
5. Type `/inbox` to manually check the lead's inbox

View File

@ -0,0 +1,204 @@
# s10: Team Protocols
> The same request_id handshake pattern powers both shutdown and plan approval -- one FSM, two applications.
## The Problem
In s09, teammates work and communicate but there is no structured
coordination. Two problems arise:
**Shutdown**: How do you stop a teammate cleanly? Killing the thread
leaves files partially written and config.json in a wrong state.
Graceful shutdown requires a handshake: the lead requests, the teammate
decides whether to approve (finish and exit) or reject (keep working).
**Plan approval**: How do you gate execution? When the lead says
"refactor the auth module," the teammate starts immediately. For
high-risk changes, the lead should review the plan before execution
begins. A junior proposes, a senior approves.
Both problems share the same structure: one side sends a request with a
unique ID, the other side responds referencing that ID. A finite state
machine tracks each request through pending -> approved | rejected.
## The Solution
```
Shutdown Protocol Plan Approval Protocol
================== ======================
Lead Teammate Teammate Lead
| | | |
|--shutdown_req-->| |--plan_req------>|
| {req_id:"abc"} | | {req_id:"xyz"} |
| | | |
|<--shutdown_resp-| |<--plan_resp-----|
| {req_id:"abc", | | {req_id:"xyz", |
| approve:true} | | approve:true} |
| | | |
v v v v
tracker["abc"] exits proceeds tracker["xyz"]
= approved = approved
Shared FSM (identical for both protocols):
[pending] --approve--> [approved]
[pending] --reject---> [rejected]
Trackers:
shutdown_requests = {req_id: {target, status}}
plan_requests = {req_id: {from, plan, status}}
```
## How It Works
1. The lead initiates shutdown by generating a request_id and sending
a shutdown_request through the inbox.
```python
shutdown_requests = {}
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending",
}
BUS.send("lead", teammate, "Please shut down gracefully.",
"shutdown_request", {"request_id": req_id})
return f"Shutdown request {req_id} sent (status: pending)"
```
2. The teammate receives the request in its inbox and calls the
`shutdown_response` tool to approve or reject.
```python
if tool_name == "shutdown_response":
req_id = args["request_id"]
approve = args["approve"]
if req_id in shutdown_requests:
shutdown_requests[req_id]["status"] = \
"approved" if approve else "rejected"
BUS.send(sender, "lead", args.get("reason", ""),
"shutdown_response",
{"request_id": req_id, "approve": approve})
return f"Shutdown {'approved' if approve else 'rejected'}"
```
3. The teammate loop checks for approved shutdown and exits.
```python
if (block.name == "shutdown_response"
and block.input.get("approve")):
should_exit = True
# ...
member["status"] = "shutdown" if should_exit else "idle"
```
4. Plan approval follows the identical pattern. The teammate submits
a plan, generating a request_id.
```python
plan_requests = {}
if tool_name == "plan_approval":
plan_text = args.get("plan", "")
req_id = str(uuid.uuid4())[:8]
plan_requests[req_id] = {
"from": sender, "plan": plan_text,
"status": "pending",
}
BUS.send(sender, "lead", plan_text,
"plan_approval_request",
{"request_id": req_id, "plan": plan_text})
return f"Plan submitted (request_id={req_id})"
```
5. The lead reviews and responds with the same request_id.
```python
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests.get(request_id)
if not req:
return f"Error: Unknown request_id '{request_id}'"
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve,
"feedback": feedback})
return f"Plan {req['status']} for '{req['from']}'"
```
6. Both protocols use the same `plan_approval` tool name with two
modes: teammates submit (no request_id), the lead reviews (with
request_id).
```python
# Lead tool dispatch:
"plan_approval": lambda **kw: handle_plan_review(
kw["request_id"], kw["approve"],
kw.get("feedback", "")),
# Teammate: submit mode (generate request_id)
```
## Key Code
The dual protocol handlers (from `agents/s10_team_protocols.py`):
```python
shutdown_requests = {}
plan_requests = {}
# -- Shutdown --
def handle_shutdown_request(teammate):
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending"
}
BUS.send("lead", teammate,
"Please shut down gracefully.",
"shutdown_request",
{"request_id": req_id})
# -- Plan Approval --
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests[request_id]
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve})
# Both use the same FSM:
# pending -> approved | rejected
# Both correlate by request_id across async inboxes
```
## What Changed From s09
| Component | Before (s09) | After (s10) |
|----------------|------------------|------------------------------|
| Tools | 9 | 12 (+shutdown_req/resp +plan)|
| Shutdown | Natural exit only| Request-response handshake |
| Plan gating | None | Submit/review with approval |
| Request tracking| None | Two tracker dicts |
| Correlation | None | request_id per request |
| FSM | None | pending -> approved/rejected |
## Design Rationale
The request_id correlation pattern turns any async interaction into a trackable finite state machine. The same 3-state machine (pending -> approved/rejected) applies to shutdown, plan approval, or any future protocol. This is why one pattern handles multiple protocols -- the FSM does not care what it is approving. The request_id provides correlation across async inboxes where messages may arrive out of order, making the pattern robust to timing variations between agents.
## Try It
```sh
cd learn-claude-code
python agents/s10_team_protocols.py
```
Example prompts to try:
1. `Spawn alice as a coder. Then request her shutdown.`
2. `List teammates to see alice's status after shutdown approval`
3. `Spawn bob with a risky refactoring task. Review and reject his plan.`
4. `Spawn charlie, have him submit a plan, then approve it.`
5. Type `/team` to monitor statuses

View File

@ -0,0 +1,232 @@
# s11: Autonomous Agents
> An idle cycle with task board polling lets teammates find and claim work themselves, with identity re-injection after context compression.
## The Problem
In s09-s10, teammates only work when explicitly told to. The lead must
spawn each teammate with a specific prompt. If the task board has 10
unclaimed tasks, the lead must manually assign each one. This does not
scale.
True autonomy means teammates find work themselves. When a teammate
finishes its current task, it should scan the task board for unclaimed
work, claim a task, and start working -- without any instruction from
the lead.
But autonomous agents face a subtlety: after context compression, the
agent might forget who it is. If the messages are summarized, the
original system prompt identity ("you are alice, role: coder") fades.
Identity re-injection solves this by inserting an identity block at the
start of compressed contexts.
Teaching simplification: the token estimation used here is rough
(characters / 4). Production systems use proper tokenizer libraries.
The nag threshold of 3 rounds (from s03) is set low for teaching
visibility; production agents typically use a higher threshold around 10.
## The Solution
```
Teammate lifecycle with idle cycle:
+-------+
| spawn |
+---+---+
|
v
+-------+ tool_use +-------+
| WORK | <------------- | LLM |
+---+---+ +-------+
|
| stop_reason != tool_use
| (or idle tool called)
v
+--------+
| IDLE | poll every 5s for up to 60s
+---+----+
|
+---> check inbox --> message? ----------> WORK
|
+---> scan .tasks/ --> unclaimed? -------> claim -> WORK
|
+---> 60s timeout ----------------------> SHUTDOWN
Identity re-injection after compression:
if len(messages) <= 3:
messages.insert(0, identity_block)
"You are 'alice', role: coder, team: my-team"
```
## How It Works
1. The teammate loop has two phases: WORK and IDLE. WORK runs the
standard agent loop. When the LLM stops calling tools (or calls
the `idle` tool), the teammate enters the IDLE phase.
```python
def _loop(self, name, role, prompt):
while True:
# -- WORK PHASE --
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append(...)
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
# execute tools...
if idle_requested:
break
# -- IDLE PHASE --
self._set_status(name, "idle")
resume = self._idle_poll(name, messages)
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
2. The idle phase polls the inbox and task board in a loop.
```python
def _idle_poll(self, name, messages):
polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12
for _ in range(polls):
time.sleep(POLL_INTERVAL)
# Check inbox for new messages
inbox = BUS.read_inbox(name)
if inbox:
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
return True
# Scan task board for unclaimed tasks
unclaimed = scan_unclaimed_tasks()
if unclaimed:
task = unclaimed[0]
claim_task(task["id"], name)
messages.append({"role": "user",
"content": f"<auto-claimed>Task #{task['id']}: "
f"{task['subject']}</auto-claimed>"})
return True
return False # timeout -> shutdown
```
3. Task board scanning looks for pending, unowned, unblocked tasks.
```python
def scan_unclaimed_tasks() -> list:
TASKS_DIR.mkdir(exist_ok=True)
unclaimed = []
for f in sorted(TASKS_DIR.glob("task_*.json")):
task = json.loads(f.read_text())
if (task.get("status") == "pending"
and not task.get("owner")
and not task.get("blockedBy")):
unclaimed.append(task)
return unclaimed
def claim_task(task_id: int, owner: str):
path = TASKS_DIR / f"task_{task_id}.json"
task = json.loads(path.read_text())
task["status"] = "in_progress"
task["owner"] = owner
path.write_text(json.dumps(task, indent=2))
```
4. Identity re-injection inserts an identity block when the context
is too short, indicating compression has occurred.
```python
def make_identity_block(name, role, team_name):
return {"role": "user",
"content": f"<identity>You are '{name}', "
f"role: {role}, team: {team_name}. "
f"Continue your work.</identity>"}
# Before resuming work after idle:
if len(messages) <= 3:
messages.insert(0, make_identity_block(
name, role, team_name))
messages.insert(1, {"role": "assistant",
"content": f"I am {name}. Continuing."})
```
5. The `idle` tool lets the teammate explicitly signal it has no more
work, entering the idle polling phase early.
```python
{"name": "idle",
"description": "Signal that you have no more work. "
"Enters idle polling phase.",
"input_schema": {"type": "object", "properties": {}}},
```
## Key Code
The autonomous loop (from `agents/s11_autonomous_agents.py`):
```python
def _loop(self, name, role, prompt):
while True:
# WORK PHASE
for _ in range(50):
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
for block in response.content:
if block.name == "idle":
idle_requested = True
if idle_requested:
break
# IDLE PHASE
self._set_status(name, "idle")
for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):
time.sleep(POLL_INTERVAL)
inbox = BUS.read_inbox(name)
if inbox: resume = True; break
unclaimed = scan_unclaimed_tasks()
if unclaimed:
claim_task(unclaimed[0]["id"], name)
resume = True; break
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
## What Changed From s10
| Component | Before (s10) | After (s11) |
|----------------|------------------|----------------------------|
| Tools | 12 | 14 (+idle, +claim_task) |
| Autonomy | Lead-directed | Self-organizing |
| Idle phase | None | Poll inbox + task board |
| Task claiming | Manual only | Auto-claim unclaimed tasks |
| Identity | System prompt | + re-injection after compress|
| Timeout | None | 60s idle -> auto shutdown |
## Design Rationale
Polling + timeout makes agents self-organizing without a central coordinator. Each agent independently polls the task board, claims unclaimed work, and returns to idle when done. The timeout triggers the poll cycle, and if no work appears within the window, the agent shuts itself down. This is the same pattern as work-stealing thread pools -- distributed, no single point of failure. Identity re-injection after compression ensures agents maintain their role even when conversation history is summarized away.
## Try It
```sh
cd learn-claude-code
python agents/s11_autonomous_agents.py
```
Example prompts to try:
1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`
2. `Spawn a coder teammate and let it find work from the task board itself`
3. `Create tasks with dependencies. Watch teammates respect the blocked order.`
4. Type `/tasks` to see the task board with owners
5. Type `/team` to monitor who is working vs idle

View File

@ -0,0 +1,132 @@
# s01: The Agent Loop
> AIコーディングエージェントの秘密はすべて、モデルが「終了」と判断するまでツール結果をモデルにフィードバックし続けるwhileループにある。
## 問題
なぜ言語モデルは単体でコーディングの質問に答えられないのか。それはコーディングが「現実世界とのインタラクション」を必要とするからだ。モデルはファイルを読み、テストを実行し、エラーを確認し、反復する必要がある。一回のプロンプト-レスポンスのやり取りではこれは実現できない。
agent loopがなければ、ユーザーが自分でモデルの出力をコピーペーストして戻す必要がある。つまりユーザー自身がループの役割を果たすことになる。agent loopはこれを自動化する: モデルを呼び出し、モデルが要求したツールを実行し、結果をフィードバックし、モデルが「完了」と言うまで繰り返す。
単純なタスクを考えてみよう: 「helloと出力するPythonファイルを作成せよ」。モデルは(1)ファイルを書くことを決定し、(2)書き、(3)動作を検証する必要がある。最低でも3回のツール呼び出しが必要だ。ループがなければ、そのたびに手動の介入が必要になる。
## 解決策
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tool |
| prompt | | | | execute |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
(loop continues)
The loop terminates when stop_reason != "tool_use".
That single condition is the entire control flow.
```
## 仕組み
1. ユーザーがプロンプトを入力する。これが最初のメッセージになる。
```python
history.append({"role": "user", "content": query})
```
2. メッセージ配列がツール定義と共にLLMに送信される。
```python
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
```
3. アシスタントのレスポンスがメッセージに追加される。
```python
messages.append({"role": "assistant", "content": response.content})
```
4. stop reasonを確認する。モデルがツールを呼び出さなかった場合、ループは終了する。これが唯一の終了条件だ。
```python
if response.stop_reason != "tool_use":
return
```
5. レスポンス中の各tool_useブロックについて、ツール(このセッションではbash)を実行し、結果を収集する。
```python
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
6. 結果がuserメッセージとして追加され、ループが続行する。
```python
messages.append({"role": "user", "content": results})
```
## 主要コード
最小限のエージェント -- パターン全体が30行未満
(`agents/s01_agent_loop.py` 66-86行目):
```python
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## 変更点
これはセッション1 -- 出発点である。前のセッションは存在しない。
| Component | Before | After |
|---------------|------------|--------------------------------|
| Agent loop | (none) | `while True` + stop_reason |
| Tools | (none) | `bash` (one tool) |
| Messages | (none) | Accumulating list |
| Control flow | (none) | `stop_reason != "tool_use"` |
## 設計原理
このループはすべてのLLMベースエージェントの普遍的な基盤だ。本番実装ではエラーハンドリング、トークンカウント、ストリーミング、リトライロジックが追加されるが、根本的な構造は変わらない。シンプルさこそがポイントだ: 1つの終了条件(`stop_reason != "tool_use"`)がフロー全体を制御する。本コースの他のすべて -- ツール、計画、圧縮、チーム -- はこのループの上に積み重なるが、ループ自体は変更しない。このループを理解することは、すべてのエージェントを理解することだ。
## 試してみる
```sh
cd learn-claude-code
python agents/s01_agent_loop.py
```
試せるプロンプト例:
1. `Create a file called hello.py that prints "Hello, World!"`
2. `List all Python files in this directory`
3. `What is the current git branch?`
4. `Create a directory called test_output and write 3 files in it`

141
docs/ja/s02-tool-use.md Normal file
View File

@ -0,0 +1,141 @@
# s02: Tools
> ディスパッチマップがツール呼び出しをハンドラ関数にルーティングする -- ループ自体はまったく変更しない。
## 問題
`bash`だけでは、エージェントはすべてをシェル経由で行う: ファイルの読み取り、書き込み、編集。これは動くが脆弱だ。`cat`の出力は予期しないタイミングで切り詰められる。`sed`による置換は特殊文字で失敗する。直接的な関数呼び出しの方がシンプルなのに、モデルはシェルパイプラインの構築にトークンを浪費する。
さらに重要なのは、bashがセキュリティ上の攻撃面であること。bashの呼び出しはシェルでできることなら何でもできてしまう。`read_file``write_file`のような専用ツールがあれば、モデルが危険な操作を避けることを期待するのではなく、ツールレベルでパスのサンドボックス化や危険なパターンのブロックを強制できる。
重要な洞察は、ツールを追加してもループを変更する必要がないということだ。s01のループはそのまま同一で維持される。ツール配列にエントリを追加し、ハンドラ関数を追加し、ディスパッチマップで接続するだけだ。
## 解決策
```
+----------+ +-------+ +------------------+
| User | ---> | LLM | ---> | Tool Dispatch |
| prompt | | | | { |
+----------+ +---+---+ | bash: run_bash |
^ | read: run_read |
| | write: run_wr |
+----------+ edit: run_edit |
tool_result| } |
+------------------+
The dispatch map is a dict: {tool_name: handler_function}
One lookup replaces any if/elif chain.
```
## 仕組み
1. 各ツールのハンドラ関数を定義する。各関数はツールのinput_schemaに対応するキーワード引数を受け取り、文字列の結果を返す。
```python
def run_read(path: str, limit: int = None) -> str:
text = safe_path(path).read_text()
lines = text.splitlines()
if limit and limit < len(lines):
lines = lines[:limit]
return "\n".join(lines)[:50000]
```
2. ツール名とハンドラを結びつけるディスパッチマップを作成する。
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
```
3. agent loop内で、ハードコードの代わりに名前でハンドラをルックアップする。
```python
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
4. パスのサンドボックス化により、モデルがワークスペースの外に出ることを防ぐ。
```python
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
```
## 主要コード
ディスパッチパターン(`agents/s02_tool_use.py` 93-129行目):
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler \
else f"Unknown tool: {block.name}"
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## s01からの変更点
| Component | Before (s01) | After (s02) |
|----------------|--------------------|----------------------------|
| Tools | 1 (bash only) | 4 (bash, read, write, edit)|
| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |
| Path safety | None | `safe_path()` sandbox |
| Agent loop | Unchanged | Unchanged |
## 設計原理
ディスパッチマップパターンは線形にスケールする -- ツールの追加はハンドラ関数とスキーマエントリを1つずつ追加するだけだ。ループは決して変更しない。この関心の分離(ループ vs ハンドラ)こそが、エージェントフレームワークが制御フローの複雑さを増すことなく数十のツールをサポートできる理由だ。このパターンはまた、各ハンドラの独立テストも可能にする。ハンドラはループとの結合がない純粋関数だからだ。ディスパッチマップを超えるエージェントは、スケーリングの問題ではなく設計の問題を抱えている。
## 試してみる
```sh
cd learn-claude-code
python agents/s02_tool_use.py
```
試せるプロンプト例:
1. `Read the file requirements.txt`
2. `Create a file called greet.py with a greet(name) function`
3. `Edit greet.py to add a docstring to the function`
4. `Read greet.py to verify the edit worked`
5. `Run the greet function with bash: python -c "from greet import greet; greet('World')"`

156
docs/ja/s03-todo-write.md Normal file
View File

@ -0,0 +1,156 @@
# s03: TodoWrite
> TodoManagerによりエージェントが自身の進捗を追跡でき、nagリマインダーの注入により更新を忘れた場合に強制的に更新させる。
## 問題
エージェントがマルチステップのタスクに取り組むとき、何を完了し何が残っているかを見失うことが多い。明示的な計画がなければ、モデルは作業を繰り返したり、ステップを飛ばしたり、脱線したりする可能性がある。ユーザーにはエージェントの内部計画が見えない。
これは見た目以上に深刻だ。長い会話ではモデルが「ドリフト」する -- コンテキストウィンドウがツール結果で埋まるにつれ、システムプロンプトの影響力が薄れていく。10ステップのリファクタリングタスクでステップ1-3を完了した後、モデルはステップ4-10の存在を忘れて即興で行動し始めるかもしれない。
解決策は構造化された状態管理だ: モデルが明示的に書き込むTodoManager。モデルは計画を作成し、作業中のアイテムをin_progressとしてマークし、完了時にcompletedとマークする。nagリマインダーは、モデルが3ラウンド以上todoを更新しなかった場合にナッジを注入する。
教育上の簡略化: nagの閾値3ラウンドは教育目的の可視化のために低く設定されている。本番のエージェントでは過剰なプロンプトを避けるため閾値は約10に設定されている。
## 解決策
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tools |
| prompt | | | | + todo |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
|
+-----------+-----------+
| TodoManager state |
| [ ] task A |
| [>] task B <- doing |
| [x] task C |
+-----------------------+
|
if rounds_since_todo >= 3:
inject <reminder> into tool_result
```
## 仕組み
1. TodoManagerはアイテムのリストをバリデーションして保持する。`in_progress`にできるのは一度に1つだけ。
```python
class TodoManager:
def __init__(self):
self.items = []
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one task can be in_progress")
self.items = validated
return self.render()
```
2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
# ...other tools...
"todo": lambda **kw: TODO.update(kw["items"]),
}
```
3. nagリマインダーは、モデルが3ラウンド以上`todo`を呼び出さなかった場合にtool_resultメッセージに`<reminder>`タグを注入する。
```python
def agent_loop(messages: list):
rounds_since_todo = 0
while True:
if rounds_since_todo >= 3 and messages:
last = messages[-1]
if (last["role"] == "user"
and isinstance(last.get("content"), list)):
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
# ... rest of loop ...
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
```
4. システムプロンプトがモデルにtodoによる計画を指示する。
```python
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use the todo tool to plan multi-step tasks.
Mark in_progress before starting, completed when done.
Prefer tools over prose."""
```
## 主要コード
TodoManagerとnag注入(`agents/s03_todo_write.py` 51-85行目および158-187行目):
```python
class TodoManager:
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one in_progress")
self.items = validated
return self.render()
# In agent_loop:
if rounds_since_todo >= 3:
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
```
## s02からの変更点
| Component | Before (s02) | After (s03) |
|----------------|------------------|--------------------------|
| Tools | 4 | 5 (+todo) |
| Planning | None | TodoManager with statuses|
| Nag injection | None | `<reminder>` after 3 rounds|
| Agent loop | Simple dispatch | + rounds_since_todo counter|
## 設計原理
可視化された計画はタスク完了率を向上させる。モデルが自身の進捗を自己監視できるからだ。nagメカニズムはアカウンタビリティを生み出す -- これがなければ、会話コンテキストが増大し初期の指示が薄れるにつれ、モデルは実行途中で計画を放棄する可能性がある。「一度にin_progressは1つだけ」という制約は逐次的な集中を強制し、出力品質を低下させるコンテキストスイッチのオーバーヘッドを防ぐ。このパターンが機能するのは、モデルのワーキングメモリを注意力のドリフトに耐える構造化された状態に外部化するからだ。
## 試してみる
```sh
cd learn-claude-code
python agents/s03_todo_write.py
```
試せるプロンプト例:
1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`
2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`
3. `Review all Python files and fix any style issues`

144
docs/ja/s04-subagent.md Normal file
View File

@ -0,0 +1,144 @@
# s04: Subagents
> サブエージェントは新しいメッセージリストで実行され、親とファイルシステムを共有し、要約のみを返す -- 親のコンテキストをクリーンに保つ。
## 問題
エージェントが作業するにつれ、メッセージ配列は膨張する。すべてのツール呼び出し、ファイル読み取り、bash出力が蓄積されていく。20-30回のツール呼び出しの後、コンテキストウィンドウは無関係な履歴で溢れる。ちょっとした質問に答えるために500行のファイルを読むと、永久に500行がコンテキストに追加される。
これは探索的タスクで特に深刻だ。「このプロジェクトはどのテストフレームワークを使っているか」という質問には5つのファイルを読む必要があるかもしれないが、親エージェントには5つのファイルの内容すべては不要だ -- 「pytest with conftest.py configuration」という回答だけが必要なのだ。
解決策はプロセスの分離だ: `messages=[]`で子エージェントを生成する。子は探索し、ファイルを読み、コマンドを実行する。終了時には最終的なテキストレスポンスだけが親に返される。子のメッセージ履歴全体は破棄される。
## 解決策
```
Parent agent Subagent
+------------------+ +------------------+
| messages=[...] | | messages=[] | <-- fresh
| | dispatch | |
| tool: task | ---------->| while tool_use: |
| prompt="..." | | call tools |
| | summary | append results |
| result = "..." | <--------- | return last text |
+------------------+ +------------------+
|
Parent context stays clean.
Subagent context is discarded.
```
## 仕組み
1. 親エージェントにサブエージェント生成をトリガーする`task`ツールが追加される。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。
```python
PARENT_TOOLS = CHILD_TOOLS + [
{"name": "task",
"description": "Spawn a subagent with fresh context.",
"input_schema": {
"type": "object",
"properties": {
"prompt": {"type": "string"},
"description": {"type": "string"},
},
"required": ["prompt"],
}},
]
```
2. サブエージェントは委譲されたプロンプトのみを含む新しいメッセージリストで開始する。ファイルシステムは共有される。
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30): # safety limit
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({
"role": "assistant", "content": response.content
})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
```
3. 最終テキストのみが親に返される。子の30回以上のツール呼び出し履歴は破棄される。
```python
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
4. 親はこの要約を通常のtool_resultとして受け取る。
```python
if block.name == "task":
output = run_subagent(block.input["prompt"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
```
## 主要コード
サブエージェント関数(`agents/s04_subagent.py` 110-128行目):
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30):
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({"type": "tool_result",
"tool_use_id": block.id,
"content": str(output)[:50000]})
sub_messages.append({"role": "user", "content": results})
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
## s03からの変更点
| Component | Before (s03) | After (s04) |
|----------------|------------------|---------------------------|
| Tools | 5 | 5 (base) + task (parent) |
| Context | Single shared | Parent + child isolation |
| Subagent | None | `run_subagent()` function |
| Return value | N/A | Summary text only |
| Todo system | TodoManager | Removed (not needed here) |
## 設計原理
プロセス分離はコンテキスト分離を無料で提供する。新しい`messages[]`は、サブエージェントが親の会話履歴に混乱させられないことを意味する。トレードオフは通信オーバーヘッドだ -- 結果は親に圧縮して返す必要があり、詳細が失われる。これはOSのプロセス分離と同じトレードオフだ: シリアライゼーションコストと引き換えに安全性とクリーンさを得る。サブエージェントの深さ制限(再帰的なスポーンは不可)は無制限のリソース消費を防ぎ、最大反復回数は暴走した子プロセスの終了を保証する。
## 試してみる
```sh
cd learn-claude-code
python agents/s04_subagent.py
```
試せるプロンプト例:
1. `Use a subtask to find what testing framework this project uses`
2. `Delegate: read all .py files and summarize what each one does`
3. `Use a task to create a new module, then verify it from here`

View File

@ -0,0 +1,153 @@
# s05: Skills
> 2層のスキル注入により、スキル名をシステムプロンプトに(低コスト)、スキル本体をtool_resultに(オンデマンド)配置することで、システムプロンプトの肥大化を回避する。
## 問題
エージェントに特定のドメインのワークフローを遵守させたい: gitの規約、テストパターン、コードレビューのチェックリストなど。単純なアプローチはすべてをシステムプロンプトに入れることだ。しかしシステムプロンプトの実効的な注意力は有限であり、テキストが多すぎるとモデルはその一部を無視し始める。
10個のスキルが各2000トークンあれば、20,000トークンのシステムプロンプトになる。モデルは先頭と末尾に注意を払い、中間部分は飛ばし読みする。さらに悪いことに、ほとんどのスキルは任意のタスクに対して無関係だ。ファイル編集のタスクにgitワークフローの指示は不要だ。
2層アプローチがこれを解決する: 第1層はシステムプロンプトにスキルの短い説明を置く(スキルあたり約100トークン)。第2層はモデルが`load_skill`を呼び出した時だけ、スキル本体の全文をtool_resultに読み込む。モデルはどのスキルが存在するかを知り(低コスト)、必要な時だけ読み込む(関連する時のみ)。
## 解決策
```
System prompt (Layer 1 -- always present):
+--------------------------------------+
| You are a coding agent. |
| Skills available: |
| - git: Git workflow helpers | ~100 tokens/skill
| - test: Testing best practices |
+--------------------------------------+
When model calls load_skill("git"):
+--------------------------------------+
| tool_result (Layer 2 -- on demand): |
| <skill name="git"> |
| Full git workflow instructions... | ~2000 tokens
| Step 1: ... |
| Step 2: ... |
| </skill> |
+--------------------------------------+
```
## 仕組み
1. スキルファイルは`.skills/`にYAMLフロントマター付きMarkdownとして配置される。
```
.skills/
git.md # ---\n description: Git workflow\n ---\n ...
test.md # ---\n description: Testing patterns\n ---\n ...
```
2. SkillLoaderがフロントマターを解析し、メタデータと本体を分離する。
```python
class SkillLoader:
def _parse_frontmatter(self, text: str) -> tuple:
match = re.match(
r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL
)
if not match:
return {}, text
meta = {}
for line in match.group(1).strip().splitlines():
if ":" in line:
key, val = line.split(":", 1)
meta[key.strip()] = val.strip()
return meta, match.group(2).strip()
```
3. 第1層: `get_descriptions()`がシステムプロンプト用の短い行を返す。
```python
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "No description")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Skills available:
{SKILL_LOADER.get_descriptions()}"""
```
4. 第2層: `get_content()``<skill>`タグで囲まれた本体全文を返す。
```python
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>"
```
5. `load_skill`ツールはディスパッチマップの単なる一エントリだ。
```python
TOOL_HANDLERS = {
# ...base tools...
"load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]),
}
```
## 主要コード
SkillLoaderクラス(`agents/s05_skill_loading.py` 51-97行目):
```python
class SkillLoader:
def __init__(self, skills_dir: Path):
self.skills = {}
for f in sorted(skills_dir.glob("*.md")):
text = f.read_text()
meta, body = self._parse_frontmatter(text)
self.skills[f.stem] = {
"meta": meta, "body": body
}
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return (f"<skill name=\"{name}\">\n"
f"{skill['body']}\n</skill>")
```
## s04からの変更点
| Component | Before (s04) | After (s05) |
|----------------|------------------|----------------------------|
| Tools | 5 (base + task) | 5 (base + load_skill) |
| System prompt | Static string | + skill descriptions |
| Knowledge | None | .skills/*.md files |
| Injection | None | Two-layer (system + result)|
| Subagent | `run_subagent()` | Removed (different focus) |
## 設計原理
2層注入は注意力バジェットの問題を解決する。すべてのスキル内容をシステムプロンプトに入れると、未使用のスキルにトークンを浪費する。第1層(コンパクトな要約)は合計約120トークンのコストだ。第2層(完全な内容)はtool_resultを通じてオンデマンドで読み込まれる。これにより、モデルの注意力品質を劣化させることなく数十のスキルにスケールできる。重要な洞察は、モデルはどのスキルが存在するか(低コスト)を知るだけで、いつスキルを読み込むか(高コスト)を判断できるということだ。これはソフトウェアモジュールシステムで使われる遅延読み込みと同じ原理だ。
## 試してみる
```sh
cd learn-claude-code
python agents/s05_skill_loading.py
```
試せるプロンプト例:
1. `What skills are available?`
2. `Load the agent-builder skill and follow its instructions`
3. `I need to do a code review -- load the relevant skill first`
4. `Build an MCP server using the mcp-builder skill`

View File

@ -0,0 +1,170 @@
# s06: Compact
> 3層の圧縮パイプラインにより、古いツール結果の戦略的な忘却、トークンが閾値を超えた時の自動要約、オンデマンドの手動圧縮を組み合わせて、エージェントを無期限に動作可能にする。
## 問題
コンテキストウィンドウは有限だ。十分なツール呼び出しの後、メッセージ配列がモデルのコンテキスト上限を超え、API呼び出しが失敗する。ハード制限に達する前でも、パフォーマンスは劣化する: モデルは遅くなり、精度が落ち、以前のメッセージを無視し始める。
200,000トークンのコンテキストウィンドウは大きく聞こえるが、1000行のソースファイルに対する一回の`read_file`で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン以上になる。何らかの圧縮がなければ、エージェントは大規模なコードベースで作業できない。
3層のパイプラインは積極性を段階的に上げて対処する:
第1層(micro-compact)は毎ターン静かに古いツール結果を置換する。
第2層(auto-compact)はトークンが閾値を超えた時に完全な要約を発動する。
第3層(manual compact)はモデル自身が圧縮をトリガーできる。
教育上の簡略化: ここでのトークン推定は大まかな「文字数/4」ヒューリスティックを使用している。本番システムでは正確なカウントのために適切なトークナイザーライブラリを使用する。
## 解決策
```
Every turn:
+------------------+
| Tool call result |
+------------------+
|
v
[Layer 1: micro_compact] (silent, every turn)
Replace tool_result > 3 turns old
with "[Previous: used {tool_name}]"
|
v
[Check: tokens > 50000?]
| |
no yes
| |
v v
continue [Layer 2: auto_compact]
Save transcript to .transcripts/
LLM summarizes conversation.
Replace all messages with [summary].
|
v
[Layer 3: compact tool]
Model calls compact explicitly.
Same summarization as auto_compact.
```
## 仕組み
1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、直近3件以前のすべてのtool_resultエントリを見つけて内容を置換する。
```python
def micro_compact(messages: list) -> list:
tool_results = []
for i, msg in enumerate(messages):
if msg["role"] == "user" and isinstance(msg.get("content"), list):
for j, part in enumerate(msg["content"]):
if isinstance(part, dict) and part.get("type") == "tool_result":
tool_results.append((i, j, part))
if len(tool_results) <= KEEP_RECENT:
return messages
to_clear = tool_results[:-KEEP_RECENT]
for _, _, part in to_clear:
if len(part.get("content", "")) > 100:
tool_id = part.get("tool_use_id", "")
tool_name = tool_name_map.get(tool_id, "unknown")
part["content"] = f"[Previous: used {tool_name}]"
return messages
```
2. **第2層 -- auto_compact**: 推定トークン数が50,000を超えた時、完全なトランスクリプトを保存し、LLMに要約を依頼する。
```python
def auto_compact(messages: list) -> list:
TRANSCRIPT_DIR.mkdir(exist_ok=True)
transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
with open(transcript_path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, default=str) + "\n")
response = client.messages.create(
model=MODEL,
messages=[{"role": "user", "content":
"Summarize this conversation for continuity..."
+ json.dumps(messages, default=str)[:80000]}],
max_tokens=2000,
)
summary = response.content[0].text
return [
{"role": "user", "content": f"[Compressed]\n\n{summary}"},
{"role": "assistant", "content": "Understood. Continuing."},
]
```
3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。
```python
if manual_compact:
messages[:] = auto_compact(messages)
```
4. agent loopが3つの層すべてを統合する。
```python
def agent_loop(messages: list):
while True:
micro_compact(messages)
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages)
response = client.messages.create(...)
# ... tool execution ...
if manual_compact:
messages[:] = auto_compact(messages)
```
## 主要コード
3層パイプライン(`agents/s06_context_compact.py` 67-93行目および189-223行目):
```python
THRESHOLD = 50000
KEEP_RECENT = 3
def micro_compact(messages):
# Replace old tool results with placeholders
...
def auto_compact(messages):
# Save transcript, LLM summarize, replace messages
...
def agent_loop(messages):
while True:
micro_compact(messages) # Layer 1
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages) # Layer 2
response = client.messages.create(...)
# ...
if manual_compact:
messages[:] = auto_compact(messages) # Layer 3
```
## s05からの変更点
| Component | Before (s05) | After (s06) |
|----------------|------------------|----------------------------|
| Tools | 5 | 5 (base + compact) |
| Context mgmt | None | Three-layer compression |
| Micro-compact | None | Old results -> placeholders|
| Auto-compact | None | Token threshold trigger |
| Manual compact | None | `compact` tool |
| Transcripts | None | Saved to .transcripts/ |
| Skills | load_skill | Removed (different focus) |
## 設計原理
コンテキストウィンドウは有限だが、エージェントセッションは無限にできる。3層の圧縮が異なる粒度でこれを解決する: micro-compact(古いツール出力の置換)、auto-compact(上限に近づいたときのLLM要約)、manual compact(ユーザートリガー)。重要な洞察は、忘却はバグではなく機能だということだ -- 無制限のセッションを可能にする。トランスクリプトはディスク上に完全な履歴を保存するため、何も真に失われず、アクティブなコンテキストの外に移動されるだけだ。層状のアプローチにより、各層がサイレントなターンごとのクリーンアップから完全な会話リセットまで、独自の粒度で独立して動作する。
## 試してみる
```sh
cd learn-claude-code
python agents/s06_context_compact.py
```
試せるプロンプト例:
1. `Read every Python file in the agents/ directory one by one`
(micro-compactが古い結果を置換するのを観察する)
2. `Keep reading files until compression triggers automatically`
3. `Use the compact tool to manually compress the conversation`

159
docs/ja/s07-task-system.md Normal file
View File

@ -0,0 +1,159 @@
# s07: Tasks
> タスクはファイルシステム上にJSON形式で依存グラフ付きで永続化され、コンテキスト圧縮後も生き残り、複数エージェント間で共有できる。
## 問題
インメモリの状態であるTodoManager(s03)は、コンテキストが圧縮(s06)されると失われる。auto_compactがメッセージを要約で置換した後、todoリストは消える。エージェントは要約テキストからそれを再構成しなければならないが、これは不正確でエラーが起きやすい。
これがs06からs07への重要な橋渡しだ: TodoManagerのアイテムは圧縮と共に死ぬが、ファイルベースのタスクは死なない。状態をファイルシステムに移すことで、圧縮に対する耐性が得られる。
さらに根本的な問題として、インメモリの状態は他のエージェントからは見えない。最終的にチーム(s09以降)を構築する際、チームメイトには共有のタスクボードが必要だ。インメモリのデータ構造はプロセスローカルだ。
解決策はタスクを`.tasks/`にJSON形式で永続化すること。各タスクはID、件名、ステータス、依存グラフを持つ個別のファイルだ。タスク1を完了すると、タスク2が`blockedBy: [1]`を持つ場合、自動的にタスク2のブロックが解除される。ファイルシステムが信頼できる情報源となる。
## 解決策
```
.tasks/
task_1.json {"id":1, "status":"completed", ...}
task_2.json {"id":2, "blockedBy":[1], "status":"pending"}
task_3.json {"id":3, "blockedBy":[2], "status":"pending"}
Dependency resolution:
+----------+ +----------+ +----------+
| task 1 | --> | task 2 | --> | task 3 |
| complete | | blocked | | blocked |
+----------+ +----------+ +----------+
| ^
+--- completing task 1 removes it from
task 2's blockedBy list
```
## 仕組み
1. TaskManagerがCRUD操作を提供する。各タスクは1つのJSONファイル。
```python
class TaskManager:
def create(self, subject: str, description: str = "") -> str:
task = {
"id": self._next_id,
"subject": subject,
"description": description,
"status": "pending",
"blockedBy": [],
"blocks": [],
"owner": "",
}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
```
2. タスクが完了とマークされると、`_clear_dependency`がそのIDを他のすべてのタスクの`blockedBy`リストから除去する。
```python
def _clear_dependency(self, completed_id: int):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
3. `update`メソッドがステータス変更と双方向の依存関係の結線を処理する。
```python
def update(self, task_id, status=None,
add_blocked_by=None, add_blocks=None):
task = self._load(task_id)
if status:
task["status"] = status
if status == "completed":
self._clear_dependency(task_id)
if add_blocks:
task["blocks"] = list(set(task["blocks"] + add_blocks))
for blocked_id in add_blocks:
blocked = self._load(blocked_id)
if task_id not in blocked["blockedBy"]:
blocked["blockedBy"].append(task_id)
self._save(blocked)
self._save(task)
```
4. 4つのタスクツールがディスパッチマップに追加される。
```python
TOOL_HANDLERS = {
# ...base tools...
"task_create": lambda **kw: TASKS.create(kw["subject"]),
"task_update": lambda **kw: TASKS.update(kw["task_id"],
kw.get("status")),
"task_list": lambda **kw: TASKS.list_all(),
"task_get": lambda **kw: TASKS.get(kw["task_id"]),
}
```
## 主要コード
依存グラフ付きTaskManager(`agents/s07_task_system.py` 46-123行目):
```python
class TaskManager:
def __init__(self, tasks_dir: Path):
self.dir = tasks_dir
self.dir.mkdir(exist_ok=True)
self._next_id = self._max_id() + 1
def _load(self, task_id: int) -> dict:
path = self.dir / f"task_{task_id}.json"
return json.loads(path.read_text())
def _save(self, task: dict):
path = self.dir / f"task_{task['id']}.json"
path.write_text(json.dumps(task, indent=2))
def create(self, subject, description=""):
task = {"id": self._next_id, "subject": subject,
"status": "pending", "blockedBy": [],
"blocks": [], "owner": ""}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
def _clear_dependency(self, completed_id):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
## s06からの変更点
| Component | Before (s06) | After (s07) |
|----------------|------------------|----------------------------|
| Tools | 5 | 8 (+task_create/update/list/get)|
| State storage | In-memory only | JSON files in .tasks/ |
| Dependencies | None | blockedBy + blocks graph |
| Compression | Three-layer | Removed (different focus) |
| Persistence | Lost on compact | Survives compression |
## 設計原理
ファイルベースの状態はコンテキスト圧縮を生き延びる。エージェントの会話が圧縮されるとメモリ内の状態は失われるが、ディスクに書き込まれたタスクは永続する。依存グラフにより、コンテキストが失われた後でも正しい順序で実行される。これは一時的な会話と永続的な作業の橋渡しだ -- エージェントは会話の詳細を忘れても、タスクボードが常に何をすべきかを思い出させてくれる。ファイルシステムを信頼できる情報源とすることで、将来のマルチエージェント共有も可能になる。任意のプロセスが同じJSONファイルを読み取れるからだ。
## 試してみる
```sh
cd learn-claude-code
python agents/s07_task_system.py
```
試せるプロンプト例:
1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.`
2. `List all tasks and show the dependency graph`
3. `Complete task 1 and then list tasks to see task 2 unblocked`
4. `Create a task board for refactoring: parse -> transform -> emit -> test`

View File

@ -0,0 +1,177 @@
# s08: Background Tasks
> BackgroundManagerがコマンドを別スレッドで実行し、各LLM呼び出しの前に通知キューをドレインすることで、エージェントは長時間実行操作でブロックされなくなる。
## 問題
一部のコマンドは数分かかる: `npm install``pytest``docker build`。ブロッキングのagent loopでは、モデルはサブプロセスの終了を待って待機する。他のことは何もできない。ユーザーが「依存関係をインストールして、その間にconfigファイルを作成して」と言った場合、エージェントはまずインストールを行い、その後configを作成する -- 並列ではなく逐次的に。
エージェントには並行性が必要だ。agent loop自体の完全なマルチスレッディングではなく、長いコマンドを発射して実行中に作業を続ける能力だ。コマンドが終了したら、その結果は自然に会話に現れるべきだ。
解決策は、BackgroundManagerがコマンドをデーモンスレッドで実行し、結果を通知キューに収集すること。各LLM呼び出しの前にキューがドレインされ、結果がメッセージに注入される。
## 解決策
```
Main thread Background thread
+-----------------+ +-----------------+
| agent loop | | task executes |
| ... | | ... |
| [LLM call] <---+------- | enqueue(result) |
| ^drain queue | +-----------------+
+-----------------+
Timeline:
Agent --[spawn A]--[spawn B]--[other work]----
| |
v v
[A runs] [B runs] (parallel)
| |
+-- notification queue --+
|
[results injected before
next LLM call]
```
## 仕組み
1. BackgroundManagerがタスクを追跡し、スレッドセーフな通知キューを維持する。
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
```
2. `run()`がデーモンスレッドを開始し、task_idを即座に返す。
```python
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {
"status": "running",
"result": None,
"command": command,
}
thread = threading.Thread(
target=self._execute,
args=(task_id, command),
daemon=True,
)
thread.start()
return f"Background task {task_id} started"
```
3. スレッドのターゲットである`_execute`がサブプロセスを実行し、結果を通知キューにプッシュする。
```python
def _execute(self, task_id: str, command: str):
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=300)
output = (r.stdout + r.stderr).strip()[:50000]
status = "completed"
except subprocess.TimeoutExpired:
output = "Error: Timeout (300s)"
status = "timeout"
self.tasks[task_id]["status"] = status
self.tasks[task_id]["result"] = output
with self._lock:
self._notification_queue.append({
"task_id": task_id,
"status": status,
"result": output[:500],
})
```
4. `drain_notifications()`が保留中の結果を返してクリアする。
```python
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
5. agent loopが各LLM呼び出しの前に通知をドレインする。
```python
def agent_loop(messages: list):
while True:
notifs = BG.drain_notifications()
if notifs and messages:
notif_text = "\n".join(
f"[bg:{n['task_id']}] {n['status']}: "
f"{n['result']}" for n in notifs
)
messages.append({"role": "user",
"content": f"<background-results>"
f"\n{notif_text}\n"
f"</background-results>"})
messages.append({"role": "assistant",
"content": "Noted background results."})
response = client.messages.create(...)
```
## 主要コード
BackgroundManager(`agents/s08_background_tasks.py` 49-107行目):
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {"status": "running",
"result": None,
"command": command}
thread = threading.Thread(
target=self._execute,
args=(task_id, command), daemon=True)
thread.start()
return f"Background task {task_id} started"
def _execute(self, task_id, command):
# run subprocess, push to queue
...
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
## s07からの変更点
| Component | Before (s07) | After (s08) |
|----------------|------------------|----------------------------|
| Tools | 8 | 6 (base + background_run + check)|
| Execution | Blocking only | Blocking + background threads|
| Notification | None | Queue drained per loop |
| Concurrency | None | Daemon threads |
| Task system | File-based CRUD | Removed (different focus) |
## 設計原理
エージェントループは本質的にシングルスレッドだ(一度に1つのLLM呼び出し)。バックグラウンドスレッドはI/Oバウンドな作業(テスト、ビルド、インストール)に対してこの制約を打破する。通知キューパターン(「次のLLM呼び出し前にドレイン」)により、結果はモデルの推論を途中で中断するのではなく、会話の自然な区切りで到着する。これは最小限の並行性モデルだ: エージェントループはシングルスレッドで決定論的なまま、I/Oバウンドなサブプロセス実行のみが並列化される。
## 試してみる
```sh
cd learn-claude-code
python agents/s08_background_tasks.py
```
試せるプロンプト例:
1. `Run "sleep 5 && echo done" in the background, then create a file while it runs`
2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.`
3. `Run pytest in the background and keep working on other things`

212
docs/ja/s09-agent-teams.md Normal file
View File

@ -0,0 +1,212 @@
# s09: Agent Teams
> JSONL形式のインボックスを持つ永続的なチームメイトが、孤立したエージェントをコミュニケーションするチームに変える -- spawn、message、broadcast、drain。
## 問題
サブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もなく、フォローアップの指示を受け取る方法もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定やフィードバックの伝達はできない。
本物のチームワークには3つのものが必要だ: (1)単一のプロンプトを超えて存続する永続的なエージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネル。メッセージングがなければ、永続的なチームメイトでさえ聾唖だ -- 並列に作業できるが協調することはない。
解決策は、名前付きの永続的エージェントを生成するTeammateManagerと、JONSLインボックスファイルを使うMessageBusの組み合わせだ。各チームメイトは自身のagent loopをスレッドで実行し、各LLM呼び出しの前にインボックスを確認し、他のチームメイトやリーダーにメッセージを送れる。
s06からs07への橋渡しについての注記: s03のTodoManagerアイテムは圧縮(s06)と共に死ぬ。ファイルベースのタスク(s07)はディスク上に存在するため圧縮後も生き残る。チームも同じ原則の上に構築されている -- config.jsonとインボックスファイルはコンテキストウィンドウの外に永続化される。
## 解決策
```
Teammate lifecycle:
spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN
Communication:
.team/
config.json <- team roster + statuses
inbox/
alice.jsonl <- append-only, drain-on-read
bob.jsonl
lead.jsonl
+--------+ send("alice","bob","...") +--------+
| alice | -----------------------------> | bob |
| loop | bob.jsonl << {json_line} | loop |
+--------+ +--------+
^ |
| BUS.read_inbox("alice") |
+---- alice.jsonl -> read + drain ---------+
5 message types:
+-------------------------+------------------------------+
| message | Normal text between agents |
| broadcast | Sent to all teammates |
| shutdown_request | Request graceful shutdown |
| shutdown_response | Approve/reject shutdown |
| plan_approval_response | Approve/reject plan |
+-------------------------+------------------------------+
```
## 仕組み
1. TeammateManagerがチームの名簿としてconfig.jsonを管理する。各メンバーは名前、役割、ステータスを持つ。
```python
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
```
2. `spawn()`がチームメイトを作成し、そのagent loopをスレッドで開始する。アイドル状態のチームメイトを再spawnすると再活性化される。
```python
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
self.threads[name] = thread
thread.start()
return f"Spawned teammate '{name}' (role: {role})"
```
3. MessageBusがJSONLインボックスファイルを処理する。`send()`がJSON行を追記し、`read_inbox()`がすべての行を読み取ってファイルをドレインする。
```python
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content,
"timestamp": time.time()}
if extra:
msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists():
return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("") # drain
return json.dumps(msgs, indent=2)
```
4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージを会話コンテキストに注入する。
```python
def _teammate_loop(self, name, role, prompt):
sys_prompt = f"You are '{name}', role: {role}, at {WORKDIR}."
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
if inbox != "[]":
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
messages.append({"role": "assistant",
"content": "Noted inbox messages."})
response = client.messages.create(
model=MODEL, system=sys_prompt,
messages=messages, tools=TOOLS)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
self._find_member(name)["status"] = "idle"
self._save_config()
```
5. `broadcast()`が送信者以外の全チームメイトに同じメッセージを送信する。
```python
def broadcast(self, sender, content, teammates):
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
```
## 主要コード
TeammateManager + MessageBusのコア(`agents/s09_agent_teams.py`):
```python
class TeammateManager:
def spawn(self, name, role, prompt):
member = self._find_member(name) or {
"name": name, "role": role, "status": "working"
}
member["status"] = "working"
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
thread.start()
return f"Spawned '{name}'"
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content, "timestamp": time.time()}
if extra: msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists(): return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("")
return json.dumps(msgs, indent=2)
```
## s08からの変更点
| Component | Before (s08) | After (s09) |
|----------------|------------------|----------------------------|
| Tools | 6 | 9 (+spawn/send/read_inbox) |
| Agents | Single | Lead + N teammates |
| Persistence | None | config.json + JSONL inboxes|
| Threads | Background cmds | Full agent loops per thread|
| Lifecycle | Fire-and-forget | idle -> working -> idle |
| Communication | None | 5 message types + broadcast|
教育上の簡略化: この実装ではインボックスアクセスにロックファイルを使用していない。本番環境では、複数ライターからの並行追記にはファイルロッキングまたはアトミックリネームが必要になる。ここで使用している単一ライター/インボックスパターンは教育シナリオでは安全だ。
## 設計原理
ファイルベースのメールボックス(追記専用JSONL)は並行性安全なエージェント間通信を提供する。追記はほとんどのファイルシステムでアトミックであり、ロック競合を回避する。「読み取り時にドレイン」パターン(全読み取り、切り詰め)はバッチ配信を提供する。これは共有メモリやソケットベースのIPCよりもシンプルで堅牢だ。トレードオフはレイテンシだ -- メッセージは次のポーリングまで見えない -- しかし各ターンに数秒の推論時間がかかるLLM駆動エージェントにとって、ポーリングレイテンシは推論時間に比べて無視できる。
## 試してみる
```sh
cd learn-claude-code
python agents/s09_agent_teams.py
```
試せるプロンプト例:
1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`
2. `Broadcast "status update: phase 1 complete" to all teammates`
3. `Check the lead inbox for any messages`
4. `/team`と入力してステータス付きのチーム名簿を確認する
5. `/inbox`と入力してリーダーのインボックスを手動確認する

View File

@ -0,0 +1,190 @@
# s10: Team Protocols
> 同じrequest_idハンドシェイクパターンがシャットダウンとプラン承認の両方を支える -- 1つのFSM、2つの適用。
## 問題
s09ではチームメイトが作業しコミュニケーションするが、構造化された協調はない。2つの問題が生じる:
**シャットダウン**: チームメイトをどうやってクリーンに停止するか。スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。グレースフルシャットダウンにはハンドシェイクが必要だ: リーダーが要求し、チームメイトが承認(終了処理を行い退出)するか拒否(作業を継続)するかを判断する。
**プラン承認**: 実行をどうやってゲーティングするか。リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行開始前にリーダーが計画をレビューすべきだ。ジュニアが提案し、シニアが承認する。
両方の問題は同じ構造を共有している: 一方がユニークなIDを持つリクエストを送り、もう一方がそのIDを参照してレスポンスする。有限状態機械が各リクエストをpending -> approved | rejectedの遷移で追跡する。
## 解決策
```
Shutdown Protocol Plan Approval Protocol
================== ======================
Lead Teammate Teammate Lead
| | | |
|--shutdown_req-->| |--plan_req------>|
| {req_id:"abc"} | | {req_id:"xyz"} |
| | | |
|<--shutdown_resp-| |<--plan_resp-----|
| {req_id:"abc", | | {req_id:"xyz", |
| approve:true} | | approve:true} |
| | | |
v v v v
tracker["abc"] exits proceeds tracker["xyz"]
= approved = approved
Shared FSM (identical for both protocols):
[pending] --approve--> [approved]
[pending] --reject---> [rejected]
Trackers:
shutdown_requests = {req_id: {target, status}}
plan_requests = {req_id: {from, plan, status}}
```
## 仕組み
1. リーダーがrequest_idを生成し、インボックス経由でshutdown_requestを送信してシャットダウンを開始する。
```python
shutdown_requests = {}
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending",
}
BUS.send("lead", teammate, "Please shut down gracefully.",
"shutdown_request", {"request_id": req_id})
return f"Shutdown request {req_id} sent (status: pending)"
```
2. チームメイトはインボックスでリクエストを受信し、`shutdown_response`ツールを呼び出して承認または拒否する。
```python
if tool_name == "shutdown_response":
req_id = args["request_id"]
approve = args["approve"]
if req_id in shutdown_requests:
shutdown_requests[req_id]["status"] = \
"approved" if approve else "rejected"
BUS.send(sender, "lead", args.get("reason", ""),
"shutdown_response",
{"request_id": req_id, "approve": approve})
return f"Shutdown {'approved' if approve else 'rejected'}"
```
3. チームメイトのループが承認済みシャットダウンを確認して終了する。
```python
if (block.name == "shutdown_response"
and block.input.get("approve")):
should_exit = True
# ...
member["status"] = "shutdown" if should_exit else "idle"
```
4. プラン承認も同一のパターンに従う。チームメイトがプランを提出し、request_idを生成する。
```python
plan_requests = {}
if tool_name == "plan_approval":
plan_text = args.get("plan", "")
req_id = str(uuid.uuid4())[:8]
plan_requests[req_id] = {
"from": sender, "plan": plan_text,
"status": "pending",
}
BUS.send(sender, "lead", plan_text,
"plan_approval_request",
{"request_id": req_id, "plan": plan_text})
return f"Plan submitted (request_id={req_id})"
```
5. リーダーがレビューし、同じrequest_idでレスポンスする。
```python
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests.get(request_id)
if not req:
return f"Error: Unknown request_id '{request_id}'"
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve,
"feedback": feedback})
return f"Plan {req['status']} for '{req['from']}'"
```
6. 両プロトコルとも同じ`plan_approval`ツール名を2つのモードで使用する: チームメイトが提出(request_idなし)、リーダーがレビュー(request_idあり)。
```python
# Lead tool dispatch:
"plan_approval": lambda **kw: handle_plan_review(
kw["request_id"], kw["approve"],
kw.get("feedback", "")),
# Teammate: submit mode (generate request_id)
```
## 主要コード
2つのプロトコルハンドラ(`agents/s10_team_protocols.py`):
```python
shutdown_requests = {}
plan_requests = {}
# -- Shutdown --
def handle_shutdown_request(teammate):
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending"
}
BUS.send("lead", teammate,
"Please shut down gracefully.",
"shutdown_request",
{"request_id": req_id})
# -- Plan Approval --
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests[request_id]
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve})
# Both use the same FSM:
# pending -> approved | rejected
# Both correlate by request_id across async inboxes
```
## s09からの変更点
| Component | Before (s09) | After (s10) |
|----------------|------------------|------------------------------|
| Tools | 9 | 12 (+shutdown_req/resp +plan)|
| Shutdown | Natural exit only| Request-response handshake |
| Plan gating | None | Submit/review with approval |
| Request tracking| None | Two tracker dicts |
| Correlation | None | request_id per request |
| FSM | None | pending -> approved/rejected |
## 設計原理
request_id相関パターンは、任意の非同期インタラクションを追跡可能な有限状態マシンに変換する。同じ3状態マシン(pending -> approved/rejected)がシャットダウン、プラン承認、または将来の任意のプロトコルに適用される。1つのパターンが複数のプロトコルを処理できるのはこのためだ -- FSMは何を承認しているかを気にしない。request_idはメッセージが順不同で到着する可能性のある非同期インボックス間で相関を提供し、エージェント間のタイミング差異に対してパターンを堅牢にする。
## 試してみる
```sh
cd learn-claude-code
python agents/s10_team_protocols.py
```
試せるプロンプト例:
1. `Spawn alice as a coder. Then request her shutdown.`
2. `List teammates to see alice's status after shutdown approval`
3. `Spawn bob with a risky refactoring task. Review and reject his plan.`
4. `Spawn charlie, have him submit a plan, then approve it.`
5. `/team`と入力してステータスを監視する

View File

@ -0,0 +1,215 @@
# s11: Autonomous Agents
> タスクボードポーリング付きのアイドルサイクルにより、チームメイトが自分で作業を見つけて確保できるようになり、コンテキスト圧縮後にはアイデンティティの再注入が行われる。
## 問題
s09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てなければならない。これはスケールしない。
真の自律性とは、チームメイトが自分で作業を見つけることだ。チームメイトが現在のタスクを完了したら、タスクボードで未確保の作業をスキャンし、タスクを確保し、作業を開始すべきだ -- リーダーからの指示なしに。
しかし自律エージェントには微妙な問題がある: コンテキスト圧縮後に、エージェントが自分が誰かを忘れる可能性がある。メッセージが要約されると、元のシステムプロンプトのアイデンティティ(「あなたはalice、役割はcoder」)が薄れる。アイデンティティの再注入は、圧縮されたコンテキストの先頭にアイデンティティブロックを挿入することでこれを解決する。
教育上の簡略化: ここで使用するトークン推定は大まかなもの(文字数 / 4)だ。本番システムでは適切なトークナイザーライブラリを使用する。nagの閾値3ラウンド(s03から)は教育目的の可視化のために低く設定されている。本番のエージェントでは閾値は約10。
## 解決策
```
Teammate lifecycle with idle cycle:
+-------+
| spawn |
+---+---+
|
v
+-------+ tool_use +-------+
| WORK | <------------- | LLM |
+---+---+ +-------+
|
| stop_reason != tool_use
| (or idle tool called)
v
+--------+
| IDLE | poll every 5s for up to 60s
+---+----+
|
+---> check inbox --> message? ----------> WORK
|
+---> scan .tasks/ --> unclaimed? -------> claim -> WORK
|
+---> 60s timeout ----------------------> SHUTDOWN
Identity re-injection after compression:
if len(messages) <= 3:
messages.insert(0, identity_block)
"You are 'alice', role: coder, team: my-team"
```
## 仕組み
1. チームメイトのループにはWORKとIDLEの2つのフェーズがある。WORKは標準的なagent loopを実行する。LLMがツール呼び出しを停止した時(または`idle`ツールを呼び出した時)、チームメイトはIDLEフェーズに入る。
```python
def _loop(self, name, role, prompt):
while True:
# -- WORK PHASE --
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append(...)
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
# execute tools...
if idle_requested:
break
# -- IDLE PHASE --
self._set_status(name, "idle")
resume = self._idle_poll(name, messages)
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
2. IDLEフェーズがインボックスとタスクボードをループでポーリングする。
```python
def _idle_poll(self, name, messages):
polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12
for _ in range(polls):
time.sleep(POLL_INTERVAL)
# Check inbox for new messages
inbox = BUS.read_inbox(name)
if inbox:
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
return True
# Scan task board for unclaimed tasks
unclaimed = scan_unclaimed_tasks()
if unclaimed:
task = unclaimed[0]
claim_task(task["id"], name)
messages.append({"role": "user",
"content": f"<auto-claimed>Task #{task['id']}: "
f"{task['subject']}</auto-claimed>"})
return True
return False # timeout -> shutdown
```
3. タスクボードスキャンがpendingかつ未割り当てかつブロックされていないタスクを探す。
```python
def scan_unclaimed_tasks() -> list:
TASKS_DIR.mkdir(exist_ok=True)
unclaimed = []
for f in sorted(TASKS_DIR.glob("task_*.json")):
task = json.loads(f.read_text())
if (task.get("status") == "pending"
and not task.get("owner")
and not task.get("blockedBy")):
unclaimed.append(task)
return unclaimed
def claim_task(task_id: int, owner: str):
path = TASKS_DIR / f"task_{task_id}.json"
task = json.loads(path.read_text())
task["status"] = "in_progress"
task["owner"] = owner
path.write_text(json.dumps(task, indent=2))
```
4. アイデンティティの再注入は、コンテキストが短すぎる場合(圧縮が発生したことを示す)にアイデンティティブロックを挿入する。
```python
def make_identity_block(name, role, team_name):
return {"role": "user",
"content": f"<identity>You are '{name}', "
f"role: {role}, team: {team_name}. "
f"Continue your work.</identity>"}
# Before resuming work after idle:
if len(messages) <= 3:
messages.insert(0, make_identity_block(
name, role, team_name))
messages.insert(1, {"role": "assistant",
"content": f"I am {name}. Continuing."})
```
5. `idle`ツールにより、チームメイトはもう作業がないことを明示的にシグナルし、早期にアイドルポーリングフェーズに入る。
```python
{"name": "idle",
"description": "Signal that you have no more work. "
"Enters idle polling phase.",
"input_schema": {"type": "object", "properties": {}}},
```
## 主要コード
自律ループ(`agents/s11_autonomous_agents.py`):
```python
def _loop(self, name, role, prompt):
while True:
# WORK PHASE
for _ in range(50):
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
for block in response.content:
if block.name == "idle":
idle_requested = True
if idle_requested:
break
# IDLE PHASE
self._set_status(name, "idle")
for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):
time.sleep(POLL_INTERVAL)
inbox = BUS.read_inbox(name)
if inbox: resume = True; break
unclaimed = scan_unclaimed_tasks()
if unclaimed:
claim_task(unclaimed[0]["id"], name)
resume = True; break
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
## s10からの変更点
| Component | Before (s10) | After (s11) |
|----------------|------------------|----------------------------|
| Tools | 12 | 14 (+idle, +claim_task) |
| Autonomy | Lead-directed | Self-organizing |
| Idle phase | None | Poll inbox + task board |
| Task claiming | Manual only | Auto-claim unclaimed tasks |
| Identity | System prompt | + re-injection after compress|
| Timeout | None | 60s idle -> auto shutdown |
## 設計原理
ポーリング + タイムアウトにより、エージェントは中央コーディネーターなしで自己組織化する。各エージェントは独立してタスクボードをポーリングし、未確保の作業を確保し、完了したらアイドルに戻る。タイムアウトがポーリングサイクルをトリガーし、ウィンドウ内に作業が現れなければエージェントは自らシャットダウンする。これはワークスティーリングスレッドプールと同じパターンだ -- 分散型で単一障害点がない。圧縮後のアイデンティティ再注入により、会話履歴が要約された後もエージェントは自身の役割を維持する。
## 試してみる
```sh
cd learn-claude-code
python agents/s11_autonomous_agents.py
```
試せるプロンプト例:
1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`
2. `Spawn a coder teammate and let it find work from the task board itself`
3. `Create tasks with dependencies. Watch teammates respect the blocked order.`
4. `/tasks`と入力してオーナー付きのタスクボードを確認する
5. `/team`と入力して誰が作業中でアイドルかを監視する

View File

@ -0,0 +1,132 @@
# s01: Agent Loop (智能体循环)
> AI 编程智能体的全部秘密就是一个 while 循环 -- 把工具执行结果反馈给模型, 直到模型决定停止。
## 问题
为什么语言模型不能直接回答编程问题? 因为编程需要**与真实世界交互**。模型需要读取文件、运行测试、检查错误、反复迭代。单次的提示-响应交互无法做到这些。
没有 agent loop, 你就得手动把输出复制粘贴回模型。用户自己变成了那个循环。Agent loop 将这个过程自动化: 调用模型, 执行它要求的工具, 把结果送回去, 重复 -- 直到模型说 "我完成了"。
考虑一个简单任务: "创建一个打印 hello 的 Python 文件。" 模型需要 (1) 决定写文件, (2) 写入文件, (3) 验证是否正常工作。至少三次工具调用。没有循环的话, 每一次都需要人工干预。
## 解决方案
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tool |
| prompt | | | | execute |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
(loop continues)
The loop terminates when stop_reason != "tool_use".
That single condition is the entire control flow.
```
## 工作原理
1. 用户提供一个 prompt, 成为第一条消息。
```python
history.append({"role": "user", "content": query})
```
2. 消息数组连同工具定义一起发送给 LLM。
```python
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
```
3. 助手的响应被追加到消息列表中。
```python
messages.append({"role": "assistant", "content": response.content})
```
4. 检查 stop_reason。如果模型没有调用工具, 循环结束。这是唯一的退出条件。
```python
if response.stop_reason != "tool_use":
return
```
5. 对响应中的每个 tool_use 块, 执行工具 (本节课中是 bash) 并收集结果。
```python
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
6. 结果作为 user 消息追加, 循环继续。
```python
messages.append({"role": "user", "content": results})
```
## 核心代码
最小可行智能体 -- 不到 30 行代码实现整个模式
(来自 `agents/s01_agent_loop.py`, 第 66-86 行):
```python
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
output = run_bash(block.input["command"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## 变更内容
这是第 1 节课 -- 起点。没有前置课程。
| 组件 | 之前 | 之后 |
|---------------|------------|--------------------------------|
| Agent loop | (无) | `while True` + stop_reason |
| Tools | (无) | `bash` (单一工具) |
| Messages | (无) | 累积式消息列表 |
| Control flow | (无) | `stop_reason != "tool_use"` |
## 设计原理
这个循环是所有基于 LLM 的智能体的通用基础。生产实现会增加错误处理、token 计数、流式输出和重试逻辑, 但基本结构不变。简洁性就是重点: 一个退出条件 (`stop_reason != "tool_use"`) 控制整个流程。本课程中的所有其他内容 -- 工具、规划、压缩、团队 -- 都是在这个循环之上叠加, 而不修改它。理解这个循环就是理解所有智能体。
## 试一试
```sh
cd learn-claude-code
python agents/s01_agent_loop.py
```
可以尝试的提示:
1. `Create a file called hello.py that prints "Hello, World!"`
2. `List all Python files in this directory`
3. `What is the current git branch?`
4. `Create a directory called test_output and write 3 files in it`

141
docs/zh/s02-tool-use.md Normal file
View File

@ -0,0 +1,141 @@
# s02: Tools (工具)
> 一个分发映射表 (dispatch map) 将工具调用路由到处理函数 -- 循环本身完全不需要改动。
## 问题
只有 `bash` 时, 智能体所有操作都通过 shell: 读文件、写文件、编辑文件。这能用但很脆弱。`cat` 的输出会被不可预测地截断。`sed` 替换遇到特殊字符就会失败。模型浪费大量 token 构造 shell 管道, 而一个直接的函数调用会简单得多。
更重要的是, bash 是一个安全攻击面。每次 bash 调用都能做 shell 能做的一切。有了专用工具如 `read_file``write_file`, 你可以在工具层面强制路径沙箱化, 阻止危险模式, 而不是寄希望于模型自觉回避。
关键洞察: 添加工具不需要修改循环。s01 的循环保持不变。你只需在工具数组中添加条目, 编写处理函数, 然后通过 dispatch map 把它们关联起来。
## 解决方案
```
+----------+ +-------+ +------------------+
| User | ---> | LLM | ---> | Tool Dispatch |
| prompt | | | | { |
+----------+ +---+---+ | bash: run_bash |
^ | read: run_read |
| | write: run_wr |
+----------+ edit: run_edit |
tool_result| } |
+------------------+
The dispatch map is a dict: {tool_name: handler_function}
One lookup replaces any if/elif chain.
```
## 工作原理
1. 为每个工具定义处理函数。每个函数接受与工具 input_schema 对应的关键字参数, 返回字符串结果。
```python
def run_read(path: str, limit: int = None) -> str:
text = safe_path(path).read_text()
lines = text.splitlines()
if limit and limit < len(lines):
lines = lines[:limit]
return "\n".join(lines)[:50000]
```
2. 创建 dispatch map, 将工具名映射到处理函数。
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
```
3. 在 agent loop 中, 按名称查找处理函数, 而不是硬编码。
```python
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
```
4. 路径沙箱化防止模型逃逸出工作区。
```python
def safe_path(p: str) -> Path:
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
```
## 核心代码
dispatch 模式 (来自 `agents/s02_tool_use.py`, 第 93-129 行):
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
"read_file": lambda **kw: run_read(kw["path"], kw.get("limit")),
"write_file": lambda **kw: run_write(kw["path"], kw["content"]),
"edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"],
kw["new_text"]),
}
def agent_loop(messages: list):
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=messages,
tools=TOOLS, max_tokens=8000,
)
messages.append({"role": "assistant", "content": response.content})
if response.stop_reason != "tool_use":
return
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input) if handler \
else f"Unknown tool: {block.name}"
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
messages.append({"role": "user", "content": results})
```
## 相对 s01 的变更
| 组件 | 之前 (s01) | 之后 (s02) |
|----------------|--------------------|----------------------------|
| Tools | 1 (仅 bash) | 4 (bash, read, write, edit)|
| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |
| 路径安全 | 无 | `safe_path()` 沙箱 |
| Agent loop | 不变 | 不变 |
## 设计原理
dispatch map 模式可以线性扩展 -- 添加工具只需添加一个处理函数和一个 schema 条目。循环永远不需要改动。这种关注点分离 (循环 vs 处理函数) 是智能体框架能支持数十个工具而不增加控制流复杂度的原因。该模式还支持对每个处理函数进行独立测试, 因为处理函数是与循环无耦合的纯函数。任何超出 dispatch map 的智能体都是设计问题, 而非扩展问题。
## 试一试
```sh
cd learn-claude-code
python agents/s02_tool_use.py
```
可以尝试的提示:
1. `Read the file requirements.txt`
2. `Create a file called greet.py with a greet(name) function`
3. `Edit greet.py to add a docstring to the function`
4. `Read greet.py to verify the edit worked`
5. `Run the greet function with bash: python -c "from greet import greet; greet('World')"`

157
docs/zh/s03-todo-write.md Normal file
View File

@ -0,0 +1,157 @@
# s03: TodoWrite (待办写入)
> TodoManager 让智能体能追踪自己的进度, 而 nag reminder 注入机制在它忘记更新时强制提醒。
## 问题
当智能体处理多步骤任务时, 它经常丢失对已完成和待办事项的追踪。没有显式的计划, 模型可能重复工作、跳过步骤或跑偏。用户也无法看到智能体内部的计划。
这个问题比听起来更严重。长对话会导致模型 "漂移" -- 随着上下文窗口被工具结果填满, 系统提示的影响力逐渐减弱。一个 10 步的重构任务可能完成了 1-3 步, 然后模型就开始即兴发挥, 因为它忘了第 4-10 步的存在。
解决方案是结构化状态: 一个模型显式写入的 TodoManager。模型创建计划, 工作时将项目标记为 in_progress, 完成后标记为 completed。nag reminder 机制在模型连续 3 轮以上不更新待办时注入提醒。
教学简化说明: 这里 nag 阈值设为 3 轮是为了教学可见性。生产环境的智能体通常使用约 10 轮的阈值以避免过度提醒。
## 解决方案
```
+----------+ +-------+ +---------+
| User | ---> | LLM | ---> | Tools |
| prompt | | | | + todo |
+----------+ +---+---+ +----+----+
^ |
| tool_result |
+---------------+
|
+-----------+-----------+
| TodoManager state |
| [ ] task A |
| [>] task B <- doing |
| [x] task C |
+-----------------------+
|
if rounds_since_todo >= 3:
inject <reminder> into tool_result
```
## 工作原理
1. TodoManager 验证并存储一组带状态的项目。同一时间只允许一个项目处于 `in_progress` 状态。
```python
class TodoManager:
def __init__(self):
self.items = []
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one task can be in_progress")
self.items = validated
return self.render()
```
2. `todo` 工具和其他工具一样添加到 dispatch map 中。
```python
TOOL_HANDLERS = {
"bash": lambda **kw: run_bash(kw["command"]),
# ...other tools...
"todo": lambda **kw: TODO.update(kw["items"]),
}
```
3. nag reminder 在模型连续 3 轮以上不调用 `todo` 时, 向 tool_result 消息中注入 `<reminder>` 标签。
```python
def agent_loop(messages: list):
rounds_since_todo = 0
while True:
if rounds_since_todo >= 3 and messages:
last = messages[-1]
if (last["role"] == "user"
and isinstance(last.get("content"), list)):
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
# ... rest of loop ...
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
```
4. 系统提示指导模型使用 todo 进行规划。
```python
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Use the todo tool to plan multi-step tasks.
Mark in_progress before starting, completed when done.
Prefer tools over prose."""
```
## 核心代码
TodoManager 和 nag 注入 (来自 `agents/s03_todo_write.py`,
第 51-85 行和第 158-187 行):
```python
class TodoManager:
def update(self, items: list) -> str:
validated = []
in_progress_count = 0
for item in items:
status = item.get("status", "pending")
if status == "in_progress":
in_progress_count += 1
validated.append({
"id": item["id"],
"text": item["text"],
"status": status,
})
if in_progress_count > 1:
raise ValueError("Only one in_progress")
self.items = validated
return self.render()
# In agent_loop:
if rounds_since_todo >= 3:
last["content"].insert(0, {
"type": "text",
"text": "<reminder>Update your todos.</reminder>",
})
```
## 相对 s02 的变更
| 组件 | 之前 (s02) | 之后 (s03) |
|----------------|------------------|--------------------------|
| Tools | 4 | 5 (+todo) |
| 规划 | 无 | 带状态的 TodoManager |
| Nag 注入 | 无 | 3 轮后注入 `<reminder>` |
| Agent loop | 简单分发 | + rounds_since_todo 计数器|
## 设计原理
可见的计划能提高任务完成率, 因为模型可以自我监控进度。nag 机制创造了问责性 -- 没有它, 随着对话上下文增长和早期指令淡化, 模型可能在执行中途放弃计划。"同一时间只允许一个 in_progress" 的约束强制顺序聚焦, 防止上下文切换开销降低输出质量。这个模式之所以有效, 是因为它将模型的工作记忆外化为结构化状态, 使其能够在注意力漂移中存活。
## 试一试
```sh
cd learn-claude-code
python agents/s03_todo_write.py
```
可以尝试的提示:
1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`
2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`
3. `Review all Python files and fix any style issues`

144
docs/zh/s04-subagent.md Normal file
View File

@ -0,0 +1,144 @@
# s04: Subagent (子智能体)
> 子智能体使用全新的消息列表运行, 与父智能体共享文件系统, 仅返回摘要 -- 保持父上下文的整洁。
## 问题
随着智能体工作, 它的消息数组不断增长。每次工具调用、每次文件读取、每次 bash 输出都在累积。20-30 次工具调用后, 上下文窗口充满了无关的历史。为了回答一个简单问题而读取的 500 行文件, 会永久占据上下文中的 500 行空间。
这对探索性任务尤其糟糕。"这个项目用了什么测试框架?" 可能需要读取 5 个文件, 但父智能体的历史中并不需要这 5 个文件的全部内容 -- 它只需要答案: "pytest, 使用 conftest.py 配置。"
解决方案是进程隔离: 以 `messages=[]` 启动一个子智能体。子智能体进行探索、读取文件、运行命令。完成后, 只有最终的文本响应返回给父智能体。子智能体的全部消息历史被丢弃。
## 解决方案
```
Parent agent Subagent
+------------------+ +------------------+
| messages=[...] | | messages=[] | <-- fresh
| | dispatch | |
| tool: task | ---------->| while tool_use: |
| prompt="..." | | call tools |
| | summary | append results |
| result = "..." | <--------- | return last text |
+------------------+ +------------------+
|
Parent context stays clean.
Subagent context is discarded.
```
## 工作原理
1. 父智能体拥有一个 `task` 工具用于触发子智能体的生成。子智能体获得除 `task` 外的所有基础工具 (不允许递归生成)。
```python
PARENT_TOOLS = CHILD_TOOLS + [
{"name": "task",
"description": "Spawn a subagent with fresh context.",
"input_schema": {
"type": "object",
"properties": {
"prompt": {"type": "string"},
"description": {"type": "string"},
},
"required": ["prompt"],
}},
]
```
2. 子智能体以全新的消息列表启动, 仅包含委派的 prompt。它共享相同的文件系统。
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30): # safety limit
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({
"role": "assistant", "content": response.content
})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
```
3. 只有最终文本返回给父智能体。子智能体 30+ 次工具调用的历史被丢弃。
```python
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
4. 父智能体将此摘要作为普通的 tool_result 接收。
```python
if block.name == "task":
output = run_subagent(block.input["prompt"])
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": str(output),
})
```
## 核心代码
子智能体函数 (来自 `agents/s04_subagent.py`, 第 110-128 行):
```python
def run_subagent(prompt: str) -> str:
sub_messages = [{"role": "user", "content": prompt}]
for _ in range(30):
response = client.messages.create(
model=MODEL, system=SUBAGENT_SYSTEM,
messages=sub_messages,
tools=CHILD_TOOLS, max_tokens=8000,
)
sub_messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
results = []
for block in response.content:
if block.type == "tool_use":
handler = TOOL_HANDLERS.get(block.name)
output = handler(**block.input)
results.append({"type": "tool_result",
"tool_use_id": block.id,
"content": str(output)[:50000]})
sub_messages.append({"role": "user", "content": results})
return "".join(
b.text for b in response.content if hasattr(b, "text")
) or "(no summary)"
```
## 相对 s03 的变更
| 组件 | 之前 (s03) | 之后 (s04) |
|----------------|------------------|---------------------------|
| Tools | 5 | 5 (基础) + task (仅父端) |
| 上下文 | 单一共享 | 父 + 子隔离 |
| Subagent | 无 | `run_subagent()` 函数 |
| 返回值 | 不适用 | 仅摘要文本 |
| Todo 系统 | TodoManager | 已移除 (非本节重点) |
## 设计原理
进程隔离免费提供了上下文隔离。全新的 `messages[]` 意味着子智能体不会被父级的对话历史干扰。代价是通信开销 -- 结果必须压缩回父级, 丢失细节。这与操作系统进程隔离的权衡相同: 用序列化成本换取安全性和整洁性。限制子智能体深度 (不允许递归生成) 防止无限资源消耗, 最大迭代次数确保失控的子进程能终止。
## 试一试
```sh
cd learn-claude-code
python agents/s04_subagent.py
```
可以尝试的提示:
1. `Use a subtask to find what testing framework this project uses`
2. `Delegate: read all .py files and summarize what each one does`
3. `Use a task to create a new module, then verify it from here`

View File

@ -0,0 +1,153 @@
# s05: Skills (技能加载)
> 两层技能注入避免了系统提示膨胀: 在系统提示中放技能名称 (低成本), 在 tool_result 中按需放入完整技能内容。
## 问题
你希望智能体针对不同领域遵循特定的工作流: git 约定、测试模式、代码审查清单。简单粗暴的做法是把所有内容都塞进系统提示。但系统提示的有效注意力是有限的 -- 文本太多, 模型就会开始忽略其中一部分。
如果你有 10 个技能, 每个 2000 token, 那就是 20,000 token 的系统提示。模型关注开头和结尾, 但会略过中间部分。更糟糕的是, 这些技能中大部分与当前任务无关。文件编辑任务不需要 git 工作流说明。
两层方案解决了这个问题: 第一层在系统提示中放入简短的技能描述 (每个技能约 100 token)。第二层只在模型调用 `load_skill` 时, 才将完整的技能内容加载到 tool_result 中。模型知道有哪些技能可用 (低成本), 按需加载它们 (只在相关时)。
## 解决方案
```
System prompt (Layer 1 -- always present):
+--------------------------------------+
| You are a coding agent. |
| Skills available: |
| - git: Git workflow helpers | ~100 tokens/skill
| - test: Testing best practices |
+--------------------------------------+
When model calls load_skill("git"):
+--------------------------------------+
| tool_result (Layer 2 -- on demand): |
| <skill name="git"> |
| Full git workflow instructions... | ~2000 tokens
| Step 1: ... |
| Step 2: ... |
| </skill> |
+--------------------------------------+
```
## 工作原理
1. 技能文件以 Markdown 格式存放在 `.skills/` 目录中, 带有 YAML frontmatter。
```
.skills/
git.md # ---\n description: Git workflow\n ---\n ...
test.md # ---\n description: Testing patterns\n ---\n ...
```
2. SkillLoader 解析 frontmatter, 分离元数据和正文。
```python
class SkillLoader:
def _parse_frontmatter(self, text: str) -> tuple:
match = re.match(
r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL
)
if not match:
return {}, text
meta = {}
for line in match.group(1).strip().splitlines():
if ":" in line:
key, val = line.split(":", 1)
meta[key.strip()] = val.strip()
return meta, match.group(2).strip()
```
3. 第一层: `get_descriptions()` 返回简短描述, 用于系统提示。
```python
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "No description")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Skills available:
{SKILL_LOADER.get_descriptions()}"""
```
4. 第二层: `get_content()` 返回用 `<skill>` 标签包裹的完整正文。
```python
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>"
```
5. `load_skill` 工具只是 dispatch map 中的又一个条目。
```python
TOOL_HANDLERS = {
# ...base tools...
"load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]),
}
```
## 核心代码
SkillLoader 类 (来自 `agents/s05_skill_loading.py`, 第 51-97 行):
```python
class SkillLoader:
def __init__(self, skills_dir: Path):
self.skills = {}
for f in sorted(skills_dir.glob("*.md")):
text = f.read_text()
meta, body = self._parse_frontmatter(text)
self.skills[f.stem] = {
"meta": meta, "body": body
}
def get_descriptions(self) -> str:
lines = []
for name, skill in self.skills.items():
desc = skill["meta"].get("description", "")
lines.append(f" - {name}: {desc}")
return "\n".join(lines)
def get_content(self, name: str) -> str:
skill = self.skills.get(name)
if not skill:
return f"Error: Unknown skill '{name}'."
return (f"<skill name=\"{name}\">\n"
f"{skill['body']}\n</skill>")
```
## 相对 s04 的变更
| 组件 | 之前 (s04) | 之后 (s05) |
|----------------|------------------|----------------------------|
| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |
| 系统提示 | 静态字符串 | + 技能描述列表 |
| 知识库 | 无 | .skills/*.md 文件 |
| 注入方式 | 无 | 两层 (系统提示 + result) |
| Subagent | `run_subagent()` | 已移除 (非本节重点) |
## 设计原理
两层注入解决了注意力预算问题。将所有技能内容放入系统提示会在未使用的技能上浪费 token。第一层 (紧凑摘要) 总共约 120 token。第二层 (完整内容) 通过 tool_result 按需加载。这可以扩展到数十个技能而不降低模型注意力质量。关键洞察是: 模型只需要知道有哪些技能 (低成本) 就能决定何时加载某个技能 (高成本)。这与软件模块系统中的懒加载原则相同。
## 试一试
```sh
cd learn-claude-code
python agents/s05_skill_loading.py
```
可以尝试的提示:
1. `What skills are available?`
2. `Load the agent-builder skill and follow its instructions`
3. `I need to do a code review -- load the relevant skill first`
4. `Build an MCP server using the mcp-builder skill`

View File

@ -0,0 +1,170 @@
# s06: Compact (上下文压缩)
> 三层压缩管道让智能体可以无限期工作: 策略性地遗忘旧的工具结果, token 超过阈值时自动摘要, 以及支持手动触发压缩。
## 问题
上下文窗口是有限的。工具调用积累到足够多时, 消息数组会超过模型的上下文限制, API 调用直接失败。即使在到达硬限制之前, 性能也会下降: 模型变慢、准确率降低, 开始忽略早期消息。
200,000 token 的上下文窗口听起来很大, 但一次 `read_file` 读取 1000 行源文件就消耗约 4000 token。读取 30 个文件、运行 20 条 bash 命令后, 你就已经用掉 100,000+ token 了。没有某种压缩机制, 智能体无法在大型代码库上工作。
三层管道以递增的激进程度来应对这个问题:
第一层 (micro-compact) 每轮静默替换旧的工具结果。
第二层 (auto-compact) 在 token 超过阈值时触发完整摘要。
第三层 (manual compact) 让模型自己触发压缩。
教学简化说明: 这里的 token 估算使用粗略的 字符数/4 启发式方法。生产系统使用专业的 tokenizer 库进行精确计数。
## 解决方案
```
Every turn:
+------------------+
| Tool call result |
+------------------+
|
v
[Layer 1: micro_compact] (silent, every turn)
Replace tool_result > 3 turns old
with "[Previous: used {tool_name}]"
|
v
[Check: tokens > 50000?]
| |
no yes
| |
v v
continue [Layer 2: auto_compact]
Save transcript to .transcripts/
LLM summarizes conversation.
Replace all messages with [summary].
|
v
[Layer 3: compact tool]
Model calls compact explicitly.
Same summarization as auto_compact.
```
## 工作原理
1. **第一层 -- micro_compact**: 每次 LLM 调用前, 找到最近 3 条之前的所有 tool_result 条目, 替换其内容。
```python
def micro_compact(messages: list) -> list:
tool_results = []
for i, msg in enumerate(messages):
if msg["role"] == "user" and isinstance(msg.get("content"), list):
for j, part in enumerate(msg["content"]):
if isinstance(part, dict) and part.get("type") == "tool_result":
tool_results.append((i, j, part))
if len(tool_results) <= KEEP_RECENT:
return messages
to_clear = tool_results[:-KEEP_RECENT]
for _, _, part in to_clear:
if len(part.get("content", "")) > 100:
tool_id = part.get("tool_use_id", "")
tool_name = tool_name_map.get(tool_id, "unknown")
part["content"] = f"[Previous: used {tool_name}]"
return messages
```
2. **第二层 -- auto_compact**: 当估算 token 超过 50,000 时, 保存完整对话记录并请求 LLM 进行摘要。
```python
def auto_compact(messages: list) -> list:
TRANSCRIPT_DIR.mkdir(exist_ok=True)
transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl"
with open(transcript_path, "w") as f:
for msg in messages:
f.write(json.dumps(msg, default=str) + "\n")
response = client.messages.create(
model=MODEL,
messages=[{"role": "user", "content":
"Summarize this conversation for continuity..."
+ json.dumps(messages, default=str)[:80000]}],
max_tokens=2000,
)
summary = response.content[0].text
return [
{"role": "user", "content": f"[Compressed]\n\n{summary}"},
{"role": "assistant", "content": "Understood. Continuing."},
]
```
3. **第三层 -- manual compact**: `compact` 工具按需触发相同的摘要机制。
```python
if manual_compact:
messages[:] = auto_compact(messages)
```
4. Agent loop 整合了全部三层。
```python
def agent_loop(messages: list):
while True:
micro_compact(messages)
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages)
response = client.messages.create(...)
# ... tool execution ...
if manual_compact:
messages[:] = auto_compact(messages)
```
## 核心代码
三层管道 (来自 `agents/s06_context_compact.py`, 第 67-93 行和第 189-223 行):
```python
THRESHOLD = 50000
KEEP_RECENT = 3
def micro_compact(messages):
# Replace old tool results with placeholders
...
def auto_compact(messages):
# Save transcript, LLM summarize, replace messages
...
def agent_loop(messages):
while True:
micro_compact(messages) # Layer 1
if estimate_tokens(messages) > THRESHOLD:
messages[:] = auto_compact(messages) # Layer 2
response = client.messages.create(...)
# ...
if manual_compact:
messages[:] = auto_compact(messages) # Layer 3
```
## 相对 s05 的变更
| 组件 | 之前 (s05) | 之后 (s06) |
|----------------|------------------|----------------------------|
| Tools | 5 | 5 (基础 + compact) |
| 上下文管理 | 无 | 三层压缩 |
| Micro-compact | 无 | 旧结果 -> 占位符 |
| Auto-compact | 无 | token 阈值触发 |
| Manual compact | 无 | `compact` 工具 |
| Transcripts | 无 | 保存到 .transcripts/ |
| Skills | load_skill | 已移除 (非本节重点) |
## 设计原理
上下文窗口有限, 但智能体会话可以无限。三层压缩在不同粒度上解决这个问题: micro-compact (替换旧工具输出), auto-compact (接近限制时 LLM 摘要), manual compact (用户触发)。关键洞察是遗忘是特性而非缺陷 -- 它使无限会话成为可能。转录文件将完整历史保存在磁盘上, 因此没有任何东西真正丢失, 只是从活跃上下文中移出。分层方法让每一层在各自的粒度上独立运作, 从静默的逐轮清理到完整的对话重置。
## 试一试
```sh
cd learn-claude-code
python agents/s06_context_compact.py
```
可以尝试的提示:
1. `Read every Python file in the agents/ directory one by one`
(观察 micro-compact 替换旧的结果)
2. `Keep reading files until compression triggers automatically`
3. `Use the compact tool to manually compress the conversation`

159
docs/zh/s07-task-system.md Normal file
View File

@ -0,0 +1,159 @@
# s07: Tasks (任务系统)
> 任务以 JSON 文件形式持久化在文件系统上, 带有依赖图, 因此它们能在上下文压缩后存活, 也可以跨智能体共享。
## 问题
内存中的状态 (如 s03 的 TodoManager) 在上下文压缩 (s06) 时会丢失。auto_compact 用摘要替换消息后, 待办列表就没了。智能体只能从摘要文本中重建它, 这是有损且容易出错的。
这就是 s06 到 s07 的关键桥梁: TodoManager 的条目随压缩消亡; 基于文件的任务不会。将状态移到文件系统上使其不受压缩影响。
更根本地说, 内存中的状态对其他智能体不可见。当我们最终构建团队 (s09+) 时, 队友需要一个共享的任务看板。内存中的数据结构是进程局部的。
解决方案是将任务作为 JSON 文件持久化在 `.tasks/` 目录中。每个任务是一个单独的文件, 包含 ID、主题、状态和依赖图。完成任务 1 会自动解除任务 2 的阻塞 (如果任务 2 有 `blockedBy: [1]`)。文件系统成为唯一的真实来源。
## 解决方案
```
.tasks/
task_1.json {"id":1, "status":"completed", ...}
task_2.json {"id":2, "blockedBy":[1], "status":"pending"}
task_3.json {"id":3, "blockedBy":[2], "status":"pending"}
Dependency resolution:
+----------+ +----------+ +----------+
| task 1 | --> | task 2 | --> | task 3 |
| complete | | blocked | | blocked |
+----------+ +----------+ +----------+
| ^
+--- completing task 1 removes it from
task 2's blockedBy list
```
## 工作原理
1. TaskManager 提供 CRUD 操作。每个任务是一个 JSON 文件。
```python
class TaskManager:
def create(self, subject: str, description: str = "") -> str:
task = {
"id": self._next_id,
"subject": subject,
"description": description,
"status": "pending",
"blockedBy": [],
"blocks": [],
"owner": "",
}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
```
2. 当任务标记为 completed 时, `_clear_dependency` 将其 ID 从所有其他任务的 `blockedBy` 列表中移除。
```python
def _clear_dependency(self, completed_id: int):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
3. `update` 方法处理状态变更和双向依赖关联。
```python
def update(self, task_id, status=None,
add_blocked_by=None, add_blocks=None):
task = self._load(task_id)
if status:
task["status"] = status
if status == "completed":
self._clear_dependency(task_id)
if add_blocks:
task["blocks"] = list(set(task["blocks"] + add_blocks))
for blocked_id in add_blocks:
blocked = self._load(blocked_id)
if task_id not in blocked["blockedBy"]:
blocked["blockedBy"].append(task_id)
self._save(blocked)
self._save(task)
```
4. 四个任务工具添加到 dispatch map。
```python
TOOL_HANDLERS = {
# ...base tools...
"task_create": lambda **kw: TASKS.create(kw["subject"]),
"task_update": lambda **kw: TASKS.update(kw["task_id"],
kw.get("status")),
"task_list": lambda **kw: TASKS.list_all(),
"task_get": lambda **kw: TASKS.get(kw["task_id"]),
}
```
## 核心代码
带依赖图的 TaskManager (来自 `agents/s07_task_system.py`, 第 46-123 行):
```python
class TaskManager:
def __init__(self, tasks_dir: Path):
self.dir = tasks_dir
self.dir.mkdir(exist_ok=True)
self._next_id = self._max_id() + 1
def _load(self, task_id: int) -> dict:
path = self.dir / f"task_{task_id}.json"
return json.loads(path.read_text())
def _save(self, task: dict):
path = self.dir / f"task_{task['id']}.json"
path.write_text(json.dumps(task, indent=2))
def create(self, subject, description=""):
task = {"id": self._next_id, "subject": subject,
"status": "pending", "blockedBy": [],
"blocks": [], "owner": ""}
self._save(task)
self._next_id += 1
return json.dumps(task, indent=2)
def _clear_dependency(self, completed_id):
for f in self.dir.glob("task_*.json"):
task = json.loads(f.read_text())
if completed_id in task.get("blockedBy", []):
task["blockedBy"].remove(completed_id)
self._save(task)
```
## 相对 s06 的变更
| 组件 | 之前 (s06) | 之后 (s07) |
|----------------|------------------|----------------------------------|
| Tools | 5 | 8 (+task_create/update/list/get) |
| 状态存储 | 仅内存 | .tasks/ 中的 JSON 文件 |
| 依赖关系 | 无 | blockedBy + blocks 图 |
| 压缩机制 | 三层 | 已移除 (非本节重点) |
| 持久化 | 压缩后丢失 | 压缩后存活 |
## 设计原理
基于文件的状态能在上下文压缩中存活。当智能体的对话被压缩时, 内存中的状态会丢失, 但写入磁盘的任务会持久保存。依赖图确保即使在上下文丢失后也能按正确顺序执行。这是临时对话与持久工作之间的桥梁 -- 智能体可以忘记对话细节, 但始终有任务看板来提醒它还需要做什么。文件系统作为唯一真实来源也为未来的多智能体共享提供了基础, 因为任何进程都可以读取相同的 JSON 文件。
## 试一试
```sh
cd learn-claude-code
python agents/s07_task_system.py
```
可以尝试的提示:
1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.`
2. `List all tasks and show the dependency graph`
3. `Complete task 1 and then list tasks to see task 2 unblocked`
4. `Create a task board for refactoring: parse -> transform -> emit -> test`

View File

@ -0,0 +1,177 @@
# s08: Background Tasks (后台任务)
> BackgroundManager 在独立线程中运行命令, 在每次 LLM 调用前排空通知队列, 使智能体永远不会因长时间运行的操作而阻塞。
## 问题
有些命令需要几分钟: `npm install``pytest``docker build`。在阻塞式的 agent loop 中, 模型只能干等子进程结束, 什么也做不了。如果用户要求 "安装依赖, 同时创建配置文件", 智能体会先安装, 然后才创建配置 -- 串行执行, 而非并行。
智能体需要并发能力。不是将 agent loop 本身完全多线程化, 而是能够发起一个长时间命令然后继续工作。当命令完成时, 结果自然地出现在对话中。
解决方案是一个 BackgroundManager, 它在守护线程中运行命令, 将结果收集到通知队列中。每次 LLM 调用前, 队列被排空, 结果注入到消息中。
## 解决方案
```
Main thread Background thread
+-----------------+ +-----------------+
| agent loop | | task executes |
| ... | | ... |
| [LLM call] <---+------- | enqueue(result) |
| ^drain queue | +-----------------+
+-----------------+
Timeline:
Agent --[spawn A]--[spawn B]--[other work]----
| |
v v
[A runs] [B runs] (parallel)
| |
+-- notification queue --+
|
[results injected before
next LLM call]
```
## 工作原理
1. BackgroundManager 追踪任务并维护一个线程安全的通知队列。
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
```
2. `run()` 启动一个守护线程并立即返回 task_id。
```python
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {
"status": "running",
"result": None,
"command": command,
}
thread = threading.Thread(
target=self._execute,
args=(task_id, command),
daemon=True,
)
thread.start()
return f"Background task {task_id} started"
```
3. 线程目标函数 `_execute` 运行子进程并将结果推入通知队列。
```python
def _execute(self, task_id: str, command: str):
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=300)
output = (r.stdout + r.stderr).strip()[:50000]
status = "completed"
except subprocess.TimeoutExpired:
output = "Error: Timeout (300s)"
status = "timeout"
self.tasks[task_id]["status"] = status
self.tasks[task_id]["result"] = output
with self._lock:
self._notification_queue.append({
"task_id": task_id,
"status": status,
"result": output[:500],
})
```
4. `drain_notifications()` 返回并清空待处理的结果。
```python
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
5. Agent loop 在每次 LLM 调用前排空通知。
```python
def agent_loop(messages: list):
while True:
notifs = BG.drain_notifications()
if notifs and messages:
notif_text = "\n".join(
f"[bg:{n['task_id']}] {n['status']}: "
f"{n['result']}" for n in notifs
)
messages.append({"role": "user",
"content": f"<background-results>"
f"\n{notif_text}\n"
f"</background-results>"})
messages.append({"role": "assistant",
"content": "Noted background results."})
response = client.messages.create(...)
```
## 核心代码
BackgroundManager (来自 `agents/s08_background_tasks.py`, 第 49-107 行):
```python
class BackgroundManager:
def __init__(self):
self.tasks = {}
self._notification_queue = []
self._lock = threading.Lock()
def run(self, command: str) -> str:
task_id = str(uuid.uuid4())[:8]
self.tasks[task_id] = {"status": "running",
"result": None,
"command": command}
thread = threading.Thread(
target=self._execute,
args=(task_id, command), daemon=True)
thread.start()
return f"Background task {task_id} started"
def _execute(self, task_id, command):
# run subprocess, push to queue
...
def drain_notifications(self) -> list:
with self._lock:
notifs = list(self._notification_queue)
self._notification_queue.clear()
return notifs
```
## 相对 s07 的变更
| 组件 | 之前 (s07) | 之后 (s08) |
|----------------|------------------|------------------------------------|
| Tools | 8 | 6 (基础 + background_run + check) |
| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |
| 通知机制 | 无 | 每轮排空的队列 |
| 并发 | 无 | 守护线程 |
| 任务系统 | 基于文件的 CRUD | 已移除 (非本节重点) |
## 设计原理
智能体循环本质上是单线程的 (一次一个 LLM 调用)。后台线程为 I/O 密集型工作 (测试、构建、安装) 打破了这个限制。通知队列模式 ("在下一次 LLM 调用前排空") 确保结果在对话的自然间断点到达, 而不是打断模型的推理过程。这是一个最小化的并发模型: 智能体循环保持单线程和确定性, 只有 I/O 密集型的子进程执行被并行化。
## 试一试
```sh
cd learn-claude-code
python agents/s08_background_tasks.py
```
可以尝试的提示:
1. `Run "sleep 5 && echo done" in the background, then create a file while it runs`
2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.`
3. `Run pytest in the background and keep working on other things`

212
docs/zh/s09-agent-teams.md Normal file
View File

@ -0,0 +1,212 @@
# s09: Agent Teams (智能体团队)
> 持久化的队友通过 JSONL 收件箱将孤立的智能体转变为可通信的团队 -- spawn、message、broadcast 和 drain。
## 问题
子智能体 (s04) 是一次性的: 生成、工作、返回摘要、消亡。它们没有身份, 没有跨调用的记忆, 也无法接收后续指令。后台任务 (s08) 运行 shell 命令, 但不能做 LLM 引导的决策或交流发现。
真正的团队协作需要三样东西: (1) 存活时间超过单次 prompt 的持久化智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。没有消息机制, 即使持久化的队友也是又聋又哑的 -- 它们可以并行工作但永远无法协调。
解决方案将 TeammateManager (用于生成持久化的命名智能体) 与使用 JSONL 收件箱文件的 MessageBus 结合。每个队友在独立线程中运行自己的 agent loop, 每次 LLM 调用前检查收件箱, 可以向任何其他队友或领导发送消息。
关于 s06 到 s07 的桥梁: s03 的 TodoManager 条目随压缩 (s06) 消亡。基于文件的任务 (s07) 因为存储在磁盘上而能存活压缩。团队建立在同样的原则上 -- config.json 和收件箱文件持久化在上下文窗口之外。
## 解决方案
```
Teammate lifecycle:
spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN
Communication:
.team/
config.json <- team roster + statuses
inbox/
alice.jsonl <- append-only, drain-on-read
bob.jsonl
lead.jsonl
+--------+ send("alice","bob","...") +--------+
| alice | -----------------------------> | bob |
| loop | bob.jsonl << {json_line} | loop |
+--------+ +--------+
^ |
| BUS.read_inbox("alice") |
+---- alice.jsonl -> read + drain ---------+
5 message types:
+-------------------------+------------------------------+
| message | Normal text between agents |
| broadcast | Sent to all teammates |
| shutdown_request | Request graceful shutdown |
| shutdown_response | Approve/reject shutdown |
| plan_approval_response | Approve/reject plan |
+-------------------------+------------------------------+
```
## 工作原理
1. TeammateManager 通过 config.json 维护团队名册。每个成员有名称、角色和状态。
```python
class TeammateManager:
def __init__(self, team_dir: Path):
self.dir = team_dir
self.dir.mkdir(exist_ok=True)
self.config_path = self.dir / "config.json"
self.config = self._load_config()
self.threads = {}
```
2. `spawn()` 创建队友并在线程中启动其 agent loop。重新 spawn 一个 idle 状态的队友会将其重新激活。
```python
def spawn(self, name: str, role: str, prompt: str) -> str:
member = self._find_member(name)
if member:
if member["status"] not in ("idle", "shutdown"):
return f"Error: '{name}' is currently {member['status']}"
member["status"] = "working"
else:
member = {"name": name, "role": role, "status": "working"}
self.config["members"].append(member)
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
self.threads[name] = thread
thread.start()
return f"Spawned teammate '{name}' (role: {role})"
```
3. MessageBus 处理 JSONL 收件箱文件。`send()` 追加一行 JSON; `read_inbox()` 读取所有行并清空文件。
```python
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content,
"timestamp": time.time()}
if extra:
msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
return f"Sent {msg_type} to {to}"
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists():
return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("") # drain
return json.dumps(msgs, indent=2)
```
4. 每个队友在每次 LLM 调用前检查收件箱, 将收到的消息注入对话上下文。
```python
def _teammate_loop(self, name, role, prompt):
sys_prompt = f"You are '{name}', role: {role}, at {WORKDIR}."
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
if inbox != "[]":
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
messages.append({"role": "assistant",
"content": "Noted inbox messages."})
response = client.messages.create(
model=MODEL, system=sys_prompt,
messages=messages, tools=TOOLS)
messages.append({"role": "assistant",
"content": response.content})
if response.stop_reason != "tool_use":
break
# execute tools, append results...
self._find_member(name)["status"] = "idle"
self._save_config()
```
5. `broadcast()` 向除发送者外的所有队友发送相同消息。
```python
def broadcast(self, sender, content, teammates):
count = 0
for name in teammates:
if name != sender:
self.send(sender, name, content, "broadcast")
count += 1
return f"Broadcast to {count} teammates"
```
## 核心代码
TeammateManager + MessageBus 核心 (来自 `agents/s09_agent_teams.py`):
```python
class TeammateManager:
def spawn(self, name, role, prompt):
member = self._find_member(name) or {
"name": name, "role": role, "status": "working"
}
member["status"] = "working"
self._save_config()
thread = threading.Thread(
target=self._teammate_loop,
args=(name, role, prompt), daemon=True)
thread.start()
return f"Spawned '{name}'"
class MessageBus:
def send(self, sender, to, content,
msg_type="message", extra=None):
msg = {"type": msg_type, "from": sender,
"content": content, "timestamp": time.time()}
if extra: msg.update(extra)
with open(self.dir / f"{to}.jsonl", "a") as f:
f.write(json.dumps(msg) + "\n")
def read_inbox(self, name):
path = self.dir / f"{name}.jsonl"
if not path.exists(): return "[]"
msgs = [json.loads(l)
for l in path.read_text().strip().splitlines()
if l]
path.write_text("")
return json.dumps(msgs, indent=2)
```
## 相对 s08 的变更
| 组件 | 之前 (s08) | 之后 (s09) |
|----------------|------------------|------------------------------------|
| Tools | 6 | 9 (+spawn/send/read_inbox) |
| 智能体数量 | 单一 | 领导 + N 个队友 |
| 持久化 | 无 | config.json + JSONL 收件箱 |
| 线程 | 后台命令 | 每线程完整 agent loop |
| 生命周期 | 一次性 | idle -> working -> idle |
| 通信 | 无 | 5 种消息类型 + broadcast |
教学简化说明: 此实现未使用文件锁来保护收件箱访问。在生产中, 多个写入者并发追加需要文件锁或原子重命名。这里使用的单写入者-per-收件箱模式在教学场景下是安全的。
## 设计原理
基于文件的邮箱 (追加式 JSONL) 提供了并发安全的智能体间通信。追加操作在大多数文件系统上是原子的, 避免了锁竞争。"读取时排空" 模式 (读取全部, 截断) 提供批量传递。这比共享内存或基于 socket 的 IPC 更简单、更健壮。代价是延迟 -- 消息只在下一次轮询时才被看到 -- 但对于每轮需要数秒推理时间的 LLM 驱动智能体来说, 轮询延迟相比推理时间可以忽略不计。
## 试一试
```sh
cd learn-claude-code
python agents/s09_agent_teams.py
```
可以尝试的提示:
1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`
2. `Broadcast "status update: phase 1 complete" to all teammates`
3. `Check the lead inbox for any messages`
4. 输入 `/team` 查看带状态的团队名册
5. 输入 `/inbox` 手动检查领导的收件箱

View File

@ -0,0 +1,190 @@
# s10: Team Protocols (团队协议)
> 同一个 request_id 握手模式驱动了关机和计划审批两种协议 -- 一个 FSM, 两种应用。
## 问题
在 s09 中, 队友可以工作和通信, 但没有结构化的协调。出现了两个问题:
**关机**: 如何干净地停止一个队友? 直接杀线程会留下写了一半的文件和错误状态的 config.json。优雅关机需要握手: 领导发起请求, 队友决定是批准 (完成并退出) 还是拒绝 (继续工作)。
**计划审批**: 如何控制执行门槛? 当领导说 "重构认证模块", 队友会立即开始。对于高风险变更, 领导应该在执行开始前审查计划。初级提出方案, 高级批准。
两个问题共享相同的结构: 一方发送带唯一 ID 的请求, 另一方引用该 ID 作出响应。一个有限状态机 (FSM) 跟踪每个请求经历 pending -> approved | rejected 的状态变迁。
## 解决方案
```
Shutdown Protocol Plan Approval Protocol
================== ======================
Lead Teammate Teammate Lead
| | | |
|--shutdown_req-->| |--plan_req------>|
| {req_id:"abc"} | | {req_id:"xyz"} |
| | | |
|<--shutdown_resp-| |<--plan_resp-----|
| {req_id:"abc", | | {req_id:"xyz", |
| approve:true} | | approve:true} |
| | | |
v v v v
tracker["abc"] exits proceeds tracker["xyz"]
= approved = approved
Shared FSM (identical for both protocols):
[pending] --approve--> [approved]
[pending] --reject---> [rejected]
Trackers:
shutdown_requests = {req_id: {target, status}}
plan_requests = {req_id: {from, plan, status}}
```
## 工作原理
1. 领导通过生成 request_id 并通过收件箱发送 shutdown_request 来发起关机。
```python
shutdown_requests = {}
def handle_shutdown_request(teammate: str) -> str:
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending",
}
BUS.send("lead", teammate, "Please shut down gracefully.",
"shutdown_request", {"request_id": req_id})
return f"Shutdown request {req_id} sent (status: pending)"
```
2. 队友在收件箱中收到请求, 调用 `shutdown_response` 工具来批准或拒绝。
```python
if tool_name == "shutdown_response":
req_id = args["request_id"]
approve = args["approve"]
if req_id in shutdown_requests:
shutdown_requests[req_id]["status"] = \
"approved" if approve else "rejected"
BUS.send(sender, "lead", args.get("reason", ""),
"shutdown_response",
{"request_id": req_id, "approve": approve})
return f"Shutdown {'approved' if approve else 'rejected'}"
```
3. 队友的循环检查是否批准了关机并退出。
```python
if (block.name == "shutdown_response"
and block.input.get("approve")):
should_exit = True
# ...
member["status"] = "shutdown" if should_exit else "idle"
```
4. 计划审批遵循完全相同的模式。队友提交计划时生成一个 request_id。
```python
plan_requests = {}
if tool_name == "plan_approval":
plan_text = args.get("plan", "")
req_id = str(uuid.uuid4())[:8]
plan_requests[req_id] = {
"from": sender, "plan": plan_text,
"status": "pending",
}
BUS.send(sender, "lead", plan_text,
"plan_approval_request",
{"request_id": req_id, "plan": plan_text})
return f"Plan submitted (request_id={req_id})"
```
5. 领导审查后使用同一个 request_id 作出响应。
```python
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests.get(request_id)
if not req:
return f"Error: Unknown request_id '{request_id}'"
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve,
"feedback": feedback})
return f"Plan {req['status']} for '{req['from']}'"
```
6. 两个协议使用同一个 `plan_approval` 工具名, 有两种模式: 队友提交 (无 request_id), 领导审查 (带 request_id)。
```python
# Lead tool dispatch:
"plan_approval": lambda **kw: handle_plan_review(
kw["request_id"], kw["approve"],
kw.get("feedback", "")),
# Teammate: submit mode (generate request_id)
```
## 核心代码
双协议处理器 (来自 `agents/s10_team_protocols.py`):
```python
shutdown_requests = {}
plan_requests = {}
# -- Shutdown --
def handle_shutdown_request(teammate):
req_id = str(uuid.uuid4())[:8]
shutdown_requests[req_id] = {
"target": teammate, "status": "pending"
}
BUS.send("lead", teammate,
"Please shut down gracefully.",
"shutdown_request",
{"request_id": req_id})
# -- Plan Approval --
def handle_plan_review(request_id, approve, feedback=""):
req = plan_requests[request_id]
req["status"] = "approved" if approve else "rejected"
BUS.send("lead", req["from"], feedback,
"plan_approval_response",
{"request_id": request_id,
"approve": approve})
# Both use the same FSM:
# pending -> approved | rejected
# Both correlate by request_id across async inboxes
```
## 相对 s09 的变更
| 组件 | 之前 (s09) | 之后 (s10) |
|----------------|------------------|--------------------------------------|
| Tools | 9 | 12 (+shutdown_req/resp +plan) |
| 关机 | 仅自然退出 | 请求-响应握手 |
| 计划门控 | 无 | 提交/审查与审批 |
| 请求追踪 | 无 | 两个 tracker 字典 |
| 关联 | 无 | 每个请求一个 request_id |
| FSM | 无 | pending -> approved/rejected |
## 设计原理
request_id 关联模式将任何异步交互转化为可追踪的有限状态机。同一个三状态机 (pending -> approved/rejected) 适用于关机、计划审批或任何未来的协议。这就是为什么一个模式能处理多种协议 -- FSM 不关心它在审批什么。request_id 在异步收件箱中提供关联, 消息可能乱序到达, 使该模式对智能体间的时序差异具有鲁棒性。
## 试一试
```sh
cd learn-claude-code
python agents/s10_team_protocols.py
```
可以尝试的提示:
1. `Spawn alice as a coder. Then request her shutdown.`
2. `List teammates to see alice's status after shutdown approval`
3. `Spawn bob with a risky refactoring task. Review and reject his plan.`
4. `Spawn charlie, have him submit a plan, then approve it.`
5. 输入 `/team` 监控状态

View File

@ -0,0 +1,215 @@
# s11: Autonomous Agents (自治智能体)
> 带任务看板轮询的空闲循环让队友能自己发现和认领工作, 上下文压缩后通过身份重注入保持角色认知。
## 问题
在 s09-s10 中, 队友只在被明确指示时才工作。领导必须用特定的 prompt 生成每个队友。如果任务看板上有 10 个未认领的任务, 领导必须手动分配每一个。这无法扩展。
真正的自治意味着队友自己寻找工作。当一个队友完成当前任务后, 它应该扫描任务看板寻找未认领的工作, 认领一个任务, 然后开始工作 -- 不需要领导的任何指令。
但自治智能体面临一个微妙问题: 上下文压缩后, 智能体可能忘记自己是谁。如果消息被摘要化, 原始系统提示中的身份 ("你是 alice, 角色: coder") 就会淡化。身份重注入通过在压缩后的上下文开头插入身份块来解决这个问题。
教学简化说明: 这里的 token 估算比较粗糙 (字符数 / 4)。生产系统使用专业的 tokenizer 库。s03 中的 nag 阈值 3 轮是为教学可见性设的低值; 生产环境的智能体通常使用约 10 轮的阈值。
## 解决方案
```
Teammate lifecycle with idle cycle:
+-------+
| spawn |
+---+---+
|
v
+-------+ tool_use +-------+
| WORK | <------------- | LLM |
+---+---+ +-------+
|
| stop_reason != tool_use
| (or idle tool called)
v
+--------+
| IDLE | poll every 5s for up to 60s
+---+----+
|
+---> check inbox --> message? ----------> WORK
|
+---> scan .tasks/ --> unclaimed? -------> claim -> WORK
|
+---> 60s timeout ----------------------> SHUTDOWN
Identity re-injection after compression:
if len(messages) <= 3:
messages.insert(0, identity_block)
"You are 'alice', role: coder, team: my-team"
```
## 工作原理
1. 队友循环有两个阶段: WORK 和 IDLE。WORK 阶段运行标准的 agent loop。当 LLM 停止调用工具 (或调用了 `idle` 工具) 时, 队友进入 IDLE 阶段。
```python
def _loop(self, name, role, prompt):
while True:
# -- WORK PHASE --
messages = [{"role": "user", "content": prompt}]
for _ in range(50):
inbox = BUS.read_inbox(name)
for msg in inbox:
if msg.get("type") == "shutdown_request":
self._set_status(name, "shutdown")
return
messages.append(...)
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
# execute tools...
if idle_requested:
break
# -- IDLE PHASE --
self._set_status(name, "idle")
resume = self._idle_poll(name, messages)
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
2. 空闲阶段循环轮询收件箱和任务看板。
```python
def _idle_poll(self, name, messages):
polls = IDLE_TIMEOUT // POLL_INTERVAL # 60s / 5s = 12
for _ in range(polls):
time.sleep(POLL_INTERVAL)
# Check inbox for new messages
inbox = BUS.read_inbox(name)
if inbox:
messages.append({"role": "user",
"content": f"<inbox>{inbox}</inbox>"})
return True
# Scan task board for unclaimed tasks
unclaimed = scan_unclaimed_tasks()
if unclaimed:
task = unclaimed[0]
claim_task(task["id"], name)
messages.append({"role": "user",
"content": f"<auto-claimed>Task #{task['id']}: "
f"{task['subject']}</auto-claimed>"})
return True
return False # timeout -> shutdown
```
3. 任务看板扫描查找 pending 状态、无 owner、未被阻塞的任务。
```python
def scan_unclaimed_tasks() -> list:
TASKS_DIR.mkdir(exist_ok=True)
unclaimed = []
for f in sorted(TASKS_DIR.glob("task_*.json")):
task = json.loads(f.read_text())
if (task.get("status") == "pending"
and not task.get("owner")
and not task.get("blockedBy")):
unclaimed.append(task)
return unclaimed
def claim_task(task_id: int, owner: str):
path = TASKS_DIR / f"task_{task_id}.json"
task = json.loads(path.read_text())
task["status"] = "in_progress"
task["owner"] = owner
path.write_text(json.dumps(task, indent=2))
```
4. 身份重注入: 当上下文过短时插入身份块, 表明发生了压缩。
```python
def make_identity_block(name, role, team_name):
return {"role": "user",
"content": f"<identity>You are '{name}', "
f"role: {role}, team: {team_name}. "
f"Continue your work.</identity>"}
# Before resuming work after idle:
if len(messages) <= 3:
messages.insert(0, make_identity_block(
name, role, team_name))
messages.insert(1, {"role": "assistant",
"content": f"I am {name}. Continuing."})
```
5. `idle` 工具让队友显式地表示没有更多工作, 提前进入空闲轮询阶段。
```python
{"name": "idle",
"description": "Signal that you have no more work. "
"Enters idle polling phase.",
"input_schema": {"type": "object", "properties": {}}},
```
## 核心代码
自治循环 (来自 `agents/s11_autonomous_agents.py`):
```python
def _loop(self, name, role, prompt):
while True:
# WORK PHASE
for _ in range(50):
response = client.messages.create(...)
if response.stop_reason != "tool_use":
break
for block in response.content:
if block.name == "idle":
idle_requested = True
if idle_requested:
break
# IDLE PHASE
self._set_status(name, "idle")
for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):
time.sleep(POLL_INTERVAL)
inbox = BUS.read_inbox(name)
if inbox: resume = True; break
unclaimed = scan_unclaimed_tasks()
if unclaimed:
claim_task(unclaimed[0]["id"], name)
resume = True; break
if not resume:
self._set_status(name, "shutdown")
return
self._set_status(name, "working")
```
## 相对 s10 的变更
| 组件 | 之前 (s10) | 之后 (s11) |
|----------------|------------------|----------------------------------|
| Tools | 12 | 14 (+idle, +claim_task) |
| 自治性 | 领导指派 | 自组织 |
| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |
| 任务认领 | 仅手动 | 自动认领未认领任务 |
| 身份 | 系统提示 | + 压缩后重注入 |
| 超时 | 无 | 60 秒空闲 -> 自动关机 |
## 设计原理
轮询 + 超时使智能体无需中央协调器即可自组织。每个智能体独立轮询任务看板, 认领未认领的工作, 完成后回到空闲状态。超时触发轮询循环, 如果在窗口期内没有工作出现, 智能体自行关机。这与工作窃取线程池的模式相同 -- 分布式, 无单点故障。压缩后的身份重注入确保智能体即使在对话历史被摘要后仍能保持其角色。
## 试一试
```sh
cd learn-claude-code
python agents/s11_autonomous_agents.py
```
可以尝试的提示:
1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`
2. `Spawn a coder teammate and let it find work from the task board itself`
3. `Create tasks with dependencies. Watch teammates respect the blocked order.`
4. 输入 `/tasks` 查看带 owner 的任务看板
5. 输入 `/team` 监控谁在工作、谁在空闲

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
anthropic>=0.25.0
python-dotenv>=1.0.0

View File

@ -0,0 +1,129 @@
---
name: agent-builder
description: |
Design and build AI agents for any domain. Use when users:
(1) ask to "create an agent", "build an assistant", or "design an AI system"
(2) want to understand agent architecture, agentic patterns, or autonomous AI
(3) need help with capabilities, subagents, planning, or skill mechanisms
(4) ask about Claude Code, Cursor, or similar agent internals
(5) want to build agents for business, research, creative, or operational tasks
Keywords: agent, assistant, autonomous, workflow, tool use, multi-step, orchestration
---
# Agent Builder
Build AI agents for any domain - customer service, research, operations, creative work, or specialized business processes.
## The Core Philosophy
> **The model already knows how to be an agent. Your job is to get out of the way.**
An agent is not complex engineering. It's a simple loop that invites the model to act:
```
LOOP:
Model sees: context + available capabilities
Model decides: act or respond
If act: execute capability, add result, continue
If respond: return to user
```
**That's it.** The magic isn't in the code - it's in the model. Your code just provides the opportunity.
## The Three Elements
### 1. Capabilities (What can it DO?)
Atomic actions the agent can perform: search, read, create, send, query, modify.
**Design principle**: Start with 3-5 capabilities. Add more only when the agent consistently fails because a capability is missing.
### 2. Knowledge (What does it KNOW?)
Domain expertise injected on-demand: policies, workflows, best practices, schemas.
**Design principle**: Make knowledge available, not mandatory. Load it when relevant, not upfront.
### 3. Context (What has happened?)
The conversation history - the thread connecting actions into coherent behavior.
**Design principle**: Context is precious. Isolate noisy subtasks. Truncate verbose outputs. Protect clarity.
## Agent Design Thinking
Before building, understand:
- **Purpose**: What should this agent accomplish?
- **Domain**: What world does it operate in? (customer service, research, operations, creative...)
- **Capabilities**: What 3-5 actions are essential?
- **Knowledge**: What expertise does it need access to?
- **Trust**: What decisions can you delegate to the model?
**CRITICAL**: Trust the model. Don't over-engineer. Don't pre-specify workflows. Give it capabilities and let it reason.
## Progressive Complexity
Start simple. Add complexity only when real usage reveals the need:
| Level | What to add | When to add it |
|-------|-------------|----------------|
| Basic | 3-5 capabilities | Always start here |
| Planning | Progress tracking | Multi-step tasks lose coherence |
| Subagents | Isolated child agents | Exploration pollutes context |
| Skills | On-demand knowledge | Domain expertise needed |
**Most agents never need to go beyond Level 2.**
## Domain Examples
**Business**: CRM queries, email, calendar, approvals
**Research**: Database search, document analysis, citations
**Operations**: Monitoring, tickets, notifications, escalation
**Creative**: Asset generation, editing, collaboration, review
The pattern is universal. Only the capabilities change.
## Key Principles
1. **The model IS the agent** - Code just runs the loop
2. **Capabilities enable** - What it CAN do
3. **Knowledge informs** - What it KNOWS how to do
4. **Constraints focus** - Limits create clarity
5. **Trust liberates** - Let the model reason
6. **Iteration reveals** - Start minimal, evolve from usage
## Anti-Patterns
| Pattern | Problem | Solution |
|---------|---------|----------|
| Over-engineering | Complexity before need | Start simple |
| Too many capabilities | Model confusion | 3-5 to start |
| Rigid workflows | Can't adapt | Let model decide |
| Front-loaded knowledge | Context bloat | Load on-demand |
| Micromanagement | Undercuts intelligence | Trust the model |
## Resources
**Philosophy & Theory**:
- `references/agent-philosophy.md` - Deep dive into why agents work
**Implementation**:
- `references/minimal-agent.py` - Complete working agent (~80 lines)
- `references/tool-templates.py` - Capability definitions
- `references/subagent-pattern.py` - Context isolation
**Scaffolding**:
- `scripts/init_agent.py` - Generate new agent projects
## The Agent Mindset
**From**: "How do I make the system do X?"
**To**: "How do I enable the model to do X?"
**From**: "What's the workflow for this task?"
**To**: "What capabilities would help accomplish this?"
The best agent code is almost boring. Simple loops. Clear capabilities. Clean context. The magic isn't in the code.
**Give the model capabilities and knowledge. Trust it to figure out the rest.**

View File

@ -0,0 +1,144 @@
# The Philosophy of Agents
> **The model already knows how to be an agent. Your job is to get out of the way.**
## The Fundamental Insight
Strip away every framework, every library, every architectural pattern. What remains?
A loop. A model. An invitation to act.
The agent is not the code. The agent is the model itself - a vast neural network trained on humanity's collective problem-solving, reasoning, and tool use. The code merely provides the opportunity for the model to express its agency.
## Why This Matters
Most agent implementations fail not from too little engineering, but from too much. They constrain. They prescribe. They second-guess the very intelligence they're trying to leverage.
Consider: The model has been trained on millions of examples of problem-solving. It has seen how experts approach complex tasks, how tools are used, how plans are formed and revised. This knowledge is already there, encoded in billions of parameters.
Your job is not to teach it how to think. Your job is to give it the means to act.
## The Three Elements
### 1. Capabilities (Tools)
Capabilities answer: **What can the agent DO?**
They are the hands of the model - its ability to affect the world. Without capabilities, the model can only speak. With them, it can act.
**The design principle**: Each capability should be atomic, clear, and well-described. The model needs to understand what each capability does, but not how to use them in sequence - it will figure that out.
**Common mistake**: Too many capabilities. The model gets confused, starts using the wrong ones, or paralyzed by choice. Start with 3-5. Add more only when the model consistently fails to accomplish tasks because a capability is missing.
### 2. Knowledge (Skills)
Knowledge answers: **What does the agent KNOW?**
This is domain expertise - the specialized understanding that turns a general assistant into a domain expert. A customer service agent needs to know company policies. A research agent needs to know methodology. A creative agent needs to know style guidelines.
**The design principle**: Inject knowledge on-demand, not upfront. The model doesn't need to know everything at once - only what's relevant to the current task. Progressive disclosure preserves context for what matters.
**Common mistake**: Front-loading all possible knowledge into the system prompt. This wastes context, confuses the model, and makes every interaction expensive. Instead, make knowledge available but not mandatory.
### 3. Context (The Conversation)
Context is the memory of the interaction - what has been said, what has been tried, what has been learned. It's the thread that connects individual actions into coherent behavior.
**The design principle**: Context is precious. Protect it. Isolate subtasks that generate noise. Truncate outputs that exceed usefulness. Summarize when history grows long.
**Common mistake**: Letting context grow unbounded, filling it with exploration details, failed attempts, and verbose tool outputs. Eventually the model can't find the signal in the noise.
## The Universal Pattern
Every effective agent - regardless of domain, framework, or implementation - follows the same pattern:
```
LOOP:
Model sees: conversation history + available capabilities
Model decides: act or respond
If act: capability executed, result added to context, loop continues
If respond: answer returned, loop ends
```
This is not a simplification. This is the actual architecture. Everything else is optimization.
## Designing for Agency
### Trust the Model
The most important principle: **trust the model**.
Don't try to anticipate every edge case. Don't build elaborate decision trees. Don't pre-specify the workflow.
The model is better at reasoning than any rule system you could write. Your conditional logic will fail on edge cases. The model will reason through them.
**Give the model capabilities and knowledge. Let it figure out how to use them.**
### Constraints Enable
This seems paradoxical, but constraints don't limit agents - they focus them.
A todo list with "only one task in progress" forces sequential focus. A subagent with "read-only access" prevents accidental modifications. A response with "under 100 words" demands clarity.
The best constraints are those that prevent the model from getting lost, not those that micromanage its approach.
### Progressive Complexity
Never build everything upfront.
```
Level 0: Model + one capability
Level 1: Model + 3-5 capabilities
Level 2: Model + capabilities + planning
Level 3: Model + capabilities + planning + subagents
Level 4: Model + capabilities + planning + subagents + skills
```
Start at the lowest level that might work. Move up only when real usage reveals the need. Most agents never need to go beyond Level 2.
## The Agent Mindset
Building agents requires a shift in thinking:
**From**: "How do I make the system do X?"
**To**: "How do I enable the model to do X?"
**From**: "What should happen when the user says Y?"
**To**: "What capabilities would help address Y?"
**From**: "What's the workflow for this task?"
**To**: "What does the model need to figure out the workflow?"
The best agent code is almost boring. Simple loops. Clear capability definitions. Clean context management. The magic isn't in the code - it's in the model.
## Philosophical Foundations
### The Model as Emergent Agent
Language models trained on human text have learned not just language, but patterns of thought. They've absorbed how humans approach problems, use tools, and accomplish goals. This is emergent agency - not programmed, but learned.
When you give a model capabilities, you're not teaching it to be an agent. You're giving it permission to express the agency it already has.
### The Loop as Liberation
The agent loop is deceptively simple: get response, check for tool use, execute, repeat. But this simplicity is its power.
The loop doesn't constrain the model to particular sequences. It doesn't enforce specific workflows. It simply says: "You have capabilities. Use them as you see fit. I'll execute what you request and show you the results."
This is liberation, not limitation.
### Capabilities as Expression
Each capability you provide is a form of expression for the model. "Read file" lets it see. "Write file" lets it create. "Search" lets it explore. "Send message" lets it communicate.
The art of agent design is choosing which forms of expression to enable. Too few, and the model is mute. Too many, and it speaks in tongues.
## Conclusion
The agent is the model. The code is just the loop. Your job is to get out of the way.
Give the model clear capabilities. Make knowledge available when needed. Protect the context from noise. Trust the model to figure out the rest.
That's it. That's the philosophy.
Everything else is refinement.

View File

@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Minimal Agent Template - Copy and customize this.
This is the simplest possible working agent (~80 lines).
It has everything you need: 3 tools + loop.
Usage:
1. Set ANTHROPIC_API_KEY environment variable
2. python minimal-agent.py
3. Type commands, 'q' to quit
"""
from anthropic import Anthropic
from pathlib import Path
import subprocess
import os
# Configuration
client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
MODEL = os.getenv("MODEL_NAME", "claude-sonnet-4-20250514")
WORKDIR = Path.cwd()
# System prompt - keep it simple
SYSTEM = f"""You are a coding agent at {WORKDIR}.
Rules:
- Use tools to complete tasks
- Prefer action over explanation
- Summarize what you did when done"""
# Minimal tool set - add more as needed
TOOLS = [
{
"name": "bash",
"description": "Run shell command",
"input_schema": {
"type": "object",
"properties": {"command": {"type": "string"}},
"required": ["command"]
}
},
{
"name": "read_file",
"description": "Read file contents",
"input_schema": {
"type": "object",
"properties": {"path": {"type": "string"}},
"required": ["path"]
}
},
{
"name": "write_file",
"description": "Write content to file",
"input_schema": {
"type": "object",
"properties": {
"path": {"type": "string"},
"content": {"type": "string"}
},
"required": ["path", "content"]
}
},
]
def execute_tool(name: str, args: dict) -> str:
"""Execute a tool and return result."""
if name == "bash":
try:
r = subprocess.run(
args["command"], shell=True, cwd=WORKDIR,
capture_output=True, text=True, timeout=60
)
return (r.stdout + r.stderr).strip() or "(empty)"
except subprocess.TimeoutExpired:
return "Error: Timeout"
if name == "read_file":
try:
return (WORKDIR / args["path"]).read_text()[:50000]
except Exception as e:
return f"Error: {e}"
if name == "write_file":
try:
p = WORKDIR / args["path"]
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(args["content"])
return f"Wrote {len(args['content'])} bytes to {args['path']}"
except Exception as e:
return f"Error: {e}"
return f"Unknown tool: {name}"
def agent(prompt: str, history: list = None) -> str:
"""Run the agent loop."""
if history is None:
history = []
history.append({"role": "user", "content": prompt})
while True:
response = client.messages.create(
model=MODEL,
system=SYSTEM,
messages=history,
tools=TOOLS,
max_tokens=8000,
)
# Build assistant message
history.append({"role": "assistant", "content": response.content})
# If no tool calls, return text
if response.stop_reason != "tool_use":
return "".join(b.text for b in response.content if hasattr(b, "text"))
# Execute tools
results = []
for block in response.content:
if block.type == "tool_use":
print(f"> {block.name}: {block.input}")
output = execute_tool(block.name, block.input)
print(f" {output[:100]}...")
results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output
})
history.append({"role": "user", "content": results})
if __name__ == "__main__":
print(f"Minimal Agent - {WORKDIR}")
print("Type 'q' to quit.\n")
history = []
while True:
try:
query = input(">> ").strip()
except (EOFError, KeyboardInterrupt):
break
if query in ("q", "quit", "exit", ""):
break
print(agent(query, history))
print()

View File

@ -0,0 +1,243 @@
"""
Subagent Pattern - How to implement Task tool for context isolation.
The key insight: spawn child agents with ISOLATED context to prevent
"context pollution" where exploration details fill up the main conversation.
"""
import time
import sys
# Assuming client, MODEL, execute_tool are defined elsewhere
# =============================================================================
# AGENT TYPE REGISTRY
# =============================================================================
AGENT_TYPES = {
# Explore: Read-only, for searching and analyzing
"explore": {
"description": "Read-only agent for exploring code, finding files, searching",
"tools": ["bash", "read_file"], # No write access!
"prompt": "You are an exploration agent. Search and analyze, but NEVER modify files. Return a concise summary of what you found.",
},
# Code: Full-powered, for implementation
"code": {
"description": "Full agent for implementing features and fixing bugs",
"tools": "*", # All tools
"prompt": "You are a coding agent. Implement the requested changes efficiently. Return a summary of what you changed.",
},
# Plan: Read-only, for design work
"plan": {
"description": "Planning agent for designing implementation strategies",
"tools": ["bash", "read_file"], # Read-only
"prompt": "You are a planning agent. Analyze the codebase and output a numbered implementation plan. Do NOT make any changes.",
},
# Add your own types here...
# "test": {
# "description": "Testing agent for running and analyzing tests",
# "tools": ["bash", "read_file"],
# "prompt": "Run tests and report results. Don't modify code.",
# },
}
def get_agent_descriptions() -> str:
"""Generate descriptions for Task tool schema."""
return "\n".join(
f"- {name}: {cfg['description']}"
for name, cfg in AGENT_TYPES.items()
)
def get_tools_for_agent(agent_type: str, base_tools: list) -> list:
"""
Filter tools based on agent type.
'*' means all base tools.
Otherwise, whitelist specific tool names.
Note: Subagents don't get Task tool to prevent infinite recursion.
"""
allowed = AGENT_TYPES.get(agent_type, {}).get("tools", "*")
if allowed == "*":
return base_tools # All base tools, but NOT Task
return [t for t in base_tools if t["name"] in allowed]
# =============================================================================
# TASK TOOL DEFINITION
# =============================================================================
TASK_TOOL = {
"name": "Task",
"description": f"""Spawn a subagent for a focused subtask.
Subagents run in ISOLATED context - they don't see parent's history.
Use this to keep the main conversation clean.
Agent types:
{get_agent_descriptions()}
Example uses:
- Task(explore): "Find all files using the auth module"
- Task(plan): "Design a migration strategy for the database"
- Task(code): "Implement the user registration form"
""",
"input_schema": {
"type": "object",
"properties": {
"description": {
"type": "string",
"description": "Short task name (3-5 words) for progress display"
},
"prompt": {
"type": "string",
"description": "Detailed instructions for the subagent"
},
"agent_type": {
"type": "string",
"enum": list(AGENT_TYPES.keys()),
"description": "Type of agent to spawn"
},
},
"required": ["description", "prompt", "agent_type"],
},
}
# =============================================================================
# SUBAGENT EXECUTION
# =============================================================================
def run_task(description: str, prompt: str, agent_type: str,
client, model: str, workdir, base_tools: list, execute_tool) -> str:
"""
Execute a subagent task with isolated context.
Key concepts:
1. ISOLATED HISTORY - subagent starts fresh, no parent context
2. FILTERED TOOLS - based on agent type permissions
3. AGENT-SPECIFIC PROMPT - specialized behavior
4. RETURNS SUMMARY ONLY - parent sees just the final result
Args:
description: Short name for progress display
prompt: Detailed instructions for subagent
agent_type: Key from AGENT_TYPES
client: Anthropic client
model: Model to use
workdir: Working directory
base_tools: List of tool definitions
execute_tool: Function to execute tools
Returns:
Final text output from subagent
"""
if agent_type not in AGENT_TYPES:
return f"Error: Unknown agent type '{agent_type}'"
config = AGENT_TYPES[agent_type]
# Agent-specific system prompt
sub_system = f"""You are a {agent_type} subagent at {workdir}.
{config["prompt"]}
Complete the task and return a clear, concise summary."""
# Filtered tools for this agent type
sub_tools = get_tools_for_agent(agent_type, base_tools)
# KEY: ISOLATED message history!
# The subagent starts fresh, doesn't see parent's conversation
sub_messages = [{"role": "user", "content": prompt}]
# Progress display
print(f" [{agent_type}] {description}")
start = time.time()
tool_count = 0
# Run the same agent loop (but silently)
while True:
response = client.messages.create(
model=model,
system=sub_system,
messages=sub_messages,
tools=sub_tools,
max_tokens=8000,
)
# Check if done
if response.stop_reason != "tool_use":
break
# Execute tools
tool_calls = [b for b in response.content if b.type == "tool_use"]
results = []
for tc in tool_calls:
tool_count += 1
output = execute_tool(tc.name, tc.input)
results.append({
"type": "tool_result",
"tool_use_id": tc.id,
"content": output
})
# Update progress (in-place on same line)
elapsed = time.time() - start
sys.stdout.write(
f"\r [{agent_type}] {description} ... {tool_count} tools, {elapsed:.1f}s"
)
sys.stdout.flush()
sub_messages.append({"role": "assistant", "content": response.content})
sub_messages.append({"role": "user", "content": results})
# Final progress update
elapsed = time.time() - start
sys.stdout.write(
f"\r [{agent_type}] {description} - done ({tool_count} tools, {elapsed:.1f}s)\n"
)
# Extract and return ONLY the final text
# This is what the parent agent sees - a clean summary
for block in response.content:
if hasattr(block, "text"):
return block.text
return "(subagent returned no text)"
# =============================================================================
# USAGE EXAMPLE
# =============================================================================
"""
# In your main agent's execute_tool function:
def execute_tool(name: str, args: dict) -> str:
if name == "Task":
return run_task(
description=args["description"],
prompt=args["prompt"],
agent_type=args["agent_type"],
client=client,
model=MODEL,
workdir=WORKDIR,
base_tools=BASE_TOOLS,
execute_tool=execute_tool # Pass self for recursion
)
# ... other tools ...
# In your TOOLS list:
TOOLS = BASE_TOOLS + [TASK_TOOL]
"""

View File

@ -0,0 +1,271 @@
"""
Tool Templates - Copy and customize these for your agent.
Each tool needs:
1. Definition (JSON schema for the model)
2. Implementation (Python function)
"""
from pathlib import Path
import subprocess
WORKDIR = Path.cwd()
# =============================================================================
# TOOL DEFINITIONS (for TOOLS list)
# =============================================================================
BASH_TOOL = {
"name": "bash",
"description": "Run a shell command. Use for: ls, find, grep, git, npm, python, etc.",
"input_schema": {
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
}
},
"required": ["command"],
},
}
READ_FILE_TOOL = {
"name": "read_file",
"description": "Read file contents. Returns UTF-8 text.",
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path to the file"
},
"limit": {
"type": "integer",
"description": "Max lines to read (default: all)"
},
},
"required": ["path"],
},
}
WRITE_FILE_TOOL = {
"name": "write_file",
"description": "Write content to a file. Creates parent directories if needed.",
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path for the file"
},
"content": {
"type": "string",
"description": "Content to write"
},
},
"required": ["path", "content"],
},
}
EDIT_FILE_TOOL = {
"name": "edit_file",
"description": "Replace exact text in a file. Use for surgical edits.",
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path to the file"
},
"old_text": {
"type": "string",
"description": "Exact text to find (must match precisely)"
},
"new_text": {
"type": "string",
"description": "Replacement text"
},
},
"required": ["path", "old_text", "new_text"],
},
}
TODO_WRITE_TOOL = {
"name": "TodoWrite",
"description": "Update the task list. Use to plan and track progress.",
"input_schema": {
"type": "object",
"properties": {
"items": {
"type": "array",
"description": "Complete list of tasks",
"items": {
"type": "object",
"properties": {
"content": {"type": "string", "description": "Task description"},
"status": {"type": "string", "enum": ["pending", "in_progress", "completed"]},
"activeForm": {"type": "string", "description": "Present tense, e.g. 'Reading files'"},
},
"required": ["content", "status", "activeForm"],
},
}
},
"required": ["items"],
},
}
TASK_TOOL_TEMPLATE = """
# Generate dynamically with agent types
TASK_TOOL = {
"name": "Task",
"description": f"Spawn a subagent for a focused subtask.\\n\\nAgent types:\\n{get_agent_descriptions()}",
"input_schema": {
"type": "object",
"properties": {
"description": {"type": "string", "description": "Short task name (3-5 words)"},
"prompt": {"type": "string", "description": "Detailed instructions"},
"agent_type": {"type": "string", "enum": list(AGENT_TYPES.keys())},
},
"required": ["description", "prompt", "agent_type"],
},
}
"""
# =============================================================================
# TOOL IMPLEMENTATIONS
# =============================================================================
def safe_path(p: str) -> Path:
"""
Security: Ensure path stays within workspace.
Prevents ../../../etc/passwd attacks.
"""
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {p}")
return path
def run_bash(command: str) -> str:
"""
Execute shell command with safety checks.
Safety features:
- Blocks obviously dangerous commands
- 60 second timeout
- Output truncated to 50KB
"""
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
result = subprocess.run(
command,
shell=True,
cwd=WORKDIR,
capture_output=True,
text=True,
timeout=60
)
output = (result.stdout + result.stderr).strip()
return output[:50000] if output else "(no output)"
except subprocess.TimeoutExpired:
return "Error: Command timed out (60s)"
except Exception as e:
return f"Error: {e}"
def run_read_file(path: str, limit: int = None) -> str:
"""
Read file contents with optional line limit.
Features:
- Safe path resolution
- Optional line limit for large files
- Output truncated to 50KB
"""
try:
text = safe_path(path).read_text()
lines = text.splitlines()
if limit and limit < len(lines):
lines = lines[:limit]
lines.append(f"... ({len(text.splitlines()) - limit} more lines)")
return "\n".join(lines)[:50000]
except Exception as e:
return f"Error: {e}"
def run_write_file(path: str, content: str) -> str:
"""
Write content to file, creating parent directories if needed.
Features:
- Safe path resolution
- Auto-creates parent directories
- Returns byte count for confirmation
"""
try:
fp = safe_path(path)
fp.parent.mkdir(parents=True, exist_ok=True)
fp.write_text(content)
return f"Wrote {len(content)} bytes to {path}"
except Exception as e:
return f"Error: {e}"
def run_edit_file(path: str, old_text: str, new_text: str) -> str:
"""
Replace exact text in a file (surgical edit).
Features:
- Exact string matching (not regex)
- Only replaces first occurrence (safety)
- Clear error if text not found
"""
try:
fp = safe_path(path)
content = fp.read_text()
if old_text not in content:
return f"Error: Text not found in {path}"
new_content = content.replace(old_text, new_text, 1)
fp.write_text(new_content)
return f"Edited {path}"
except Exception as e:
return f"Error: {e}"
# =============================================================================
# DISPATCHER PATTERN
# =============================================================================
def execute_tool(name: str, args: dict) -> str:
"""
Dispatch tool call to implementation.
This pattern makes it easy to add new tools:
1. Add definition to TOOLS list
2. Add implementation function
3. Add case to this dispatcher
"""
if name == "bash":
return run_bash(args["command"])
if name == "read_file":
return run_read_file(args["path"], args.get("limit"))
if name == "write_file":
return run_write_file(args["path"], args["content"])
if name == "edit_file":
return run_edit_file(args["path"], args["old_text"], args["new_text"])
# Add more tools here...
return f"Unknown tool: {name}"

View File

@ -0,0 +1,279 @@
#!/usr/bin/env python3
"""
Agent Scaffold Script - Create a new agent project with best practices.
Usage:
python init_agent.py <agent-name> [--level 0-4] [--path <output-dir>]
Examples:
python init_agent.py my-agent # Level 1 (4 tools)
python init_agent.py my-agent --level 0 # Minimal (bash only)
python init_agent.py my-agent --level 2 # With TodoWrite
python init_agent.py my-agent --path ./bots # Custom output directory
"""
import argparse
import sys
from pathlib import Path
# Agent templates for each level
TEMPLATES = {
0: '''#!/usr/bin/env python3
"""
Level 0 Agent - Bash is All You Need (~50 lines)
Core insight: One tool (bash) can do everything.
Subagents via self-recursion: python {name}.py "subtask"
"""
from anthropic import Anthropic
from dotenv import load_dotenv
import subprocess
import os
load_dotenv()
client = Anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
base_url=os.getenv("ANTHROPIC_BASE_URL")
)
MODEL = os.getenv("MODEL_NAME", "claude-sonnet-4-20250514")
SYSTEM = """You are a coding agent. Use bash for everything:
- Read: cat, grep, find, ls
- Write: echo 'content' > file
- Subagent: python {name}.py "subtask"
"""
TOOL = [{{
"name": "bash",
"description": "Execute shell command",
"input_schema": {{"type": "object", "properties": {{"command": {{"type": "string"}}}}, "required": ["command"]}}
}}]
def run(prompt, history=[]):
history.append({{"role": "user", "content": prompt}})
while True:
r = client.messages.create(model=MODEL, system=SYSTEM, messages=history, tools=TOOL, max_tokens=8000)
history.append({{"role": "assistant", "content": r.content}})
if r.stop_reason != "tool_use":
return "".join(b.text for b in r.content if hasattr(b, "text"))
results = []
for b in r.content:
if b.type == "tool_use":
print(f"> {{b.input['command']}}")
try:
out = subprocess.run(b.input["command"], shell=True, capture_output=True, text=True, timeout=60)
output = (out.stdout + out.stderr).strip() or "(empty)"
except Exception as e:
output = f"Error: {{e}}"
results.append({{"type": "tool_result", "tool_use_id": b.id, "content": output[:50000]}})
history.append({{"role": "user", "content": results}})
if __name__ == "__main__":
h = []
print("{name} - Level 0 Agent\\nType 'q' to quit.\\n")
while (q := input(">> ").strip()) not in ("q", "quit", ""):
print(run(q, h), "\\n")
''',
1: '''#!/usr/bin/env python3
"""
Level 1 Agent - Model as Agent (~200 lines)
Core insight: 4 tools cover 90% of coding tasks.
The model IS the agent. Code just runs the loop.
"""
from anthropic import Anthropic
from dotenv import load_dotenv
from pathlib import Path
import subprocess
import os
load_dotenv()
client = Anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
base_url=os.getenv("ANTHROPIC_BASE_URL")
)
MODEL = os.getenv("MODEL_NAME", "claude-sonnet-4-20250514")
WORKDIR = Path.cwd()
SYSTEM = f"""You are a coding agent at {{WORKDIR}}.
Rules:
- Prefer tools over prose. Act, don't just explain.
- Never invent file paths. Use ls/find first if unsure.
- Make minimal changes. Don't over-engineer.
- After finishing, summarize what changed."""
TOOLS = [
{{"name": "bash", "description": "Run shell command",
"input_schema": {{"type": "object", "properties": {{"command": {{"type": "string"}}}}, "required": ["command"]}}}},
{{"name": "read_file", "description": "Read file contents",
"input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}}}, "required": ["path"]}}}},
{{"name": "write_file", "description": "Write content to file",
"input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}, "content": {{"type": "string"}}}}, "required": ["path", "content"]}}}},
{{"name": "edit_file", "description": "Replace exact text in file",
"input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}, "old_text": {{"type": "string"}}, "new_text": {{"type": "string"}}}}, "required": ["path", "old_text", "new_text"]}}}},
]
def safe_path(p: str) -> Path:
"""Prevent path escape attacks."""
path = (WORKDIR / p).resolve()
if not path.is_relative_to(WORKDIR):
raise ValueError(f"Path escapes workspace: {{p}}")
return path
def execute(name: str, args: dict) -> str:
"""Execute a tool and return result."""
if name == "bash":
dangerous = ["rm -rf /", "sudo", "shutdown", "> /dev/"]
if any(d in args["command"] for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(args["command"], shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=60)
return (r.stdout + r.stderr).strip()[:50000] or "(empty)"
except subprocess.TimeoutExpired:
return "Error: Timeout (60s)"
except Exception as e:
return f"Error: {{e}}"
if name == "read_file":
try:
return safe_path(args["path"]).read_text()[:50000]
except Exception as e:
return f"Error: {{e}}"
if name == "write_file":
try:
p = safe_path(args["path"])
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(args["content"])
return f"Wrote {{len(args['content'])}} bytes to {{args['path']}}"
except Exception as e:
return f"Error: {{e}}"
if name == "edit_file":
try:
p = safe_path(args["path"])
content = p.read_text()
if args["old_text"] not in content:
return f"Error: Text not found in {{args['path']}}"
p.write_text(content.replace(args["old_text"], args["new_text"], 1))
return f"Edited {{args['path']}}"
except Exception as e:
return f"Error: {{e}}"
return f"Unknown tool: {{name}}"
def agent(prompt: str, history: list = None) -> str:
"""Run the agent loop."""
if history is None:
history = []
history.append({{"role": "user", "content": prompt}})
while True:
response = client.messages.create(
model=MODEL, system=SYSTEM, messages=history, tools=TOOLS, max_tokens=8000
)
history.append({{"role": "assistant", "content": response.content}})
if response.stop_reason != "tool_use":
return "".join(b.text for b in response.content if hasattr(b, "text"))
results = []
for block in response.content:
if block.type == "tool_use":
print(f"> {{block.name}}: {{str(block.input)[:100]}}")
output = execute(block.name, block.input)
print(f" {{output[:100]}}...")
results.append({{"type": "tool_result", "tool_use_id": block.id, "content": output}})
history.append({{"role": "user", "content": results}})
if __name__ == "__main__":
print(f"{name} - Level 1 Agent at {{WORKDIR}}")
print("Type 'q' to quit.\\n")
h = []
while True:
try:
query = input(">> ").strip()
except (EOFError, KeyboardInterrupt):
break
if query in ("q", "quit", "exit", ""):
break
print(agent(query, h), "\\n")
''',
}
ENV_TEMPLATE = '''# API Configuration
ANTHROPIC_API_KEY=sk-xxx
ANTHROPIC_BASE_URL=https://api.anthropic.com
MODEL_NAME=claude-sonnet-4-20250514
'''
def create_agent(name: str, level: int, output_dir: Path):
"""Create a new agent project."""
# Validate level
if level not in TEMPLATES and level not in (2, 3, 4):
print(f"Error: Level {level} not yet implemented in scaffold.")
print("Available levels: 0 (minimal), 1 (4 tools)")
print("For levels 2-4, copy from mini-claude-code repository.")
sys.exit(1)
# Create output directory
agent_dir = output_dir / name
agent_dir.mkdir(parents=True, exist_ok=True)
# Write agent file
agent_file = agent_dir / f"{name}.py"
template = TEMPLATES.get(level, TEMPLATES[1])
agent_file.write_text(template.format(name=name))
print(f"Created: {agent_file}")
# Write .env.example
env_file = agent_dir / ".env.example"
env_file.write_text(ENV_TEMPLATE)
print(f"Created: {env_file}")
# Write .gitignore
gitignore = agent_dir / ".gitignore"
gitignore.write_text(".env\n__pycache__/\n*.pyc\n")
print(f"Created: {gitignore}")
print(f"\nAgent '{name}' created at {agent_dir}")
print(f"\nNext steps:")
print(f" 1. cd {agent_dir}")
print(f" 2. cp .env.example .env")
print(f" 3. Edit .env with your API key")
print(f" 4. pip install anthropic python-dotenv")
print(f" 5. python {name}.py")
def main():
parser = argparse.ArgumentParser(
description="Scaffold a new AI coding agent project",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Levels:
0 Minimal (~50 lines) - Single bash tool, self-recursion for subagents
1 Basic (~200 lines) - 4 core tools: bash, read, write, edit
2 Todo (~300 lines) - + TodoWrite for structured planning
3 Subagent (~450) - + Task tool for context isolation
4 Skills (~550) - + Skill tool for domain expertise
"""
)
parser.add_argument("name", help="Name of the agent to create")
parser.add_argument("--level", type=int, default=1, choices=[0, 1, 2, 3, 4],
help="Complexity level (default: 1)")
parser.add_argument("--path", type=Path, default=Path.cwd(),
help="Output directory (default: current directory)")
args = parser.parse_args()
create_agent(args.name, args.level, args.path)
if __name__ == "__main__":
main()

157
skills/code-review/SKILL.md Normal file
View File

@ -0,0 +1,157 @@
---
name: code-review
description: Perform thorough code reviews with security, performance, and maintainability analysis. Use when user asks to review code, check for bugs, or audit a codebase.
---
# Code Review Skill
You now have expertise in conducting comprehensive code reviews. Follow this structured approach:
## Review Checklist
### 1. Security (Critical)
Check for:
- [ ] **Injection vulnerabilities**: SQL, command, XSS, template injection
- [ ] **Authentication issues**: Hardcoded credentials, weak auth
- [ ] **Authorization flaws**: Missing access controls, IDOR
- [ ] **Data exposure**: Sensitive data in logs, error messages
- [ ] **Cryptography**: Weak algorithms, improper key management
- [ ] **Dependencies**: Known vulnerabilities (check with `npm audit`, `pip-audit`)
```bash
# Quick security scans
npm audit # Node.js
pip-audit # Python
cargo audit # Rust
grep -r "password\|secret\|api_key" --include="*.py" --include="*.js"
```
### 2. Correctness
Check for:
- [ ] **Logic errors**: Off-by-one, null handling, edge cases
- [ ] **Race conditions**: Concurrent access without synchronization
- [ ] **Resource leaks**: Unclosed files, connections, memory
- [ ] **Error handling**: Swallowed exceptions, missing error paths
- [ ] **Type safety**: Implicit conversions, any types
### 3. Performance
Check for:
- [ ] **N+1 queries**: Database calls in loops
- [ ] **Memory issues**: Large allocations, retained references
- [ ] **Blocking operations**: Sync I/O in async code
- [ ] **Inefficient algorithms**: O(n^2) when O(n) possible
- [ ] **Missing caching**: Repeated expensive computations
### 4. Maintainability
Check for:
- [ ] **Naming**: Clear, consistent, descriptive
- [ ] **Complexity**: Functions > 50 lines, deep nesting > 3 levels
- [ ] **Duplication**: Copy-pasted code blocks
- [ ] **Dead code**: Unused imports, unreachable branches
- [ ] **Comments**: Outdated, redundant, or missing where needed
### 5. Testing
Check for:
- [ ] **Coverage**: Critical paths tested
- [ ] **Edge cases**: Null, empty, boundary values
- [ ] **Mocking**: External dependencies isolated
- [ ] **Assertions**: Meaningful, specific checks
## Review Output Format
```markdown
## Code Review: [file/component name]
### Summary
[1-2 sentence overview]
### Critical Issues
1. **[Issue]** (line X): [Description]
- Impact: [What could go wrong]
- Fix: [Suggested solution]
### Improvements
1. **[Suggestion]** (line X): [Description]
### Positive Notes
- [What was done well]
### Verdict
[ ] Ready to merge
[ ] Needs minor changes
[ ] Needs major revision
```
## Common Patterns to Flag
### Python
```python
# Bad: SQL injection
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
# Good:
cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))
# Bad: Command injection
os.system(f"ls {user_input}")
# Good:
subprocess.run(["ls", user_input], check=True)
# Bad: Mutable default argument
def append(item, lst=[]): # Bug: shared mutable default
# Good:
def append(item, lst=None):
lst = lst or []
```
### JavaScript/TypeScript
```javascript
// Bad: Prototype pollution
Object.assign(target, userInput)
// Good:
Object.assign(target, sanitize(userInput))
// Bad: eval usage
eval(userCode)
// Good: Never use eval with user input
// Bad: Callback hell
getData(x => process(x, y => save(y, z => done(z))))
// Good:
const data = await getData();
const processed = await process(data);
await save(processed);
```
## Review Commands
```bash
# Show recent changes
git diff HEAD~5 --stat
git log --oneline -10
# Find potential issues
grep -rn "TODO\|FIXME\|HACK\|XXX" .
grep -rn "password\|secret\|token" . --include="*.py"
# Check complexity (Python)
pip install radon && radon cc . -a
# Check dependencies
npm outdated # Node
pip list --outdated # Python
```
## Review Workflow
1. **Understand context**: Read PR description, linked issues
2. **Run the code**: Build, test, run locally if possible
3. **Read top-down**: Start with main entry points
4. **Check tests**: Are changes tested? Do tests pass?
5. **Security scan**: Run automated tools
6. **Manual review**: Use checklist above
7. **Write feedback**: Be specific, suggest fixes, be kind

213
skills/mcp-builder/SKILL.md Normal file
View File

@ -0,0 +1,213 @@
---
name: mcp-builder
description: Build MCP (Model Context Protocol) servers that give Claude new capabilities. Use when user wants to create an MCP server, add tools to Claude, or integrate external services.
---
# MCP Server Building Skill
You now have expertise in building MCP (Model Context Protocol) servers. MCP enables Claude to interact with external services through a standardized protocol.
## What is MCP?
MCP servers expose:
- **Tools**: Functions Claude can call (like API endpoints)
- **Resources**: Data Claude can read (like files or database records)
- **Prompts**: Pre-built prompt templates
## Quick Start: Python MCP Server
### 1. Project Setup
```bash
# Create project
mkdir my-mcp-server && cd my-mcp-server
python3 -m venv venv && source venv/bin/activate
# Install MCP SDK
pip install mcp
```
### 2. Basic Server Template
```python
#!/usr/bin/env python3
"""my_server.py - A simple MCP server"""
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
# Create server instance
server = Server("my-server")
# Define a tool
@server.tool()
async def hello(name: str) -> str:
"""Say hello to someone.
Args:
name: The name to greet
"""
return f"Hello, {name}!"
@server.tool()
async def add_numbers(a: int, b: int) -> str:
"""Add two numbers together.
Args:
a: First number
b: Second number
"""
return str(a + b)
# Run server
async def main():
async with stdio_server() as (read, write):
await server.run(read, write)
if __name__ == "__main__":
import asyncio
asyncio.run(main())
```
### 3. Register with Claude
Add to `~/.claude/mcp.json`:
```json
{
"mcpServers": {
"my-server": {
"command": "python3",
"args": ["/path/to/my_server.py"]
}
}
}
```
## TypeScript MCP Server
### 1. Setup
```bash
mkdir my-mcp-server && cd my-mcp-server
npm init -y
npm install @modelcontextprotocol/sdk
```
### 2. Template
```typescript
// src/index.ts
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
const server = new Server({
name: "my-server",
version: "1.0.0",
});
// Define tools
server.setRequestHandler("tools/list", async () => ({
tools: [
{
name: "hello",
description: "Say hello to someone",
inputSchema: {
type: "object",
properties: {
name: { type: "string", description: "Name to greet" },
},
required: ["name"],
},
},
],
}));
server.setRequestHandler("tools/call", async (request) => {
if (request.params.name === "hello") {
const name = request.params.arguments.name;
return { content: [{ type: "text", text: `Hello, ${name}!` }] };
}
throw new Error("Unknown tool");
});
// Start server
const transport = new StdioServerTransport();
server.connect(transport);
```
## Advanced Patterns
### External API Integration
```python
import httpx
from mcp.server import Server
server = Server("weather-server")
@server.tool()
async def get_weather(city: str) -> str:
"""Get current weather for a city."""
async with httpx.AsyncClient() as client:
resp = await client.get(
f"https://api.weatherapi.com/v1/current.json",
params={"key": "YOUR_API_KEY", "q": city}
)
data = resp.json()
return f"{city}: {data['current']['temp_c']}C, {data['current']['condition']['text']}"
```
### Database Access
```python
import sqlite3
from mcp.server import Server
server = Server("db-server")
@server.tool()
async def query_db(sql: str) -> str:
"""Execute a read-only SQL query."""
if not sql.strip().upper().startswith("SELECT"):
return "Error: Only SELECT queries allowed"
conn = sqlite3.connect("data.db")
cursor = conn.execute(sql)
rows = cursor.fetchall()
conn.close()
return str(rows)
```
### Resources (Read-only Data)
```python
@server.resource("config://settings")
async def get_settings() -> str:
"""Application settings."""
return open("settings.json").read()
@server.resource("file://{path}")
async def read_file(path: str) -> str:
"""Read a file from the workspace."""
return open(path).read()
```
## Testing
```bash
# Test with MCP Inspector
npx @anthropics/mcp-inspector python3 my_server.py
# Or send test messages directly
echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | python3 my_server.py
```
## Best Practices
1. **Clear tool descriptions**: Claude uses these to decide when to call tools
2. **Input validation**: Always validate and sanitize inputs
3. **Error handling**: Return meaningful error messages
4. **Async by default**: Use async/await for I/O operations
5. **Security**: Never expose sensitive operations without auth
6. **Idempotency**: Tools should be safe to retry

112
skills/pdf/SKILL.md Normal file
View File

@ -0,0 +1,112 @@
---
name: pdf
description: Process PDF files - extract text, create PDFs, merge documents. Use when user asks to read PDF, create PDF, or work with PDF files.
---
# PDF Processing Skill
You now have expertise in PDF manipulation. Follow these workflows:
## Reading PDFs
**Option 1: Quick text extraction (preferred)**
```bash
# Using pdftotext (poppler-utils)
pdftotext input.pdf - # Output to stdout
pdftotext input.pdf output.txt # Output to file
# If pdftotext not available, try:
python3 -c "
import fitz # PyMuPDF
doc = fitz.open('input.pdf')
for page in doc:
print(page.get_text())
"
```
**Option 2: Page-by-page with metadata**
```python
import fitz # pip install pymupdf
doc = fitz.open("input.pdf")
print(f"Pages: {len(doc)}")
print(f"Metadata: {doc.metadata}")
for i, page in enumerate(doc):
text = page.get_text()
print(f"--- Page {i+1} ---")
print(text)
```
## Creating PDFs
**Option 1: From Markdown (recommended)**
```bash
# Using pandoc
pandoc input.md -o output.pdf
# With custom styling
pandoc input.md -o output.pdf --pdf-engine=xelatex -V geometry:margin=1in
```
**Option 2: Programmatically**
```python
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
c = canvas.Canvas("output.pdf", pagesize=letter)
c.drawString(100, 750, "Hello, PDF!")
c.save()
```
**Option 3: From HTML**
```bash
# Using wkhtmltopdf
wkhtmltopdf input.html output.pdf
# Or with Python
python3 -c "
import pdfkit
pdfkit.from_file('input.html', 'output.pdf')
"
```
## Merging PDFs
```python
import fitz
result = fitz.open()
for pdf_path in ["file1.pdf", "file2.pdf", "file3.pdf"]:
doc = fitz.open(pdf_path)
result.insert_pdf(doc)
result.save("merged.pdf")
```
## Splitting PDFs
```python
import fitz
doc = fitz.open("input.pdf")
for i in range(len(doc)):
single = fitz.open()
single.insert_pdf(doc, from_page=i, to_page=i)
single.save(f"page_{i+1}.pdf")
```
## Key Libraries
| Task | Library | Install |
|------|---------|---------|
| Read/Write/Merge | PyMuPDF | `pip install pymupdf` |
| Create from scratch | ReportLab | `pip install reportlab` |
| HTML to PDF | pdfkit | `pip install pdfkit` + wkhtmltopdf |
| Text extraction | pdftotext | `brew install poppler` / `apt install poppler-utils` |
## Best Practices
1. **Always check if tools are installed** before using them
2. **Handle encoding issues** - PDFs may contain various character encodings
3. **Large PDFs**: Process page by page to avoid memory issues
4. **OCR for scanned PDFs**: Use `pytesseract` if text extraction returns empty

42
web/.gitignore vendored Normal file
View File

@ -0,0 +1,42 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# env files (can opt-in for committing if needed)
.env*
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
.env*.local

36
web/README.md Normal file
View File

@ -0,0 +1,36 @@
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
## Getting Started
First, run the development server:
```bash
npm run dev
# or
yarn dev
# or
pnpm dev
# or
bun dev
```
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
## Learn More
To learn more about Next.js, take a look at the following resources:
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
## Deploy on Vercel
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.

9
web/next.config.ts Normal file
View File

@ -0,0 +1,9 @@
import type { NextConfig } from "next";
const nextConfig: NextConfig = {
output: "export",
images: { unoptimized: true },
trailingSlash: true,
};
export default nextConfig;

3784
web/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

38
web/package.json Normal file
View File

@ -0,0 +1,38 @@
{
"name": "web",
"version": "0.1.0",
"private": true,
"scripts": {
"extract": "tsx scripts/extract-content.ts",
"predev": "npm run extract",
"dev": "next dev",
"prebuild": "npm run extract",
"build": "next build",
"start": "next start"
},
"dependencies": {
"diff": "^8.0.3",
"framer-motion": "^12.34.0",
"lucide-react": "^0.564.0",
"next": "16.1.6",
"react": "19.2.3",
"react-dom": "19.2.3",
"rehype-highlight": "^7.0.2",
"rehype-raw": "^7.0.0",
"rehype-stringify": "^10.0.1",
"remark-gfm": "^4.0.1",
"remark-parse": "^11.0.0",
"remark-rehype": "^11.1.2",
"tsx": "^4.21.0",
"unified": "^11.0.5"
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/diff": "^7.0.2",
"@types/node": "^20",
"@types/react": "^19",
"@types/react-dom": "^19",
"tailwindcss": "^4",
"typescript": "^5"
}
}

7
web/postcss.config.mjs Normal file
View File

@ -0,0 +1,7 @@
const config = {
plugins: {
"@tailwindcss/postcss": {},
},
};
export default config;

1
web/public/file.svg Normal file
View File

@ -0,0 +1 @@
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>

After

Width:  |  Height:  |  Size: 391 B

1
web/public/globe.svg Normal file
View File

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>

After

Width:  |  Height:  |  Size: 1.0 KiB

1
web/public/next.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

1
web/public/vercel.svg Normal file
View File

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>

After

Width:  |  Height:  |  Size: 128 B

1
web/public/window.svg Normal file
View File

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>

After

Width:  |  Height:  |  Size: 385 B

View File

@ -0,0 +1,281 @@
import * as fs from "fs";
import * as path from "path";
import type {
AgentVersion,
VersionDiff,
DocContent,
VersionIndex,
} from "../src/types/agent-data";
import { VERSION_META, VERSION_ORDER, LEARNING_PATH } from "../src/lib/constants";
// Resolve paths relative to this script's location (web/scripts/)
const WEB_DIR = path.resolve(__dirname, "..");
const REPO_ROOT = path.resolve(WEB_DIR, "..");
const AGENTS_DIR = path.join(REPO_ROOT, "agents");
const DOCS_DIR = path.join(REPO_ROOT, "docs");
const OUT_DIR = path.join(WEB_DIR, "src", "data", "generated");
// Map python filenames to version IDs
// s01_agent_loop.py -> s01
// s02_tools.py -> s02
// s_full.py -> s_full (reference agent, typically skipped)
function filenameToVersionId(filename: string): string | null {
const base = path.basename(filename, ".py");
if (base === "s_full") return null;
if (base === "__init__") return null;
const match = base.match(/^(s\d+[a-c]?)_/);
if (!match) return null;
return match[1];
}
// Extract classes from Python source
function extractClasses(
lines: string[]
): { name: string; startLine: number; endLine: number }[] {
const classes: { name: string; startLine: number; endLine: number }[] = [];
const classPattern = /^class\s+(\w+)/;
for (let i = 0; i < lines.length; i++) {
const m = lines[i].match(classPattern);
if (m) {
const name = m[1];
const startLine = i + 1;
// Find end of class: next class/function at indent 0, or EOF
let endLine = lines.length;
for (let j = i + 1; j < lines.length; j++) {
if (
lines[j].match(/^class\s/) ||
lines[j].match(/^def\s/) ||
(lines[j].match(/^\S/) && lines[j].trim() !== "" && !lines[j].startsWith("#") && !lines[j].startsWith("@"))
) {
endLine = j;
break;
}
}
classes.push({ name, startLine, endLine });
}
}
return classes;
}
// Extract top-level functions from Python source
function extractFunctions(
lines: string[]
): { name: string; signature: string; startLine: number }[] {
const functions: { name: string; signature: string; startLine: number }[] = [];
const funcPattern = /^def\s+(\w+)\((.*?)\)/;
for (let i = 0; i < lines.length; i++) {
const m = lines[i].match(funcPattern);
if (m) {
functions.push({
name: m[1],
signature: `def ${m[1]}(${m[2]})`,
startLine: i + 1,
});
}
}
return functions;
}
// Extract tool names from Python source
// Looks for "name": "tool_name" patterns in dict literals
function extractTools(source: string): string[] {
const toolPattern = /"name"\s*:\s*"(\w+)"/g;
const tools = new Set<string>();
let m;
while ((m = toolPattern.exec(source)) !== null) {
tools.add(m[1]);
}
return Array.from(tools);
}
// Count non-blank, non-comment lines
function countLoc(lines: string[]): number {
return lines.filter((line) => {
const trimmed = line.trim();
return trimmed !== "" && !trimmed.startsWith("#");
}).length;
}
// Detect locale from subdirectory path
// docs/en/s01-the-agent-loop.md -> "en"
// docs/zh/s01-the-agent-loop.md -> "zh"
// docs/ja/s01-the-agent-loop.md -> "ja"
function detectLocale(relPath: string): "en" | "zh" | "ja" {
if (relPath.startsWith("zh/") || relPath.startsWith("zh\\")) return "zh";
if (relPath.startsWith("ja/") || relPath.startsWith("ja\\")) return "ja";
return "en";
}
// Extract version from doc filename (e.g., "s01-the-agent-loop.md" -> "s01")
function extractDocVersion(filename: string): string | null {
const m = filename.match(/^(s\d+[a-c]?)-/);
return m ? m[1] : null;
}
// Main extraction
function main() {
console.log("Extracting content from agents and docs...");
console.log(` Repo root: ${REPO_ROOT}`);
console.log(` Agents dir: ${AGENTS_DIR}`);
console.log(` Docs dir: ${DOCS_DIR}`);
// Skip extraction if source directories don't exist (e.g. Vercel build).
// Pre-committed generated data will be used instead.
if (!fs.existsSync(AGENTS_DIR)) {
console.log(" Agents directory not found, skipping extraction.");
console.log(" Using pre-committed generated data.");
return;
}
// 1. Read all agent files
const agentFiles = fs
.readdirSync(AGENTS_DIR)
.filter((f) => f.startsWith("s") && f.endsWith(".py"));
console.log(` Found ${agentFiles.length} agent files`);
const versions: AgentVersion[] = [];
for (const filename of agentFiles) {
const versionId = filenameToVersionId(filename);
if (!versionId) {
console.warn(` Skipping ${filename}: could not determine version ID`);
continue;
}
const filePath = path.join(AGENTS_DIR, filename);
const source = fs.readFileSync(filePath, "utf-8");
const lines = source.split("\n");
const meta = VERSION_META[versionId];
const classes = extractClasses(lines);
const functions = extractFunctions(lines);
const tools = extractTools(source);
const loc = countLoc(lines);
versions.push({
id: versionId,
filename,
title: meta?.title ?? versionId,
subtitle: meta?.subtitle ?? "",
loc,
tools,
newTools: [], // computed after all versions are loaded
coreAddition: meta?.coreAddition ?? "",
keyInsight: meta?.keyInsight ?? "",
classes,
functions,
layer: meta?.layer ?? "tools",
source,
});
}
// Sort versions according to VERSION_ORDER
const orderMap = new Map(VERSION_ORDER.map((v, i) => [v, i]));
versions.sort(
(a, b) => (orderMap.get(a.id as any) ?? 99) - (orderMap.get(b.id as any) ?? 99)
);
// 2. Compute newTools for each version
for (let i = 0; i < versions.length; i++) {
const prev = i > 0 ? new Set(versions[i - 1].tools) : new Set<string>();
versions[i].newTools = versions[i].tools.filter((t) => !prev.has(t));
}
// 3. Compute diffs between adjacent versions in LEARNING_PATH
const diffs: VersionDiff[] = [];
const versionMap = new Map(versions.map((v) => [v.id, v]));
for (let i = 1; i < LEARNING_PATH.length; i++) {
const fromId = LEARNING_PATH[i - 1];
const toId = LEARNING_PATH[i];
const fromVer = versionMap.get(fromId);
const toVer = versionMap.get(toId);
if (!fromVer || !toVer) continue;
const fromClassNames = new Set(fromVer.classes.map((c) => c.name));
const fromFuncNames = new Set(fromVer.functions.map((f) => f.name));
const fromToolNames = new Set(fromVer.tools);
diffs.push({
from: fromId,
to: toId,
newClasses: toVer.classes
.map((c) => c.name)
.filter((n) => !fromClassNames.has(n)),
newFunctions: toVer.functions
.map((f) => f.name)
.filter((n) => !fromFuncNames.has(n)),
newTools: toVer.tools.filter((t) => !fromToolNames.has(t)),
locDelta: toVer.loc - fromVer.loc,
});
}
// 4. Read doc files from locale subdirectories (en/, zh/, ja/)
const docs: DocContent[] = [];
if (fs.existsSync(DOCS_DIR)) {
const localeDirs = ["en", "zh", "ja"];
let totalDocFiles = 0;
for (const locale of localeDirs) {
const localeDir = path.join(DOCS_DIR, locale);
if (!fs.existsSync(localeDir)) continue;
const docFiles = fs
.readdirSync(localeDir)
.filter((f) => f.endsWith(".md"));
totalDocFiles += docFiles.length;
for (const filename of docFiles) {
const version = extractDocVersion(filename);
if (!version) {
console.warn(` Skipping doc ${locale}/${filename}: could not determine version`);
continue;
}
const filePath = path.join(localeDir, filename);
const content = fs.readFileSync(filePath, "utf-8");
const titleMatch = content.match(/^#\s+(.+)$/m);
const title = titleMatch ? titleMatch[1] : filename;
docs.push({ version, locale: locale as "en" | "zh" | "ja", title, content });
}
}
console.log(` Found ${totalDocFiles} doc files across ${localeDirs.length} locales`);
} else {
console.warn(` Docs directory not found: ${DOCS_DIR}`);
}
// 5. Write output
fs.mkdirSync(OUT_DIR, { recursive: true });
const index: VersionIndex = { versions, diffs };
const indexPath = path.join(OUT_DIR, "versions.json");
fs.writeFileSync(indexPath, JSON.stringify(index, null, 2));
console.log(` Wrote ${indexPath}`);
const docsPath = path.join(OUT_DIR, "docs.json");
fs.writeFileSync(docsPath, JSON.stringify(docs, null, 2));
console.log(` Wrote ${docsPath}`);
// Summary
console.log("\nExtraction complete:");
console.log(` ${versions.length} versions`);
console.log(` ${diffs.length} diffs`);
console.log(` ${docs.length} docs`);
for (const v of versions) {
console.log(
` ${v.id}: ${v.loc} LOC, ${v.tools.length} tools, ${v.classes.length} classes, ${v.functions.length} functions`
);
}
}
main();

View File

@ -0,0 +1,82 @@
"use client";
import { ArchDiagram } from "@/components/architecture/arch-diagram";
import { WhatsNew } from "@/components/diff/whats-new";
import { DesignDecisions } from "@/components/architecture/design-decisions";
import { DocRenderer } from "@/components/docs/doc-renderer";
import { SourceViewer } from "@/components/code/source-viewer";
import { AgentLoopSimulator } from "@/components/simulator/agent-loop-simulator";
import { ExecutionFlow } from "@/components/architecture/execution-flow";
import { SessionVisualization } from "@/components/visualizations";
import { Tabs } from "@/components/ui/tabs";
import { useTranslations } from "@/lib/i18n";
interface VersionDetailClientProps {
version: string;
diff: {
from: string;
to: string;
newClasses: string[];
newFunctions: string[];
newTools: string[];
locDelta: number;
} | null;
source: string;
filename: string;
}
export function VersionDetailClient({
version,
diff,
source,
filename,
}: VersionDetailClientProps) {
const t = useTranslations("version");
const tabs = [
{ id: "learn", label: t("tab_learn") },
{ id: "simulate", label: t("tab_simulate") },
{ id: "code", label: t("tab_code") },
{ id: "deep-dive", label: t("tab_deep_dive") },
];
return (
<div className="space-y-6">
{/* Hero Visualization */}
<SessionVisualization version={version} />
{/* Tabbed content */}
<Tabs tabs={tabs} defaultTab="learn">
{(activeTab) => (
<>
{activeTab === "learn" && <DocRenderer version={version} />}
{activeTab === "simulate" && (
<AgentLoopSimulator version={version} />
)}
{activeTab === "code" && (
<SourceViewer source={source} filename={filename} />
)}
{activeTab === "deep-dive" && (
<div className="space-y-8">
<section>
<h2 className="mb-4 text-xl font-semibold">
{t("execution_flow")}
</h2>
<ExecutionFlow version={version} />
</section>
<section>
<h2 className="mb-4 text-xl font-semibold">
{t("architecture")}
</h2>
<ArchDiagram version={version} />
</section>
{diff && <WhatsNew diff={diff} />}
<DesignDecisions version={version} />
</div>
)}
</>
)}
</Tabs>
</div>
);
}

View File

@ -0,0 +1,202 @@
"use client";
import { useMemo } from "react";
import Link from "next/link";
import { useLocale } from "@/lib/i18n";
import { VERSION_META } from "@/lib/constants";
import { Card, CardHeader, CardTitle } from "@/components/ui/card";
import { LayerBadge } from "@/components/ui/badge";
import { CodeDiff } from "@/components/diff/code-diff";
import { ArrowLeft, Plus, Minus, FileCode, Wrench, Box, FunctionSquare } from "lucide-react";
import type { AgentVersion, VersionDiff, VersionIndex } from "@/types/agent-data";
import versionData from "@/data/generated/versions.json";
const data = versionData as VersionIndex;
interface DiffPageContentProps {
version: string;
}
export function DiffPageContent({ version }: DiffPageContentProps) {
const locale = useLocale();
const meta = VERSION_META[version];
const { currentVersion, prevVersion, diff } = useMemo(() => {
const current = data.versions.find((v) => v.id === version);
const prevId = meta?.prevVersion;
const prev = prevId ? data.versions.find((v) => v.id === prevId) : null;
const d = data.diffs.find((d) => d.to === version);
return { currentVersion: current, prevVersion: prev, diff: d };
}, [version, meta]);
if (!meta || !currentVersion) {
return (
<div className="py-12 text-center">
<p className="text-zinc-500">Version not found.</p>
<Link href={`/${locale}/timeline`} className="mt-4 inline-block text-sm text-blue-600 hover:underline">
Back to timeline
</Link>
</div>
);
}
if (!prevVersion || !diff) {
return (
<div className="py-12">
<Link
href={`/${locale}/${version}`}
className="mb-6 inline-flex items-center gap-1 text-sm text-zinc-500 hover:text-zinc-700 dark:hover:text-zinc-300"
>
<ArrowLeft size={14} />
Back to {meta.title}
</Link>
<h1 className="text-3xl font-bold">{meta.title}</h1>
<p className="mt-4 text-zinc-500">
This is the first version -- there is no previous version to compare against.
</p>
</div>
);
}
const prevMeta = VERSION_META[prevVersion.id];
return (
<div className="py-4">
<Link
href={`/${locale}/${version}`}
className="mb-6 inline-flex items-center gap-1 text-sm text-zinc-500 hover:text-zinc-700 dark:hover:text-zinc-300"
>
<ArrowLeft size={14} />
Back to {meta.title}
</Link>
{/* Header */}
<div className="mb-8">
<h1 className="text-3xl font-bold">
{prevMeta?.title || prevVersion.id} {meta.title}
</h1>
<p className="mt-2 text-zinc-500 dark:text-zinc-400">
{prevVersion.id} ({prevVersion.loc} LOC) {version} ({currentVersion.loc} LOC)
</p>
</div>
{/* Structural Diff */}
<div className="mb-8 grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<FileCode size={16} />
<span className="text-sm">LOC Delta</span>
</div>
</CardHeader>
<CardTitle>
<span className={diff.locDelta >= 0 ? "text-green-600 dark:text-green-400" : "text-red-600 dark:text-red-400"}>
{diff.locDelta >= 0 ? "+" : ""}{diff.locDelta}
</span>
<span className="ml-2 text-sm font-normal text-zinc-500">lines</span>
</CardTitle>
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<Wrench size={16} />
<span className="text-sm">New Tools</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-blue-600 dark:text-blue-400">{diff.newTools.length}</span>
</CardTitle>
{diff.newTools.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{diff.newTools.map((tool) => (
<span key={tool} className="rounded bg-blue-100 px-1.5 py-0.5 text-xs text-blue-700 dark:bg-blue-900/30 dark:text-blue-300">
{tool}
</span>
))}
</div>
)}
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<Box size={16} />
<span className="text-sm">New Classes</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-purple-600 dark:text-purple-400">{diff.newClasses.length}</span>
</CardTitle>
{diff.newClasses.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{diff.newClasses.map((cls) => (
<span key={cls} className="rounded bg-purple-100 px-1.5 py-0.5 text-xs text-purple-700 dark:bg-purple-900/30 dark:text-purple-300">
{cls}
</span>
))}
</div>
)}
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<FunctionSquare size={16} />
<span className="text-sm">New Functions</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-amber-600 dark:text-amber-400">{diff.newFunctions.length}</span>
</CardTitle>
{diff.newFunctions.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{diff.newFunctions.map((fn) => (
<span key={fn} className="rounded bg-amber-100 px-1.5 py-0.5 text-xs text-amber-700 dark:bg-amber-900/30 dark:text-amber-300">
{fn}
</span>
))}
</div>
)}
</Card>
</div>
{/* Version Info Comparison */}
<div className="mb-8 grid grid-cols-1 gap-4 sm:grid-cols-2">
<Card className="border-l-4 border-l-red-300 dark:border-l-red-700">
<CardHeader>
<CardTitle>{prevMeta?.title || prevVersion.id}</CardTitle>
<p className="text-sm text-zinc-500">{prevMeta?.subtitle}</p>
</CardHeader>
<div className="space-y-1 text-sm text-zinc-600 dark:text-zinc-400">
<p>{prevVersion.loc} LOC</p>
<p>{prevVersion.tools.length} tools: {prevVersion.tools.join(", ")}</p>
<LayerBadge layer={prevVersion.layer}>{prevVersion.layer}</LayerBadge>
</div>
</Card>
<Card className="border-l-4 border-l-green-300 dark:border-l-green-700">
<CardHeader>
<CardTitle>{meta.title}</CardTitle>
<p className="text-sm text-zinc-500">{meta.subtitle}</p>
</CardHeader>
<div className="space-y-1 text-sm text-zinc-600 dark:text-zinc-400">
<p>{currentVersion.loc} LOC</p>
<p>{currentVersion.tools.length} tools: {currentVersion.tools.join(", ")}</p>
<LayerBadge layer={currentVersion.layer}>{currentVersion.layer}</LayerBadge>
</div>
</Card>
</div>
{/* Code Diff */}
<div>
<h2 className="mb-4 text-xl font-semibold">Source Code Diff</h2>
<CodeDiff
oldSource={prevVersion.source}
newSource={currentVersion.source}
oldLabel={`${prevVersion.id} (${prevVersion.filename})`}
newLabel={`${version} (${currentVersion.filename})`}
/>
</div>
</div>
);
}

View File

@ -0,0 +1,15 @@
import { LEARNING_PATH } from "@/lib/constants";
import { DiffPageContent } from "./diff-content";
export function generateStaticParams() {
return LEARNING_PATH.map((version) => ({ version }));
}
export default async function DiffPage({
params,
}: {
params: Promise<{ locale: string; version: string }>;
}) {
const { version } = await params;
return <DiffPageContent version={version} />;
}

View File

@ -0,0 +1,125 @@
import Link from "next/link";
import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants";
import { LayerBadge } from "@/components/ui/badge";
import versionsData from "@/data/generated/versions.json";
import { VersionDetailClient } from "./client";
import { getTranslations } from "@/lib/i18n-server";
export function generateStaticParams() {
return LEARNING_PATH.map((version) => ({ version }));
}
export default async function VersionPage({
params,
}: {
params: Promise<{ locale: string; version: string }>;
}) {
const { locale, version } = await params;
const versionData = versionsData.versions.find((v) => v.id === version);
const meta = VERSION_META[version];
const diff = versionsData.diffs.find((d) => d.to === version) ?? null;
if (!versionData || !meta) {
return (
<div className="py-20 text-center">
<h1 className="text-2xl font-bold">Version not found</h1>
<p className="mt-2 text-zinc-500">{version}</p>
</div>
);
}
const t = getTranslations(locale, "version");
const tSession = getTranslations(locale, "sessions");
const tLayer = getTranslations(locale, "layer_labels");
const layer = LAYERS.find((l) => l.id === meta.layer);
const pathIndex = LEARNING_PATH.indexOf(version as typeof LEARNING_PATH[number]);
const prevVersion = pathIndex > 0 ? LEARNING_PATH[pathIndex - 1] : null;
const nextVersion =
pathIndex < LEARNING_PATH.length - 1
? LEARNING_PATH[pathIndex + 1]
: null;
return (
<div className="mx-auto max-w-3xl space-y-10 py-4">
{/* Header */}
<header className="space-y-3">
<div className="flex flex-wrap items-center gap-3">
<span className="rounded-lg bg-zinc-100 px-3 py-1 font-mono text-lg font-bold dark:bg-zinc-800">
{version}
</span>
<h1 className="text-2xl font-bold sm:text-3xl">{tSession(version) || meta.title}</h1>
{layer && (
<LayerBadge layer={meta.layer}>{tLayer(layer.id)}</LayerBadge>
)}
</div>
<p className="text-lg text-zinc-500 dark:text-zinc-400">
{meta.subtitle}
</p>
<div className="flex flex-wrap items-center gap-4 text-sm text-zinc-500 dark:text-zinc-400">
<span className="font-mono">{versionData.loc} LOC</span>
<span>{versionData.tools.length} {t("tools")}</span>
{meta.coreAddition && (
<span className="rounded-full bg-zinc-100 px-2.5 py-0.5 text-xs dark:bg-zinc-800">
{meta.coreAddition}
</span>
)}
</div>
{meta.keyInsight && (
<blockquote className="border-l-4 border-zinc-300 pl-4 text-sm italic text-zinc-500 dark:border-zinc-600 dark:text-zinc-400">
{meta.keyInsight}
</blockquote>
)}
</header>
{/* Client-rendered interactive sections */}
<VersionDetailClient
version={version}
diff={diff}
source={versionData.source}
filename={versionData.filename}
/>
{/* Prev / Next navigation */}
<nav className="flex items-center justify-between border-t border-zinc-200 pt-6 dark:border-zinc-700">
{prevVersion ? (
<Link
href={`/${locale}/${prevVersion}`}
className="group flex items-center gap-2 text-sm text-zinc-500 transition-colors hover:text-zinc-900 dark:hover:text-white"
>
<span className="transition-transform group-hover:-translate-x-1">
&larr;
</span>
<div>
<div className="text-xs text-zinc-400">{t("prev")}</div>
<div className="font-medium">
{prevVersion} - {tSession(prevVersion) || VERSION_META[prevVersion]?.title}
</div>
</div>
</Link>
) : (
<div />
)}
{nextVersion ? (
<Link
href={`/${locale}/${nextVersion}`}
className="group flex items-center gap-2 text-right text-sm text-zinc-500 transition-colors hover:text-zinc-900 dark:hover:text-white"
>
<div>
<div className="text-xs text-zinc-400">{t("next")}</div>
<div className="font-medium">
{tSession(nextVersion) || VERSION_META[nextVersion]?.title} - {nextVersion}
</div>
</div>
<span className="transition-transform group-hover:translate-x-1">
&rarr;
</span>
</Link>
) : (
<div />
)}
</nav>
</div>
);
}

View File

@ -0,0 +1,308 @@
"use client";
import { useState, useMemo } from "react";
import { useLocale, useTranslations } from "@/lib/i18n";
import { LEARNING_PATH, VERSION_META } from "@/lib/constants";
import { Card, CardHeader, CardTitle } from "@/components/ui/card";
import { LayerBadge } from "@/components/ui/badge";
import { CodeDiff } from "@/components/diff/code-diff";
import { ArchDiagram } from "@/components/architecture/arch-diagram";
import { ArrowRight, FileCode, Wrench, Box, FunctionSquare } from "lucide-react";
import type { VersionIndex } from "@/types/agent-data";
import versionData from "@/data/generated/versions.json";
const data = versionData as VersionIndex;
export default function ComparePage() {
const t = useTranslations("compare");
const locale = useLocale();
const [versionA, setVersionA] = useState<string>("");
const [versionB, setVersionB] = useState<string>("");
const infoA = useMemo(() => data.versions.find((v) => v.id === versionA), [versionA]);
const infoB = useMemo(() => data.versions.find((v) => v.id === versionB), [versionB]);
const metaA = versionA ? VERSION_META[versionA] : null;
const metaB = versionB ? VERSION_META[versionB] : null;
const comparison = useMemo(() => {
if (!infoA || !infoB) return null;
const toolsA = new Set(infoA.tools);
const toolsB = new Set(infoB.tools);
const onlyA = infoA.tools.filter((t) => !toolsB.has(t));
const onlyB = infoB.tools.filter((t) => !toolsA.has(t));
const shared = infoA.tools.filter((t) => toolsB.has(t));
const classesA = new Set(infoA.classes.map((c) => c.name));
const classesB = new Set(infoB.classes.map((c) => c.name));
const newClasses = infoB.classes.map((c) => c.name).filter((c) => !classesA.has(c));
const funcsA = new Set(infoA.functions.map((f) => f.name));
const funcsB = new Set(infoB.functions.map((f) => f.name));
const newFunctions = infoB.functions.map((f) => f.name).filter((f) => !funcsA.has(f));
return {
locDelta: infoB.loc - infoA.loc,
toolsOnlyA: onlyA,
toolsOnlyB: onlyB,
toolsShared: shared,
newClasses,
newFunctions,
};
}, [infoA, infoB]);
return (
<div className="py-4">
<div className="mb-8">
<h1 className="text-3xl font-bold">{t("title")}</h1>
<p className="mt-2 text-zinc-500 dark:text-zinc-400">{t("subtitle")}</p>
</div>
{/* Selectors */}
<div className="mb-8 flex flex-col items-start gap-4 sm:flex-row sm:items-center">
<div className="flex-1">
<label className="mb-1 block text-sm font-medium text-zinc-600 dark:text-zinc-400">
{t("select_a")}
</label>
<select
value={versionA}
onChange={(e) => setVersionA(e.target.value)}
className="w-full rounded-lg border border-zinc-300 bg-white px-3 py-2 text-sm dark:border-zinc-600 dark:bg-zinc-800 dark:text-zinc-200"
>
<option value="">-- select --</option>
{LEARNING_PATH.map((v) => (
<option key={v} value={v}>
{v} - {VERSION_META[v]?.title}
</option>
))}
</select>
</div>
<ArrowRight size={20} className="mt-5 hidden text-zinc-400 sm:block" />
<div className="flex-1">
<label className="mb-1 block text-sm font-medium text-zinc-600 dark:text-zinc-400">
{t("select_b")}
</label>
<select
value={versionB}
onChange={(e) => setVersionB(e.target.value)}
className="w-full rounded-lg border border-zinc-300 bg-white px-3 py-2 text-sm dark:border-zinc-600 dark:bg-zinc-800 dark:text-zinc-200"
>
<option value="">-- select --</option>
{LEARNING_PATH.map((v) => (
<option key={v} value={v}>
{v} - {VERSION_META[v]?.title}
</option>
))}
</select>
</div>
</div>
{/* Results */}
{infoA && infoB && comparison && (
<div className="space-y-8">
{/* Side-by-side version info */}
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2">
<Card>
<CardHeader>
<CardTitle>{metaA?.title || versionA}</CardTitle>
<p className="text-sm text-zinc-500">{metaA?.subtitle}</p>
</CardHeader>
<div className="space-y-2 text-sm text-zinc-600 dark:text-zinc-400">
<p>{infoA.loc} LOC</p>
<p>{infoA.tools.length} tools</p>
{metaA && <LayerBadge layer={metaA.layer}>{metaA.layer}</LayerBadge>}
</div>
</Card>
<Card>
<CardHeader>
<CardTitle>{metaB?.title || versionB}</CardTitle>
<p className="text-sm text-zinc-500">{metaB?.subtitle}</p>
</CardHeader>
<div className="space-y-2 text-sm text-zinc-600 dark:text-zinc-400">
<p>{infoB.loc} LOC</p>
<p>{infoB.tools.length} tools</p>
{metaB && <LayerBadge layer={metaB.layer}>{metaB.layer}</LayerBadge>}
</div>
</Card>
</div>
{/* Side-by-side Architecture Diagrams */}
<div>
<h2 className="mb-4 text-xl font-semibold">{t("architecture")}</h2>
<div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
<div>
<h3 className="mb-3 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{metaA?.title || versionA}
</h3>
<ArchDiagram version={versionA} />
</div>
<div>
<h3 className="mb-3 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{metaB?.title || versionB}
</h3>
<ArchDiagram version={versionB} />
</div>
</div>
</div>
{/* Structural diff */}
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<FileCode size={16} />
<span className="text-sm">{t("loc_delta")}</span>
</div>
</CardHeader>
<CardTitle>
<span className={comparison.locDelta >= 0 ? "text-green-600 dark:text-green-400" : "text-red-600 dark:text-red-400"}>
{comparison.locDelta >= 0 ? "+" : ""}{comparison.locDelta}
</span>
<span className="ml-2 text-sm font-normal text-zinc-500">{t("lines")}</span>
</CardTitle>
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<Wrench size={16} />
<span className="text-sm">{t("new_tools_in_b")}</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-blue-600 dark:text-blue-400">{comparison.toolsOnlyB.length}</span>
</CardTitle>
{comparison.toolsOnlyB.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{comparison.toolsOnlyB.map((tool) => (
<span key={tool} className="rounded bg-blue-100 px-1.5 py-0.5 text-xs text-blue-700 dark:bg-blue-900/30 dark:text-blue-300">
{tool}
</span>
))}
</div>
)}
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<Box size={16} />
<span className="text-sm">{t("new_classes_in_b")}</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-purple-600 dark:text-purple-400">{comparison.newClasses.length}</span>
</CardTitle>
{comparison.newClasses.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{comparison.newClasses.map((cls) => (
<span key={cls} className="rounded bg-purple-100 px-1.5 py-0.5 text-xs text-purple-700 dark:bg-purple-900/30 dark:text-purple-300">
{cls}
</span>
))}
</div>
)}
</Card>
<Card>
<CardHeader>
<div className="flex items-center gap-2 text-zinc-500 dark:text-zinc-400">
<FunctionSquare size={16} />
<span className="text-sm">{t("new_functions_in_b")}</span>
</div>
</CardHeader>
<CardTitle>
<span className="text-amber-600 dark:text-amber-400">{comparison.newFunctions.length}</span>
</CardTitle>
{comparison.newFunctions.length > 0 && (
<div className="mt-2 flex flex-wrap gap-1">
{comparison.newFunctions.map((fn) => (
<span key={fn} className="rounded bg-amber-100 px-1.5 py-0.5 text-xs text-amber-700 dark:bg-amber-900/30 dark:text-amber-300">
{fn}
</span>
))}
</div>
)}
</Card>
</div>
{/* Tool comparison */}
<Card>
<CardHeader>
<CardTitle>{t("tool_comparison")}</CardTitle>
</CardHeader>
<div className="grid grid-cols-1 gap-6 sm:grid-cols-3">
<div>
<h4 className="mb-2 text-sm font-medium text-zinc-600 dark:text-zinc-400">
{t("only_in")} {metaA?.title || versionA}
</h4>
{comparison.toolsOnlyA.length === 0 ? (
<p className="text-xs text-zinc-400">{t("none")}</p>
) : (
<div className="flex flex-wrap gap-1">
{comparison.toolsOnlyA.map((tool) => (
<span key={tool} className="rounded bg-red-100 px-1.5 py-0.5 text-xs text-red-700 dark:bg-red-900/30 dark:text-red-300">
{tool}
</span>
))}
</div>
)}
</div>
<div>
<h4 className="mb-2 text-sm font-medium text-zinc-600 dark:text-zinc-400">
{t("shared")}
</h4>
{comparison.toolsShared.length === 0 ? (
<p className="text-xs text-zinc-400">{t("none")}</p>
) : (
<div className="flex flex-wrap gap-1">
{comparison.toolsShared.map((tool) => (
<span key={tool} className="rounded bg-zinc-100 px-1.5 py-0.5 text-xs text-zinc-700 dark:bg-zinc-800 dark:text-zinc-300">
{tool}
</span>
))}
</div>
)}
</div>
<div>
<h4 className="mb-2 text-sm font-medium text-zinc-600 dark:text-zinc-400">
{t("only_in")} {metaB?.title || versionB}
</h4>
{comparison.toolsOnlyB.length === 0 ? (
<p className="text-xs text-zinc-400">{t("none")}</p>
) : (
<div className="flex flex-wrap gap-1">
{comparison.toolsOnlyB.map((tool) => (
<span key={tool} className="rounded bg-green-100 px-1.5 py-0.5 text-xs text-green-700 dark:bg-green-900/30 dark:text-green-300">
{tool}
</span>
))}
</div>
)}
</div>
</div>
</Card>
{/* Code Diff */}
<div>
<h2 className="mb-4 text-xl font-semibold">{t("source_diff")}</h2>
<CodeDiff
oldSource={infoA.source}
newSource={infoB.source}
oldLabel={`${infoA.id} (${infoA.filename})`}
newLabel={`${infoB.id} (${infoB.filename})`}
/>
</div>
</div>
)}
{/* Empty state */}
{(!versionA || !versionB) && (
<div className="rounded-lg border border-dashed border-zinc-300 p-12 text-center dark:border-zinc-700">
<p className="text-zinc-400">{t("empty_hint")}</p>
</div>
)}
</div>
);
}

View File

@ -0,0 +1,133 @@
"use client";
import Link from "next/link";
import { useTranslations, useLocale } from "@/lib/i18n";
import { LAYERS, VERSION_META } from "@/lib/constants";
import { Card, CardHeader, CardTitle } from "@/components/ui/card";
import { LayerBadge } from "@/components/ui/badge";
import { cn } from "@/lib/utils";
import { ChevronRight } from "lucide-react";
import type { VersionIndex } from "@/types/agent-data";
import versionData from "@/data/generated/versions.json";
const data = versionData as VersionIndex;
const LAYER_BORDER_CLASSES: Record<string, string> = {
tools: "border-l-blue-500",
planning: "border-l-emerald-500",
memory: "border-l-purple-500",
concurrency: "border-l-amber-500",
collaboration: "border-l-red-500",
};
const LAYER_HEADER_BG: Record<string, string> = {
tools: "bg-blue-500",
planning: "bg-emerald-500",
memory: "bg-purple-500",
concurrency: "bg-amber-500",
collaboration: "bg-red-500",
};
export default function LayersPage() {
const t = useTranslations("layers");
const locale = useLocale();
return (
<div className="py-4">
<div className="mb-10">
<h1 className="text-3xl font-bold">{t("title")}</h1>
<p className="mt-2 text-zinc-500 dark:text-zinc-400">{t("subtitle")}</p>
</div>
<div className="space-y-6">
{LAYERS.map((layer, index) => {
const versionInfos = layer.versions.map((vId) => {
const info = data.versions.find((v) => v.id === vId);
const meta = VERSION_META[vId];
return { id: vId, info, meta };
});
return (
<div
key={layer.id}
className={cn(
"overflow-hidden rounded-xl border border-zinc-200 dark:border-zinc-800",
"border-l-4",
LAYER_BORDER_CLASSES[layer.id]
)}
>
{/* Layer header */}
<div className="flex items-center gap-3 px-6 py-4">
<div className={cn("h-3 w-3 rounded-full", LAYER_HEADER_BG[layer.id])} />
<div>
<h2 className="text-xl font-bold">
<span className="text-zinc-400 dark:text-zinc-600">L{index + 1}</span>
{" "}
{layer.label}
</h2>
<p className="mt-1 text-sm text-zinc-500 dark:text-zinc-400">
{t(layer.id)}
</p>
</div>
</div>
{/* Version cards within this layer */}
<div className="border-t border-zinc-200 bg-zinc-50/50 px-6 py-4 dark:border-zinc-800 dark:bg-zinc-900/50">
<div className="grid grid-cols-1 gap-3 sm:grid-cols-2 lg:grid-cols-3">
{versionInfos.map(({ id, info, meta }) => (
<Link
key={id}
href={`/${locale}/${id}`}
className="group"
>
<Card className="transition-shadow hover:shadow-md">
<div className="flex items-start justify-between">
<div className="min-w-0 flex-1">
<div className="flex items-center gap-2">
<span className="text-xs font-mono text-zinc-400">{id}</span>
<LayerBadge layer={layer.id}>{layer.id}</LayerBadge>
</div>
<h3 className="mt-1 font-semibold text-zinc-900 dark:text-zinc-100">
{meta?.title || id}
</h3>
{meta?.subtitle && (
<p className="mt-0.5 text-xs text-zinc-500 dark:text-zinc-400">
{meta.subtitle}
</p>
)}
</div>
<ChevronRight
size={16}
className="mt-1 shrink-0 text-zinc-300 transition-colors group-hover:text-zinc-600 dark:text-zinc-600 dark:group-hover:text-zinc-300"
/>
</div>
<div className="mt-3 flex items-center gap-4 text-xs text-zinc-500 dark:text-zinc-400">
<span>{info?.loc ?? "?"} LOC</span>
<span>{info?.tools.length ?? "?"} tools</span>
</div>
{meta?.keyInsight && (
<p className="mt-2 text-xs leading-relaxed text-zinc-500 dark:text-zinc-400 line-clamp-2">
{meta.keyInsight}
</p>
)}
</Card>
</Link>
))}
</div>
</div>
{/* Composition indicator */}
{index < LAYERS.length - 1 && (
<div className="flex items-center justify-center py-1 text-zinc-300 dark:text-zinc-700">
<svg width="20" height="12" viewBox="0 0 20 12" fill="none" className="text-current">
<path d="M10 0 L10 12 M5 7 L10 12 L15 7" stroke="currentColor" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
</svg>
</div>
)}
</div>
);
})}
</div>
</div>
);
}

View File

@ -0,0 +1,14 @@
import { Sidebar } from "@/components/layout/sidebar";
export default function LearnLayout({
children,
}: {
children: React.ReactNode;
}) {
return (
<div className="flex gap-8">
<Sidebar />
<div className="min-w-0 flex-1">{children}</div>
</div>
);
}

View File

@ -0,0 +1,20 @@
"use client";
import { useTranslations } from "@/lib/i18n";
import { Timeline } from "@/components/timeline/timeline";
export default function TimelinePage() {
const t = useTranslations("timeline");
return (
<div>
<div className="mb-8">
<h1 className="text-3xl font-bold">{t("title")}</h1>
<p className="mt-2 text-[var(--color-text-secondary)]">
{t("subtitle")}
</p>
</div>
<Timeline />
</div>
);
}

View File

@ -0,0 +1,60 @@
import type { Metadata } from "next";
import { I18nProvider } from "@/lib/i18n";
import { Header } from "@/components/layout/header";
import en from "@/i18n/messages/en.json";
import zh from "@/i18n/messages/zh.json";
import ja from "@/i18n/messages/ja.json";
import "../globals.css";
const locales = ["en", "zh", "ja"];
const metaMessages: Record<string, typeof en> = { en, zh, ja };
export function generateStaticParams() {
return locales.map((locale) => ({ locale }));
}
export async function generateMetadata({
params,
}: {
params: Promise<{ locale: string }>;
}): Promise<Metadata> {
const { locale } = await params;
const messages = metaMessages[locale] || metaMessages.en;
return {
title: messages.meta?.title || "Learn Claude Code",
description: messages.meta?.description || "Build an AI coding agent from scratch, one concept at a time",
};
}
export default async function RootLayout({
children,
params,
}: {
children: React.ReactNode;
params: Promise<{ locale: string }>;
}) {
const { locale } = await params;
return (
<html lang={locale} suppressHydrationWarning>
<head>
<script dangerouslySetInnerHTML={{ __html: `
(function() {
var theme = localStorage.getItem('theme');
if (theme === 'dark' || (!theme && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
document.documentElement.classList.add('dark');
}
})();
`}} />
</head>
<body className="min-h-screen bg-[var(--color-bg)] text-[var(--color-text)] antialiased">
<I18nProvider locale={locale}>
<Header />
<main className="mx-auto max-w-7xl px-4 py-8 sm:px-6 lg:px-8">
{children}
</main>
</I18nProvider>
</body>
</html>
);
}

View File

@ -0,0 +1,234 @@
"use client";
import Link from "next/link";
import { useTranslations, useLocale } from "@/lib/i18n";
import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants";
import { LayerBadge } from "@/components/ui/badge";
import { Card } from "@/components/ui/card";
import { cn } from "@/lib/utils";
import versionsData from "@/data/generated/versions.json";
import { MessageFlow } from "@/components/architecture/message-flow";
const LAYER_DOT_COLORS: Record<string, string> = {
tools: "bg-blue-500",
planning: "bg-emerald-500",
memory: "bg-purple-500",
concurrency: "bg-amber-500",
collaboration: "bg-red-500",
};
const LAYER_BORDER_COLORS: Record<string, string> = {
tools: "border-blue-500/30 hover:border-blue-500/60",
planning: "border-emerald-500/30 hover:border-emerald-500/60",
memory: "border-purple-500/30 hover:border-purple-500/60",
concurrency: "border-amber-500/30 hover:border-amber-500/60",
collaboration: "border-red-500/30 hover:border-red-500/60",
};
const LAYER_BAR_COLORS: Record<string, string> = {
tools: "bg-blue-500",
planning: "bg-emerald-500",
memory: "bg-purple-500",
concurrency: "bg-amber-500",
collaboration: "bg-red-500",
};
function getVersionData(id: string) {
return versionsData.versions.find((v) => v.id === id);
}
export default function HomePage() {
const t = useTranslations("home");
const locale = useLocale();
return (
<div className="flex flex-col gap-20 pb-16">
{/* Hero Section */}
<section className="flex flex-col items-center px-2 pt-8 text-center sm:pt-20">
<h1 className="text-3xl font-bold tracking-tight sm:text-5xl lg:text-6xl">
{t("hero_title")}
</h1>
<p className="mt-4 max-w-2xl text-base text-[var(--color-text-secondary)] sm:text-xl">
{t("hero_subtitle")}
</p>
<div className="mt-8">
<Link
href={`/${locale}/timeline`}
className="inline-flex min-h-[44px] items-center gap-2 rounded-lg bg-zinc-900 px-6 py-3 text-sm font-medium text-white transition-colors hover:bg-zinc-700 dark:bg-white dark:text-zinc-900 dark:hover:bg-zinc-200"
>
{t("start")}
<span aria-hidden="true">&rarr;</span>
</Link>
</div>
</section>
{/* Core Pattern Section */}
<section>
<div className="mb-6 text-center">
<h2 className="text-2xl font-bold sm:text-3xl">{t("core_pattern")}</h2>
<p className="mt-2 text-[var(--color-text-secondary)]">
{t("core_pattern_desc")}
</p>
</div>
<div className="mx-auto max-w-2xl overflow-hidden rounded-xl border border-zinc-800 bg-zinc-950">
<div className="flex items-center gap-2 border-b border-zinc-800 px-4 py-2.5">
<span className="h-3 w-3 rounded-full bg-red-500/70" />
<span className="h-3 w-3 rounded-full bg-yellow-500/70" />
<span className="h-3 w-3 rounded-full bg-green-500/70" />
<span className="ml-3 text-xs text-zinc-500">agent_loop.py</span>
</div>
<pre className="overflow-x-auto p-4 text-sm leading-relaxed">
<code>
<span className="text-purple-400">while</span>
<span className="text-zinc-300"> </span>
<span className="text-orange-300">True</span>
<span className="text-zinc-500">:</span>
{"\n"}
<span className="text-zinc-300">{" "}response = client.messages.</span>
<span className="text-blue-400">create</span>
<span className="text-zinc-500">(</span>
<span className="text-zinc-300">messages=</span>
<span className="text-zinc-300">messages</span>
<span className="text-zinc-500">,</span>
<span className="text-zinc-300"> tools=</span>
<span className="text-zinc-300">tools</span>
<span className="text-zinc-500">)</span>
{"\n"}
<span className="text-purple-400">{" "}if</span>
<span className="text-zinc-300"> response.stop_reason != </span>
<span className="text-green-400">&quot;tool_use&quot;</span>
<span className="text-zinc-500">:</span>
{"\n"}
<span className="text-purple-400">{" "}break</span>
{"\n"}
<span className="text-purple-400">{" "}for</span>
<span className="text-zinc-300"> tool_call </span>
<span className="text-purple-400">in</span>
<span className="text-zinc-300"> response.content</span>
<span className="text-zinc-500">:</span>
{"\n"}
<span className="text-zinc-300">{" "}result = </span>
<span className="text-blue-400">execute_tool</span>
<span className="text-zinc-500">(</span>
<span className="text-zinc-300">tool_call.name</span>
<span className="text-zinc-500">,</span>
<span className="text-zinc-300"> tool_call.input</span>
<span className="text-zinc-500">)</span>
{"\n"}
<span className="text-zinc-300">{" "}messages.</span>
<span className="text-blue-400">append</span>
<span className="text-zinc-500">(</span>
<span className="text-zinc-300">result</span>
<span className="text-zinc-500">)</span>
</code>
</pre>
</div>
</section>
{/* Message Flow Visualization */}
<section>
<div className="mb-6 text-center">
<h2 className="text-2xl font-bold sm:text-3xl">{t("message_flow")}</h2>
<p className="mt-2 text-[var(--color-text-secondary)]">
{t("message_flow_desc")}
</p>
</div>
<div className="mx-auto max-w-2xl">
<MessageFlow />
</div>
</section>
{/* Learning Path Preview */}
<section>
<div className="mb-6 text-center">
<h2 className="text-2xl font-bold sm:text-3xl">{t("learning_path")}</h2>
<p className="mt-2 text-[var(--color-text-secondary)]">
{t("learning_path_desc")}
</p>
</div>
<div className="grid grid-cols-1 gap-3 sm:grid-cols-2 lg:grid-cols-3">
{LEARNING_PATH.map((versionId) => {
const meta = VERSION_META[versionId];
const data = getVersionData(versionId);
if (!meta || !data) return null;
return (
<Link
key={versionId}
href={`/${locale}/${versionId}`}
className="group block"
>
<Card
className={cn(
"h-full border transition-all duration-200",
LAYER_BORDER_COLORS[meta.layer]
)}
>
<div className="flex items-start justify-between gap-2">
<LayerBadge layer={meta.layer}>{versionId}</LayerBadge>
<span className="text-xs tabular-nums text-[var(--color-text-secondary)]">
{data.loc} {t("loc")}
</span>
</div>
<h3 className="mt-3 text-sm font-semibold group-hover:underline">
{meta.title}
</h3>
<p className="mt-1 text-xs text-[var(--color-text-secondary)]">
{meta.keyInsight}
</p>
</Card>
</Link>
);
})}
</div>
</section>
{/* Layer Overview */}
<section>
<div className="mb-6 text-center">
<h2 className="text-2xl font-bold sm:text-3xl">{t("layers_title")}</h2>
<p className="mt-2 text-[var(--color-text-secondary)]">
{t("layers_desc")}
</p>
</div>
<div className="flex flex-col gap-3">
{LAYERS.map((layer) => (
<div
key={layer.id}
className="flex items-center gap-4 rounded-xl border border-[var(--color-border)] bg-[var(--color-bg)] p-4"
>
<div
className={cn(
"h-full w-1.5 self-stretch rounded-full",
LAYER_BAR_COLORS[layer.id]
)}
/>
<div className="flex-1">
<div className="flex items-center gap-2">
<h3 className="text-sm font-semibold">{layer.label}</h3>
<span className="text-xs text-[var(--color-text-secondary)]">
{layer.versions.length} {t("versions_in_layer")}
</span>
</div>
<div className="mt-2 flex flex-wrap gap-1.5">
{layer.versions.map((vid) => {
const meta = VERSION_META[vid];
return (
<Link key={vid} href={`/${locale}/${vid}`}>
<LayerBadge
layer={layer.id}
className="cursor-pointer transition-opacity hover:opacity-80"
>
{vid}: {meta?.title}
</LayerBadge>
</Link>
);
})}
</div>
</div>
</div>
))}
</div>
</section>
</div>
);
}

BIN
web/src/app/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

555
web/src/app/globals.css Normal file
View File

@ -0,0 +1,555 @@
@import "tailwindcss";
@custom-variant dark (&:where(.dark, .dark *));
:root {
--color-layer-tools: #3B82F6;
--color-layer-planning: #10B981;
--color-layer-memory: #8B5CF6;
--color-layer-concurrency: #F59E0B;
--color-layer-collaboration: #EF4444;
--color-bg: #ffffff;
--color-bg-secondary: #f4f4f5;
--color-text: #09090b;
--color-text-secondary: #71717a;
--color-border: #e4e4e7;
}
.dark {
--color-bg: #09090b;
--color-bg-secondary: #18181b;
--color-text: #fafafa;
--color-text-secondary: #a1a1aa;
--color-border: #27272a;
}
body {
background: var(--color-bg);
color: var(--color-text);
}
@media (max-width: 640px) {
pre, code {
font-size: 11px;
}
}
* {
-webkit-tap-highlight-color: transparent;
}
/* =====================================================
PROSE-CUSTOM: Premium documentation rendering
===================================================== */
/* -- Headings -- */
.prose-custom h1 {
margin-top: 2.5rem;
margin-bottom: 1rem;
font-size: 1.5rem;
line-height: 2rem;
font-weight: 800;
letter-spacing: -0.02em;
color: #09090b;
}
.dark .prose-custom h1 {
color: #fafafa;
}
.prose-custom h2 {
margin-top: 2.5rem;
margin-bottom: 1rem;
padding-bottom: 0.5rem;
font-size: 1.25rem;
line-height: 1.75rem;
font-weight: 700;
letter-spacing: -0.01em;
color: #09090b;
border-bottom: 1px solid #e4e4e7;
}
.dark .prose-custom h2 {
color: #fafafa;
border-bottom-color: #27272a;
}
.prose-custom h3 {
margin-top: 2rem;
margin-bottom: 0.75rem;
font-size: 1.0625rem;
line-height: 1.5rem;
font-weight: 600;
color: #18181b;
}
.dark .prose-custom h3 {
color: #e4e4e7;
}
.prose-custom h4 {
margin-top: 1.5rem;
margin-bottom: 0.5rem;
font-size: 0.9375rem;
line-height: 1.5rem;
font-weight: 600;
color: #27272a;
}
.dark .prose-custom h4 {
color: #d4d4d8;
}
/* -- Paragraphs -- */
.prose-custom p {
margin-top: 0.75rem;
margin-bottom: 0.75rem;
font-size: 0.9rem;
line-height: 1.7;
color: #3f3f46;
}
.dark .prose-custom p {
color: #d4d4d8;
}
/* -- Hero callout (first blockquote) -- */
.prose-custom blockquote.hero-callout {
position: relative;
margin-top: 0;
margin-bottom: 1.5rem;
border-left: none;
border-radius: 0.75rem;
background: linear-gradient(135deg, #eff6ff 0%, #f0fdf4 100%);
padding: 1.25rem 1.5rem 1.25rem 1.75rem;
font-style: normal;
overflow: hidden;
}
.prose-custom blockquote.hero-callout::before {
content: '';
position: absolute;
left: 0;
top: 0;
bottom: 0;
width: 4px;
background: linear-gradient(to bottom, #3b82f6, #10b981);
border-radius: 4px 0 0 4px;
}
.prose-custom blockquote.hero-callout p {
font-size: 0.95rem;
line-height: 1.65;
font-weight: 500;
color: #1e40af;
margin: 0;
}
.dark .prose-custom blockquote.hero-callout {
background: linear-gradient(135deg, #172554 0%, #052e16 100%);
}
.dark .prose-custom blockquote.hero-callout p {
color: #93c5fd;
}
/* -- Regular blockquotes -- */
.prose-custom blockquote {
margin-top: 1rem;
margin-bottom: 1rem;
border-left: 3px solid #a5b4fc;
border-radius: 0 0.5rem 0.5rem 0;
background-color: #eef2ff;
padding: 0.75rem 1rem;
font-style: normal;
}
.prose-custom blockquote p {
color: #4338ca;
font-size: 0.875rem;
margin: 0;
}
.dark .prose-custom blockquote {
border-left-color: #6366f1;
background-color: rgba(99, 102, 241, 0.1);
}
.dark .prose-custom blockquote p {
color: #c7d2fe;
}
/* -- Code blocks with language label -- */
.prose-custom pre {
position: relative;
overflow-x: auto;
margin-top: 1rem;
margin-bottom: 1rem;
border-radius: 0.75rem;
border: 1px solid #1e293b;
background-color: #0f172a;
padding: 1.25rem;
font-size: 0.8125rem;
line-height: 1.6;
color: #e2e8f0;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
}
.prose-custom pre.code-block {
padding-top: 2.25rem;
}
.prose-custom pre.code-block::before {
content: attr(data-language);
position: absolute;
top: 0;
right: 0.75rem;
padding: 0.125rem 0.625rem 0.25rem;
background: #3b82f6;
color: #ffffff;
font-size: 0.625rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.08em;
border-radius: 0 0 0.375rem 0.375rem;
font-family: system-ui, -apple-system, sans-serif;
}
.prose-custom pre.code-block[data-language="sh"]::before {
background: #22c55e;
content: "terminal";
}
/* -- ASCII diagram containers -- */
.prose-custom pre.ascii-diagram {
background: linear-gradient(135deg, #f8fafc, #f1f5f9);
border: 1px solid #cbd5e1;
color: #334155;
text-align: center;
font-size: 0.75rem;
line-height: 1.35;
padding: 1.5rem 1rem;
}
.dark .prose-custom pre.ascii-diagram {
background: linear-gradient(135deg, #1e1b4b, #172554);
border-color: #312e81;
color: #c7d2fe;
}
/* -- Inline code -- */
.prose-custom :not(pre) > code {
border-radius: 0.375rem;
background-color: #f1f5f9;
border: 1px solid #e2e8f0;
padding: 0.125rem 0.425rem;
font-size: 0.8125rem;
font-weight: 500;
color: #be185d;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
}
.dark .prose-custom :not(pre) > code {
background-color: #27272a;
border-color: #3f3f46;
color: #f9a8d4;
}
/* -- Links -- */
.prose-custom a {
color: #2563eb;
font-weight: 500;
text-decoration: underline;
text-decoration-color: #93c5fd;
text-underline-offset: 2px;
transition: text-decoration-color 0.15s;
}
.prose-custom a:hover {
text-decoration-color: #2563eb;
}
.dark .prose-custom a {
color: #60a5fa;
text-decoration-color: #1e40af;
}
.dark .prose-custom a:hover {
text-decoration-color: #60a5fa;
}
/* -- Lists -- */
.prose-custom ul {
margin-top: 0.75rem;
margin-bottom: 0.75rem;
padding-left: 1.5rem;
font-size: 0.9rem;
line-height: 1.7;
color: #3f3f46;
}
.dark .prose-custom ul {
color: #d4d4d8;
}
.prose-custom ul > li {
margin-top: 0.375rem;
margin-bottom: 0.375rem;
position: relative;
}
.prose-custom ul > li::marker {
color: #3b82f6;
}
.prose-custom ol {
margin-top: 0.75rem;
margin-bottom: 0.75rem;
padding-left: 0;
list-style: none;
counter-reset: step-counter;
font-size: 0.9rem;
line-height: 1.7;
color: #3f3f46;
}
.dark .prose-custom ol {
color: #d4d4d8;
}
.prose-custom ol > li {
counter-increment: step-counter;
margin-top: 0.75rem;
margin-bottom: 0.75rem;
padding-left: 2.75rem;
position: relative;
}
.prose-custom ol > li::before {
content: counter(step-counter);
position: absolute;
left: 0;
top: 0;
width: 1.75rem;
height: 1.75rem;
display: flex;
align-items: center;
justify-content: center;
border-radius: 0.5rem;
background: linear-gradient(135deg, #3b82f6, #6366f1);
color: #ffffff;
font-size: 0.75rem;
font-weight: 700;
font-family: ui-monospace, SFMono-Regular, monospace;
flex-shrink: 0;
}
/* Reset nested lists inside ol to normal style */
.prose-custom ol > li > ul {
padding-left: 1.25rem;
}
.prose-custom ol > li > ul > li {
padding-left: 0;
}
.prose-custom ol > li > ul > li::before {
display: none;
}
/* -- Tables -- */
.prose-custom table {
width: 100%;
margin-top: 1.25rem;
margin-bottom: 1.25rem;
border-collapse: separate;
border-spacing: 0;
font-size: 0.8125rem;
line-height: 1.5;
border-radius: 0.75rem;
overflow: hidden;
border: 1px solid #e2e8f0;
}
.dark .prose-custom table {
border-color: #27272a;
}
.prose-custom thead {
border-bottom: none;
}
.prose-custom th {
padding: 0.625rem 1rem;
text-align: left;
font-weight: 600;
font-size: 0.6875rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #64748b;
background-color: #f8fafc;
border-bottom: 1px solid #e2e8f0;
}
.dark .prose-custom th {
color: #94a3b8;
background-color: #18181b;
border-bottom-color: #27272a;
}
.prose-custom td {
padding: 0.625rem 1rem;
border-bottom: 1px solid #f1f5f9;
color: #475569;
}
.prose-custom td code {
font-size: 0.75rem;
}
.dark .prose-custom td {
border-bottom-color: #1e1e22;
color: #cbd5e1;
}
.prose-custom tbody tr:last-child td {
border-bottom: none;
}
.prose-custom tbody tr:hover {
background-color: #f8fafc;
}
.dark .prose-custom tbody tr:hover {
background-color: #111113;
}
/* -- Horizontal rules -- */
.prose-custom hr {
margin-top: 2rem;
margin-bottom: 2rem;
border: none;
height: 1px;
background: linear-gradient(to right, transparent, #d4d4d8, transparent);
}
.dark .prose-custom hr {
background: linear-gradient(to right, transparent, #3f3f46, transparent);
}
/* -- Strong / Em -- */
.prose-custom strong {
font-weight: 700;
color: #09090b;
}
.dark .prose-custom strong {
color: #fafafa;
}
.prose-custom em {
font-style: italic;
color: #52525b;
}
.dark .prose-custom em {
color: #a1a1aa;
}
/* =====================================================
HIGHLIGHT.JS TOKEN THEME (code syntax highlighting)
===================================================== */
.hljs {
background: transparent !important;
}
.hljs-keyword,
.hljs-selector-tag,
.hljs-type {
color: #c084fc;
font-weight: 500;
}
.hljs-literal,
.hljs-symbol,
.hljs-bullet {
color: #fb923c;
}
.hljs-string,
.hljs-doctag,
.hljs-template-variable,
.hljs-variable {
color: #34d399;
}
.hljs-number {
color: #fb923c;
}
.hljs-comment,
.hljs-quote {
color: #64748b;
font-style: italic;
}
.hljs-title,
.hljs-section {
color: #60a5fa;
font-weight: 600;
}
.hljs-title.function_,
.hljs-title.class_ {
color: #60a5fa;
}
.hljs-built_in {
color: #f472b6;
}
.hljs-attr,
.hljs-attribute {
color: #fbbf24;
}
.hljs-params {
color: #e2e8f0;
}
.hljs-meta {
color: #94a3b8;
}
.hljs-name,
.hljs-tag {
color: #f87171;
}
.hljs-selector-class,
.hljs-selector-id {
color: #a78bfa;
}
.hljs-deletion {
color: #fca5a5;
background-color: rgba(239, 68, 68, 0.15);
}
.hljs-addition {
color: #86efac;
background-color: rgba(34, 197, 94, 0.15);
}

View File

@ -0,0 +1,228 @@
"use client";
import { motion } from "framer-motion";
import { cn } from "@/lib/utils";
import { LAYERS } from "@/lib/constants";
import versionsData from "@/data/generated/versions.json";
const CLASS_DESCRIPTIONS: Record<string, string> = {
TodoManager: "Visible task planning with constraints",
SkillLoader: "Dynamic knowledge injection from SKILL.md files",
ContextManager: "Three-layer context compression pipeline",
Task: "File-based persistent task with dependencies",
TaskManager: "File-based persistent task CRUD with dependencies",
BackgroundTask: "Single background execution unit",
BackgroundManager: "Non-blocking thread execution + notification queue",
TeammateManager: "Multi-agent team lifecycle and coordination",
Teammate: "Individual agent identity and state tracking",
SharedBoard: "Cross-agent shared state coordination",
};
interface ArchDiagramProps {
version: string;
}
function getLayerColor(versionId: string): string {
const layer = LAYERS.find((l) => (l.versions as readonly string[]).includes(versionId));
return layer?.color ?? "#71717a";
}
function getLayerColorClasses(versionId: string): {
border: string;
bg: string;
} {
const v =
versionsData.versions.find((v) => v.id === versionId) as { layer?: string } | undefined;
const layer = v?.layer;
switch (layer) {
case "tools":
return {
border: "border-blue-500",
bg: "bg-blue-500/10",
};
case "planning":
return {
border: "border-emerald-500",
bg: "bg-emerald-500/10",
};
case "memory":
return {
border: "border-purple-500",
bg: "bg-purple-500/10",
};
case "concurrency":
return {
border: "border-amber-500",
bg: "bg-amber-500/10",
};
case "collaboration":
return {
border: "border-red-500",
bg: "bg-red-500/10",
};
default:
return {
border: "border-zinc-500",
bg: "bg-zinc-500/10",
};
}
}
function collectClassesUpTo(
targetId: string
): { name: string; introducedIn: string }[] {
const { versions, diffs } = versionsData;
const order = versions.map((v) => v.id);
const targetIdx = order.indexOf(targetId);
if (targetIdx < 0) return [];
const result: { name: string; introducedIn: string }[] = [];
const seen = new Set<string>();
for (let i = 0; i <= targetIdx; i++) {
const v = versions[i];
if (!v.classes) continue;
for (const cls of v.classes) {
if (!seen.has(cls.name)) {
seen.add(cls.name);
result.push({ name: cls.name, introducedIn: v.id });
}
}
}
return result;
}
function getNewClassNames(version: string): Set<string> {
const diff = versionsData.diffs.find((d) => d.to === version);
if (!diff) {
const v = versionsData.versions.find((ver) => ver.id === version);
return new Set(v?.classes?.map((c) => c.name) ?? []);
}
return new Set(diff.newClasses ?? []);
}
export function ArchDiagram({ version }: ArchDiagramProps) {
const allClasses = collectClassesUpTo(version);
const newClassNames = getNewClassNames(version);
const versionData = versionsData.versions.find((v) => v.id === version);
const tools = versionData?.tools ?? [];
const reversed = [...allClasses].reverse();
return (
<div className="space-y-3">
{reversed.map((cls, i) => {
const isNew = newClassNames.has(cls.name);
const colorClasses = getLayerColorClasses(cls.introducedIn);
return (
<div key={cls.name}>
{i > 0 && (
<div className="flex justify-center py-1">
<motion.svg
width="24"
height="20"
viewBox="0 0 24 20"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
transition={{ delay: i * 0.08 + 0.05 }}
>
<motion.line
x1={12}
y1={0}
x2={12}
y2={14}
stroke="var(--color-text-secondary)"
strokeWidth={1.5}
initial={{ pathLength: 0 }}
animate={{ pathLength: 1 }}
transition={{ duration: 0.3, delay: i * 0.08 }}
/>
<motion.polygon
points="7,12 12,19 17,12"
fill="var(--color-text-secondary)"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
transition={{ delay: i * 0.08 + 0.2 }}
/>
</motion.svg>
</div>
)}
<motion.div
key={cls.name}
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: i * 0.08, duration: 0.3 }}
className={cn(
"rounded-lg border-2 px-4 py-3 transition-colors",
isNew
? cn(colorClasses.border, colorClasses.bg)
: "border-zinc-200 bg-zinc-50 dark:border-zinc-700 dark:bg-zinc-800/50"
)}
>
<div className="flex items-center justify-between">
<div>
<span
className={cn(
"font-mono text-sm font-semibold",
isNew
? "text-zinc-900 dark:text-white"
: "text-zinc-400 dark:text-zinc-500"
)}
>
{cls.name}
</span>
<p
className={cn(
"mt-0.5 text-xs",
isNew
? "text-zinc-600 dark:text-zinc-300"
: "text-zinc-400 dark:text-zinc-500"
)}
>
{CLASS_DESCRIPTIONS[cls.name] || ""}
</p>
</div>
<div className="flex items-center gap-2">
<span className="text-xs text-zinc-400 dark:text-zinc-500">
{cls.introducedIn}
</span>
{isNew && (
<span className="rounded-full bg-zinc-900 px-2 py-0.5 text-[10px] font-bold uppercase text-white dark:bg-white dark:text-zinc-900">
NEW
</span>
)}
</div>
</div>
</motion.div>
</div>
);
})}
{allClasses.length === 0 && (
<div className="rounded-lg border border-dashed border-zinc-300 px-4 py-6 text-center text-sm text-zinc-400 dark:border-zinc-600">
No classes in this version (functions only)
</div>
)}
{tools.length > 0 && (
<motion.div
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
transition={{ delay: reversed.length * 0.08 + 0.1 }}
className="flex flex-wrap gap-1.5 pt-2"
>
{tools.map((tool) => (
<span
key={tool}
className="rounded-md bg-zinc-100 px-2 py-1 font-mono text-xs text-zinc-600 dark:bg-zinc-800 dark:text-zinc-400"
>
{tool}
</span>
))}
</motion.div>
)}
</div>
);
}

View File

@ -0,0 +1,145 @@
"use client";
import { useState } from "react";
import { motion, AnimatePresence } from "framer-motion";
import { useTranslations, useLocale } from "@/lib/i18n";
import { ChevronDown } from "lucide-react";
import { cn } from "@/lib/utils";
import s01Annotations from "@/data/annotations/s01.json";
import s02Annotations from "@/data/annotations/s02.json";
import s03Annotations from "@/data/annotations/s03.json";
import s04Annotations from "@/data/annotations/s04.json";
import s05Annotations from "@/data/annotations/s05.json";
import s06Annotations from "@/data/annotations/s06.json";
import s07Annotations from "@/data/annotations/s07.json";
import s08Annotations from "@/data/annotations/s08.json";
import s09Annotations from "@/data/annotations/s09.json";
import s10Annotations from "@/data/annotations/s10.json";
import s11Annotations from "@/data/annotations/s11.json";
interface Decision {
id: string;
title: string;
description: string;
alternatives: string;
zh?: { title: string; description: string };
ja?: { title: string; description: string };
}
interface AnnotationFile {
version: string;
decisions: Decision[];
}
const ANNOTATIONS: Record<string, AnnotationFile> = {
s01: s01Annotations as AnnotationFile,
s02: s02Annotations as AnnotationFile,
s03: s03Annotations as AnnotationFile,
s04: s04Annotations as AnnotationFile,
s05: s05Annotations as AnnotationFile,
s06: s06Annotations as AnnotationFile,
s07: s07Annotations as AnnotationFile,
s08: s08Annotations as AnnotationFile,
s09: s09Annotations as AnnotationFile,
s10: s10Annotations as AnnotationFile,
s11: s11Annotations as AnnotationFile,
};
interface DesignDecisionsProps {
version: string;
}
function DecisionCard({
decision,
locale,
}: {
decision: Decision;
locale: string;
}) {
const [open, setOpen] = useState(false);
const t = useTranslations("version");
const localized =
locale !== "en" ? (decision as unknown as Record<string, unknown>)[locale] as { title?: string; description?: string } | undefined : undefined;
const title = localized?.title || decision.title;
const description = localized?.description || decision.description;
return (
<div className="rounded-lg border border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-900">
<button
onClick={() => setOpen(!open)}
className="flex w-full items-center justify-between px-4 py-3 text-left"
>
<span className="pr-4 text-sm font-semibold text-zinc-900 dark:text-white">
{title}
</span>
<ChevronDown
size={16}
className={cn(
"shrink-0 text-zinc-400 transition-transform duration-200",
open && "rotate-180"
)}
/>
</button>
<AnimatePresence>
{open && (
<motion.div
initial={{ height: 0, opacity: 0 }}
animate={{ height: "auto", opacity: 1 }}
exit={{ height: 0, opacity: 0 }}
transition={{ duration: 0.2 }}
className="overflow-hidden"
>
<div className="border-t border-zinc-100 px-4 py-3 dark:border-zinc-800">
<p className="text-sm leading-relaxed text-zinc-600 dark:text-zinc-300">
{description}
</p>
{decision.alternatives && (
<div className="mt-3">
<h4 className="text-xs font-medium uppercase tracking-wide text-zinc-400 dark:text-zinc-500">
{t("alternatives")}
</h4>
<p className="mt-1 text-sm leading-relaxed text-zinc-500 dark:text-zinc-400">
{decision.alternatives}
</p>
</div>
)}
</div>
</motion.div>
)}
</AnimatePresence>
</div>
);
}
export function DesignDecisions({ version }: DesignDecisionsProps) {
const t = useTranslations("version");
const locale = useLocale();
const annotations = ANNOTATIONS[version];
if (!annotations || annotations.decisions.length === 0) {
return null;
}
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">{t("design_decisions")}</h2>
<div className="space-y-2">
{annotations.decisions.map((decision, i) => (
<motion.div
key={decision.id}
initial={{ opacity: 0, y: 10 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: i * 0.05 }}
>
<DecisionCard decision={decision} locale={locale} />
</motion.div>
))}
</div>
</div>
);
}

View File

@ -0,0 +1,243 @@
"use client";
import { useEffect, useState } from "react";
import { motion } from "framer-motion";
import { useTranslations } from "@/lib/i18n";
import { getFlowForVersion } from "@/data/execution-flows";
import type { FlowNode, FlowEdge } from "@/types/agent-data";
const NODE_WIDTH = 140;
const NODE_HEIGHT = 40;
const DIAMOND_SIZE = 50;
const LAYER_COLORS: Record<string, string> = {
start: "#3B82F6",
process: "#10B981",
decision: "#F59E0B",
subprocess: "#8B5CF6",
end: "#EF4444",
};
function getNodeCenter(node: FlowNode): { cx: number; cy: number } {
return { cx: node.x, cy: node.y };
}
function getEdgePath(from: FlowNode, to: FlowNode): string {
const { cx: x1, cy: y1 } = getNodeCenter(from);
const { cx: x2, cy: y2 } = getNodeCenter(to);
const halfH = from.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2;
const halfHTo = to.type === "decision" ? DIAMOND_SIZE / 2 : NODE_HEIGHT / 2;
if (Math.abs(x1 - x2) < 10) {
const startY = y1 + halfH;
const endY = y2 - halfHTo;
return `M ${x1} ${startY} L ${x2} ${endY}`;
}
const startY = y1 + halfH;
const endY = y2 - halfHTo;
const midY = (startY + endY) / 2;
return `M ${x1} ${startY} L ${x1} ${midY} L ${x2} ${midY} L ${x2} ${endY}`;
}
function NodeShape({ node }: { node: FlowNode }) {
const color = LAYER_COLORS[node.type];
const lines = node.label.split("\n");
if (node.type === "decision") {
const half = DIAMOND_SIZE / 2;
return (
<g>
<polygon
points={`${node.x},${node.y - half} ${node.x + half},${node.y} ${node.x},${node.y + half} ${node.x - half},${node.y}`}
fill="none"
stroke={color}
strokeWidth={2}
/>
{lines.map((line, i) => (
<text
key={i}
x={node.x}
y={node.y + (i - (lines.length - 1) / 2) * 12}
textAnchor="middle"
dominantBaseline="central"
fontSize={10}
fontFamily="monospace"
fill="currentColor"
>
{line}
</text>
))}
</g>
);
}
if (node.type === "start" || node.type === "end") {
return (
<g>
<rect
x={node.x - NODE_WIDTH / 2}
y={node.y - NODE_HEIGHT / 2}
width={NODE_WIDTH}
height={NODE_HEIGHT}
rx={NODE_HEIGHT / 2}
fill="none"
stroke={color}
strokeWidth={2}
/>
<text
x={node.x}
y={node.y}
textAnchor="middle"
dominantBaseline="central"
fontSize={12}
fontWeight={600}
fontFamily="monospace"
fill="currentColor"
>
{node.label}
</text>
</g>
);
}
const isSubprocess = node.type === "subprocess";
return (
<g>
<rect
x={node.x - NODE_WIDTH / 2}
y={node.y - NODE_HEIGHT / 2}
width={NODE_WIDTH}
height={NODE_HEIGHT}
rx={4}
fill="none"
stroke={color}
strokeWidth={2}
strokeDasharray={isSubprocess ? "6 3" : undefined}
/>
{lines.map((line, i) => (
<text
key={i}
x={node.x}
y={node.y + (i - (lines.length - 1) / 2) * 13}
textAnchor="middle"
dominantBaseline="central"
fontSize={11}
fontFamily="monospace"
fill="currentColor"
>
{line}
</text>
))}
</g>
);
}
function EdgePath({
edge,
nodes,
index,
}: {
edge: FlowEdge;
nodes: FlowNode[];
index: number;
}) {
const from = nodes.find((n) => n.id === edge.from);
const to = nodes.find((n) => n.id === edge.to);
if (!from || !to) return null;
const d = getEdgePath(from, to);
const midX = (from.x + to.x) / 2;
const midY = (from.y + to.y) / 2;
return (
<g>
<motion.path
d={d}
fill="none"
stroke="var(--color-text-secondary)"
strokeWidth={1.5}
markerEnd="url(#arrowhead)"
initial={{ pathLength: 0, opacity: 0 }}
animate={{ pathLength: 1, opacity: 1 }}
transition={{ duration: 0.5, delay: index * 0.12 }}
/>
{edge.label && (
<motion.text
x={midX + 8}
y={midY - 4}
fontSize={10}
fill="var(--color-text-secondary)"
fontFamily="monospace"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
transition={{ delay: index * 0.12 + 0.3 }}
>
{edge.label}
</motion.text>
)}
</g>
);
}
interface ExecutionFlowProps {
version: string;
}
export function ExecutionFlow({ version }: ExecutionFlowProps) {
const t = useTranslations("version");
const [flow, setFlow] = useState<ReturnType<typeof getFlowForVersion>>(null);
useEffect(() => {
setFlow(getFlowForVersion(version));
}, [version]);
if (!flow) return null;
const maxY = Math.max(...flow.nodes.map((n) => n.y)) + 50;
return (
<section>
<h2 className="mb-4 text-xl font-semibold">{t("execution_flow")}</h2>
<div className="overflow-x-auto rounded-xl border border-[var(--color-border)] bg-[var(--color-bg)] p-4">
<svg
viewBox={`0 0 600 ${maxY}`}
className="mx-auto w-full max-w-[600px]"
style={{ minHeight: 300 }}
>
<defs>
<marker
id="arrowhead"
markerWidth={8}
markerHeight={6}
refX={8}
refY={3}
orient="auto"
>
<polygon
points="0 0, 8 3, 0 6"
fill="var(--color-text-secondary)"
/>
</marker>
</defs>
{flow.edges.map((edge, i) => (
<EdgePath key={`${edge.from}-${edge.to}`} edge={edge} nodes={flow.nodes} index={i} />
))}
{flow.nodes.map((node, i) => (
<motion.g
key={node.id}
initial={{ opacity: 0, y: -10 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: i * 0.06, duration: 0.3 }}
>
<NodeShape node={node} />
</motion.g>
))}
</svg>
</div>
</section>
);
}

View File

@ -0,0 +1,70 @@
"use client";
import { useState, useEffect, useRef } from "react";
import { motion, AnimatePresence } from "framer-motion";
const FLOW_STEPS = [
{ role: "user", label: "user", color: "bg-blue-500" },
{ role: "assistant", label: "assistant", color: "bg-zinc-600" },
{ role: "tool_call", label: "tool_call", color: "bg-amber-500" },
{ role: "tool_result", label: "tool_result", color: "bg-emerald-500" },
{ role: "assistant", label: "assistant", color: "bg-zinc-600" },
{ role: "tool_call", label: "tool_call", color: "bg-amber-500" },
{ role: "tool_result", label: "tool_result", color: "bg-emerald-500" },
{ role: "assistant", label: "assistant (final)", color: "bg-zinc-600" },
];
export function MessageFlow() {
const [count, setCount] = useState(0);
const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
useEffect(() => {
intervalRef.current = setInterval(() => {
setCount((prev) => {
if (prev >= FLOW_STEPS.length) {
setTimeout(() => setCount(0), 1500);
return prev;
}
return prev + 1;
});
}, 800);
return () => {
if (intervalRef.current) clearInterval(intervalRef.current);
};
}, []);
return (
<div className="overflow-hidden rounded-xl border border-[var(--color-border)] bg-[var(--color-bg)] p-4">
<div className="mb-3 flex items-center gap-2">
<span className="font-mono text-xs text-[var(--color-text-secondary)]">
messages[]
</span>
<span className="ml-auto rounded bg-zinc-100 px-1.5 py-0.5 font-mono text-xs tabular-nums dark:bg-zinc-800">
len={count}
</span>
</div>
<div className="flex gap-1.5 overflow-x-auto pb-1">
<AnimatePresence>
{FLOW_STEPS.slice(0, count).map((step, i) => (
<motion.div
key={i}
initial={{ opacity: 0, scale: 0.7, width: 0 }}
animate={{ opacity: 1, scale: 1, width: "auto" }}
transition={{ duration: 0.25 }}
className={`flex shrink-0 items-center rounded-md px-2.5 py-1.5 ${step.color}`}
>
<span className="whitespace-nowrap font-mono text-[10px] font-medium text-white">
{step.label}
</span>
</motion.div>
))}
</AnimatePresence>
{count === 0 && (
<div className="flex h-7 items-center text-xs text-[var(--color-text-secondary)]">
[]
</div>
)}
</div>
</div>
);
}

View File

@ -0,0 +1,102 @@
"use client";
import { useMemo } from "react";
interface SourceViewerProps {
source: string;
filename: string;
}
function highlightLine(line: string): React.ReactNode[] {
const trimmed = line.trimStart();
if (trimmed.startsWith("#")) {
return [
<span key={0} className="text-zinc-400 italic">
{line}
</span>,
];
}
if (trimmed.startsWith("@")) {
return [
<span key={0} className="text-amber-400">
{line}
</span>,
];
}
if (trimmed.startsWith('"""') || trimmed.startsWith("'''")) {
return [
<span key={0} className="text-emerald-500">
{line}
</span>,
];
}
const keywordSet = new Set([
"def", "class", "import", "from", "return", "if", "elif", "else",
"while", "for", "in", "not", "and", "or", "is", "None", "True",
"False", "try", "except", "raise", "with", "as", "yield", "break",
"continue", "pass", "global", "lambda", "async", "await",
]);
const parts = line.split(
/(\b(?:def|class|import|from|return|if|elif|else|while|for|in|not|and|or|is|None|True|False|try|except|raise|with|as|yield|break|continue|pass|global|lambda|async|await|self)\b|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|f"(?:[^"\\]|\\.)*"|f'(?:[^'\\]|\\.)*'|#.*$|\b\d+(?:\.\d+)?\b)/
);
return parts.map((part, idx) => {
if (!part) return null;
if (keywordSet.has(part)) {
return <span key={idx} className="text-blue-400 font-medium">{part}</span>;
}
if (part === "self") {
return <span key={idx} className="text-purple-400">{part}</span>;
}
if (part.startsWith("#")) {
return <span key={idx} className="text-zinc-400 italic">{part}</span>;
}
if (
(part.startsWith('"') && part.endsWith('"')) ||
(part.startsWith("'") && part.endsWith("'")) ||
(part.startsWith('f"') && part.endsWith('"')) ||
(part.startsWith("f'") && part.endsWith("'"))
) {
return <span key={idx} className="text-emerald-500">{part}</span>;
}
if (/^\d+(?:\.\d+)?$/.test(part)) {
return <span key={idx} className="text-orange-400">{part}</span>;
}
return <span key={idx}>{part}</span>;
});
}
export function SourceViewer({ source, filename }: SourceViewerProps) {
const lines = useMemo(() => source.split("\n"), [source]);
return (
<div className="rounded-lg border border-zinc-200 dark:border-zinc-700">
<div className="flex items-center gap-2 border-b border-zinc-200 px-4 py-2 dark:border-zinc-700">
<div className="flex gap-1.5">
<span className="h-3 w-3 rounded-full bg-red-400" />
<span className="h-3 w-3 rounded-full bg-yellow-400" />
<span className="h-3 w-3 rounded-full bg-green-400" />
</div>
<span className="font-mono text-xs text-zinc-400">{filename}</span>
</div>
<div className="overflow-x-auto bg-zinc-950">
<pre className="p-2 text-[10px] leading-4 sm:p-4 sm:text-xs sm:leading-5">
<code>
{lines.map((line, i) => (
<div key={i} className="flex">
<span className="mr-2 inline-block w-6 shrink-0 select-none text-right text-zinc-600 sm:mr-4 sm:w-8">
{i + 1}
</span>
<span className="text-zinc-200">
{highlightLine(line)}
</span>
</div>
))}
</code>
</pre>
</div>
</div>
);
}

View File

@ -0,0 +1,205 @@
"use client";
import { useState, useMemo } from "react";
import { diffLines, Change } from "diff";
import { cn } from "@/lib/utils";
interface CodeDiffProps {
oldSource: string;
newSource: string;
oldLabel: string;
newLabel: string;
}
export function CodeDiff({ oldSource, newSource, oldLabel, newLabel }: CodeDiffProps) {
const [viewMode, setViewMode] = useState<"unified" | "split">("unified");
const changes = useMemo(() => diffLines(oldSource, newSource), [oldSource, newSource]);
return (
<div>
<div className="mb-4 flex flex-col gap-2 sm:flex-row sm:items-center sm:justify-between">
<div className="min-w-0 truncate text-sm text-zinc-500 dark:text-zinc-400">
<span className="font-medium text-zinc-700 dark:text-zinc-300">{oldLabel}</span>
{" -> "}
<span className="font-medium text-zinc-700 dark:text-zinc-300">{newLabel}</span>
</div>
<div className="flex shrink-0 rounded-lg border border-zinc-200 dark:border-zinc-700">
<button
onClick={() => setViewMode("unified")}
className={cn(
"min-h-[36px] px-3 text-xs font-medium transition-colors",
viewMode === "unified"
? "bg-zinc-900 text-white dark:bg-white dark:text-zinc-900"
: "text-zinc-500 hover:text-zinc-700 dark:text-zinc-400"
)}
>
Unified
</button>
<button
onClick={() => setViewMode("split")}
className={cn(
"min-h-[36px] px-3 text-xs font-medium transition-colors sm:inline-flex hidden",
viewMode === "split"
? "bg-zinc-900 text-white dark:bg-white dark:text-zinc-900"
: "text-zinc-500 hover:text-zinc-700 dark:text-zinc-400"
)}
>
Split
</button>
</div>
</div>
{viewMode === "unified" ? (
<UnifiedView changes={changes} />
) : (
<SplitView changes={changes} />
)}
</div>
);
}
function UnifiedView({ changes }: { changes: Change[] }) {
let oldLine = 1;
let newLine = 1;
const rows: { oldNum: number | null; newNum: number | null; type: "add" | "remove" | "context"; text: string }[] = [];
for (const change of changes) {
const lines = change.value.replace(/\n$/, "").split("\n");
for (const line of lines) {
if (change.added) {
rows.push({ oldNum: null, newNum: newLine++, type: "add", text: line });
} else if (change.removed) {
rows.push({ oldNum: oldLine++, newNum: null, type: "remove", text: line });
} else {
rows.push({ oldNum: oldLine++, newNum: newLine++, type: "context", text: line });
}
}
}
return (
<div className="overflow-x-auto rounded-lg border border-zinc-200 dark:border-zinc-700">
<table className="w-full border-collapse font-mono text-xs leading-5">
<tbody>
{rows.map((row, i) => (
<tr
key={i}
className={cn(
row.type === "add" && "bg-green-50 dark:bg-green-950/30",
row.type === "remove" && "bg-red-50 dark:bg-red-950/30"
)}
>
<td className="w-10 select-none border-r border-zinc-200 px-2 text-right text-zinc-400 dark:border-zinc-700 dark:text-zinc-600">
{row.oldNum ?? ""}
</td>
<td className="w-10 select-none border-r border-zinc-200 px-2 text-right text-zinc-400 dark:border-zinc-700 dark:text-zinc-600">
{row.newNum ?? ""}
</td>
<td className="w-4 select-none px-1 text-center">
{row.type === "add" && <span className="text-green-600 dark:text-green-400">+</span>}
{row.type === "remove" && <span className="text-red-600 dark:text-red-400">-</span>}
</td>
<td className="whitespace-pre px-2">
<span
className={cn(
row.type === "add" && "text-green-800 dark:text-green-300",
row.type === "remove" && "text-red-800 dark:text-red-300",
row.type === "context" && "text-zinc-700 dark:text-zinc-300"
)}
>
{row.text}
</span>
</td>
</tr>
))}
</tbody>
</table>
</div>
);
}
function SplitView({ changes }: { changes: Change[] }) {
let oldLine = 1;
let newLine = 1;
type SplitRow = {
left: { num: number | null; text: string; type: "remove" | "context" | "empty" };
right: { num: number | null; text: string; type: "add" | "context" | "empty" };
};
const rows: SplitRow[] = [];
for (const change of changes) {
const lines = change.value.replace(/\n$/, "").split("\n");
if (change.removed) {
for (const line of lines) {
rows.push({
left: { num: oldLine++, text: line, type: "remove" },
right: { num: null, text: "", type: "empty" },
});
}
} else if (change.added) {
let filled = 0;
for (const line of lines) {
// Try to fill in empty right-side slots from preceding removes
const lastUnfilled = rows.length - lines.length + filled;
if (
lastUnfilled >= 0 &&
lastUnfilled < rows.length &&
rows[lastUnfilled].right.type === "empty" &&
rows[lastUnfilled].left.type === "remove"
) {
rows[lastUnfilled].right = { num: newLine++, text: line, type: "add" };
} else {
rows.push({
left: { num: null, text: "", type: "empty" },
right: { num: newLine++, text: line, type: "add" },
});
}
filled++;
}
} else {
for (const line of lines) {
rows.push({
left: { num: oldLine++, text: line, type: "context" },
right: { num: newLine++, text: line, type: "context" },
});
}
}
}
const cellClass = (type: string) =>
cn(
"whitespace-pre px-2",
type === "add" && "bg-green-50 text-green-800 dark:bg-green-950/30 dark:text-green-300",
type === "remove" && "bg-red-50 text-red-800 dark:bg-red-950/30 dark:text-red-300",
type === "context" && "text-zinc-700 dark:text-zinc-300",
type === "empty" && "bg-zinc-50 dark:bg-zinc-900"
);
return (
<div className="overflow-x-auto rounded-lg border border-zinc-200 dark:border-zinc-700">
<table className="w-full border-collapse font-mono text-xs leading-5">
<tbody>
{rows.map((row, i) => (
<tr key={i}>
<td className="w-10 select-none border-r border-zinc-200 px-2 text-right text-zinc-400 dark:border-zinc-700 dark:text-zinc-600">
{row.left.num ?? ""}
</td>
<td className={cn("w-1/2 border-r border-zinc-200 dark:border-zinc-700", cellClass(row.left.type))}>
{row.left.text}
</td>
<td className="w-10 select-none border-r border-zinc-200 px-2 text-right text-zinc-400 dark:border-zinc-700 dark:text-zinc-600">
{row.right.num ?? ""}
</td>
<td className={cn("w-1/2", cellClass(row.right.type))}>
{row.right.text}
</td>
</tr>
))}
</tbody>
</table>
</div>
);
}

View File

@ -0,0 +1,134 @@
"use client";
import { motion } from "framer-motion";
import { useTranslations } from "@/lib/i18n";
import { Card } from "@/components/ui/card";
interface WhatsNewProps {
diff: {
from: string;
to: string;
newClasses: string[];
newFunctions: string[];
newTools: string[];
locDelta: number;
} | null;
}
export function WhatsNew({ diff }: WhatsNewProps) {
const t = useTranslations("version");
const td = useTranslations("diff");
if (!diff) {
return null;
}
const hasContent =
diff.newClasses.length > 0 ||
diff.newTools.length > 0 ||
diff.newFunctions.length > 0 ||
diff.locDelta !== 0;
if (!hasContent) {
return null;
}
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold">{t("whats_new")}</h2>
<div className="grid gap-4 sm:grid-cols-2">
{diff.newClasses.length > 0 && (
<motion.div
initial={{ opacity: 0, y: 12 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.1 }}
>
<Card className="h-full">
<h3 className="mb-2 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{td("new_classes")}
</h3>
<div className="space-y-1.5">
{diff.newClasses.map((cls) => (
<div
key={cls}
className="rounded-md bg-emerald-50 px-3 py-1.5 font-mono text-sm font-medium text-emerald-700 dark:bg-emerald-900/20 dark:text-emerald-300"
>
{cls}
</div>
))}
</div>
</Card>
</motion.div>
)}
{diff.newTools.length > 0 && (
<motion.div
initial={{ opacity: 0, y: 12 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.15 }}
>
<Card className="h-full">
<h3 className="mb-2 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{td("new_tools")}
</h3>
<div className="flex flex-wrap gap-1.5">
{diff.newTools.map((tool) => (
<span
key={tool}
className="rounded-full bg-blue-50 px-3 py-1 font-mono text-xs font-medium text-blue-700 dark:bg-blue-900/20 dark:text-blue-300"
>
{tool}
</span>
))}
</div>
</Card>
</motion.div>
)}
{diff.newFunctions.length > 0 && (
<motion.div
initial={{ opacity: 0, y: 12 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.2 }}
>
<Card className="h-full">
<h3 className="mb-2 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{td("new_functions")}
</h3>
<ul className="space-y-1 text-sm text-zinc-700 dark:text-zinc-300">
{diff.newFunctions.map((fn) => (
<li key={fn} className="font-mono">
<span className="text-zinc-400 dark:text-zinc-500">
def{" "}
</span>
{fn}()
</li>
))}
</ul>
</Card>
</motion.div>
)}
{diff.locDelta !== 0 && (
<motion.div
initial={{ opacity: 0, y: 12 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.25 }}
>
<Card className="flex h-full items-center">
<div>
<h3 className="mb-1 text-sm font-medium text-zinc-500 dark:text-zinc-400">
{td("loc_delta")}
</h3>
<p className="text-2xl font-bold text-emerald-600 dark:text-emerald-400">
+{diff.locDelta} lines
</p>
</div>
</Card>
</motion.div>
)}
</div>
</div>
);
}

View File

@ -0,0 +1,91 @@
"use client";
import { useMemo } from "react";
import { useLocale } from "@/lib/i18n";
import docsData from "@/data/generated/docs.json";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkRehype from "remark-rehype";
import rehypeRaw from "rehype-raw";
import rehypeHighlight from "rehype-highlight";
import rehypeStringify from "rehype-stringify";
interface DocRendererProps {
version: string;
}
function renderMarkdown(md: string): string {
const result = unified()
.use(remarkParse)
.use(remarkGfm)
.use(remarkRehype, { allowDangerousHtml: true })
.use(rehypeRaw)
.use(rehypeHighlight, { detect: false, ignoreMissing: true })
.use(rehypeStringify)
.processSync(md);
return String(result);
}
function postProcessHtml(html: string): string {
// Add language labels to highlighted code blocks
html = html.replace(
/<pre><code class="hljs language-(\w+)">/g,
'<pre class="code-block" data-language="$1"><code class="hljs language-$1">'
);
// Wrap plain pre>code (ASCII art / diagrams) in diagram container
html = html.replace(
/<pre><code(?! class="hljs)([^>]*)>/g,
'<pre class="ascii-diagram"><code$1>'
);
// Mark the first blockquote as hero callout
html = html.replace(
/<blockquote>/,
'<blockquote class="hero-callout">'
);
// Remove the h1 (it's redundant with the page header)
html = html.replace(/<h1>.*?<\/h1>\n?/, "");
// Fix ordered list counter for interrupted lists (ol start="N")
html = html.replace(
/<ol start="(\d+)">/g,
(_, start) => `<ol style="counter-reset:step-counter ${parseInt(start) - 1}">`
);
return html;
}
export function DocRenderer({ version }: DocRendererProps) {
const locale = useLocale();
const doc = useMemo(() => {
const match = docsData.find(
(d: { version: string; locale: string }) =>
d.version === version && d.locale === locale
);
if (match) return match;
return docsData.find(
(d: { version: string; locale: string }) =>
d.version === version && d.locale === "en"
);
}, [version, locale]);
if (!doc) return null;
const html = useMemo(() => {
const raw = renderMarkdown(doc.content);
return postProcessHtml(raw);
}, [doc.content]);
return (
<div className="py-4">
<div
className="prose-custom"
dangerouslySetInnerHTML={{ __html: html }}
/>
</div>
);
}

View File

@ -0,0 +1,167 @@
"use client";
import Link from "next/link";
import { usePathname } from "next/navigation";
import { useTranslations, useLocale } from "@/lib/i18n";
import { Github, Menu, X, Sun, Moon } from "lucide-react";
import { useState } from "react";
import { cn } from "@/lib/utils";
const NAV_ITEMS = [
{ key: "timeline", href: "/timeline" },
{ key: "compare", href: "/compare" },
{ key: "layers", href: "/layers" },
] as const;
const LOCALES = [
{ code: "en", label: "EN" },
{ code: "zh", label: "中文" },
{ code: "ja", label: "日本語" },
];
export function Header() {
const t = useTranslations("nav");
const pathname = usePathname();
const locale = useLocale();
const [mobileOpen, setMobileOpen] = useState(false);
const [dark, setDark] = useState(() => {
if (typeof window !== "undefined") {
const stored = localStorage.getItem("theme");
if (stored) return stored === "dark";
return window.matchMedia("(prefers-color-scheme: dark)").matches;
}
return false;
});
function toggleDark() {
const next = !dark;
setDark(next);
document.documentElement.classList.toggle("dark", next);
localStorage.setItem("theme", next ? "dark" : "light");
}
function switchLocale(newLocale: string) {
const newPath = pathname.replace(`/${locale}`, `/${newLocale}`);
window.location.href = newPath;
}
return (
<header className="sticky top-0 z-50 border-b border-[var(--color-border)] bg-[var(--color-bg)]/80 backdrop-blur-sm">
<div className="mx-auto flex h-14 max-w-7xl items-center justify-between px-4 sm:px-6 lg:px-8">
<Link href={`/${locale}`} className="text-lg font-bold">
Learn Claude Code
</Link>
{/* Desktop nav */}
<nav className="hidden items-center gap-6 md:flex">
{NAV_ITEMS.map((item) => (
<Link
key={item.key}
href={`/${locale}${item.href}`}
className={cn(
"text-sm font-medium transition-colors hover:text-zinc-900 dark:hover:text-white",
pathname.includes(item.href)
? "text-zinc-900 dark:text-white"
: "text-zinc-500 dark:text-zinc-400"
)}
>
{t(item.key)}
</Link>
))}
{/* Locale switcher */}
<div className="flex items-center gap-1 rounded-lg border border-[var(--color-border)] p-0.5">
{LOCALES.map((l) => (
<button
key={l.code}
onClick={() => switchLocale(l.code)}
className={cn(
"rounded-md px-2 py-1 text-xs font-medium transition-colors",
locale === l.code
? "bg-zinc-900 text-white dark:bg-white dark:text-zinc-900"
: "text-zinc-500 hover:text-zinc-700 dark:text-zinc-400"
)}
>
{l.label}
</button>
))}
</div>
<button
onClick={toggleDark}
className="rounded-md p-1.5 text-zinc-500 hover:text-zinc-700 dark:text-zinc-400 dark:hover:text-white"
>
{dark ? <Sun size={16} /> : <Moon size={16} />}
</button>
<a
href="https://github.com/shareAI-lab/learn-claude-code"
target="_blank"
rel="noopener"
className="text-zinc-500 hover:text-zinc-700 dark:text-zinc-400 dark:hover:text-white"
>
<Github size={18} />
</a>
</nav>
{/* Mobile hamburger */}
<button
onClick={() => setMobileOpen(!mobileOpen)}
className="flex min-h-[44px] min-w-[44px] items-center justify-center md:hidden"
>
{mobileOpen ? <X size={20} /> : <Menu size={20} />}
</button>
</div>
{/* Mobile menu */}
{mobileOpen && (
<div className="border-t border-[var(--color-border)] bg-[var(--color-bg)] p-4 md:hidden">
{NAV_ITEMS.map((item) => (
<Link
key={item.key}
href={`/${locale}${item.href}`}
className="flex min-h-[44px] items-center text-sm"
onClick={() => setMobileOpen(false)}
>
{t(item.key)}
</Link>
))}
<div className="mt-3 flex items-center justify-between border-t border-[var(--color-border)] pt-3">
<div className="flex gap-2">
{LOCALES.map((l) => (
<button
key={l.code}
onClick={() => switchLocale(l.code)}
className={cn(
"min-h-[44px] min-w-[44px] rounded-md px-3 text-xs font-medium",
locale === l.code
? "bg-zinc-900 text-white dark:bg-white dark:text-zinc-900"
: "border border-[var(--color-border)]"
)}
>
{l.label}
</button>
))}
</div>
<div className="flex items-center gap-2">
<button
onClick={toggleDark}
className="flex min-h-[44px] min-w-[44px] items-center justify-center rounded-md text-zinc-500 hover:text-zinc-700 dark:text-zinc-400 dark:hover:text-white"
>
{dark ? <Sun size={18} /> : <Moon size={18} />}
</button>
<a
href="https://github.com/shareAI-lab/learn-claude-code"
target="_blank"
rel="noopener"
className="flex min-h-[44px] min-w-[44px] items-center justify-center text-zinc-500 hover:text-zinc-700 dark:text-zinc-400 dark:hover:text-white"
>
<Github size={18} />
</a>
</div>
</div>
</div>
)}
</header>
);
}

View File

@ -0,0 +1,66 @@
"use client";
import Link from "next/link";
import { usePathname } from "next/navigation";
import { LAYERS, VERSION_META } from "@/lib/constants";
import { useTranslations } from "@/lib/i18n";
import { cn } from "@/lib/utils";
const LAYER_DOT_BG: Record<string, string> = {
tools: "bg-blue-500",
planning: "bg-emerald-500",
memory: "bg-purple-500",
concurrency: "bg-amber-500",
collaboration: "bg-red-500",
};
export function Sidebar() {
const pathname = usePathname();
const locale = pathname.split("/")[1] || "en";
const t = useTranslations("sessions");
const tLayer = useTranslations("layer_labels");
return (
<nav className="hidden w-56 shrink-0 md:block">
<div className="sticky top-[calc(3.5rem+2rem)] space-y-5">
{LAYERS.map((layer) => (
<div key={layer.id}>
<div className="flex items-center gap-1.5 pb-1.5">
<span className={cn("h-2 w-2 rounded-full", LAYER_DOT_BG[layer.id])} />
<span className="text-[11px] font-semibold uppercase tracking-wider text-zinc-400 dark:text-zinc-500">
{tLayer(layer.id)}
</span>
</div>
<ul className="space-y-0.5">
{layer.versions.map((vId) => {
const meta = VERSION_META[vId];
const href = `/${locale}/${vId}`;
const isActive =
pathname === href ||
pathname === `${href}/` ||
pathname.startsWith(`${href}/diff`);
return (
<li key={vId}>
<Link
href={href}
className={cn(
"block rounded-md px-2.5 py-1.5 text-sm transition-colors",
isActive
? "bg-zinc-100 font-medium text-zinc-900 dark:bg-zinc-800 dark:text-white"
: "text-zinc-500 hover:bg-zinc-50 hover:text-zinc-700 dark:text-zinc-400 dark:hover:bg-zinc-800/50 dark:hover:text-zinc-300"
)}
>
<span className="font-mono text-xs">{vId}</span>
<span className="ml-1.5">{t(vId) || meta?.title}</span>
</Link>
</li>
);
})}
</ul>
</div>
))}
</div>
</nav>
);
}

View File

@ -0,0 +1,95 @@
"use client";
import { useRef, useEffect, useState } from "react";
import { AnimatePresence } from "framer-motion";
import { useTranslations } from "@/lib/i18n";
import { useSimulator } from "@/hooks/useSimulator";
import { SimulatorControls } from "./simulator-controls";
import { SimulatorMessage } from "./simulator-message";
import type { Scenario } from "@/types/agent-data";
const scenarioModules: Record<string, () => Promise<{ default: Scenario }>> = {
s01: () => import("@/data/scenarios/s01.json") as Promise<{ default: Scenario }>,
s02: () => import("@/data/scenarios/s02.json") as Promise<{ default: Scenario }>,
s03: () => import("@/data/scenarios/s03.json") as Promise<{ default: Scenario }>,
s04: () => import("@/data/scenarios/s04.json") as Promise<{ default: Scenario }>,
s05: () => import("@/data/scenarios/s05.json") as Promise<{ default: Scenario }>,
s06: () => import("@/data/scenarios/s06.json") as Promise<{ default: Scenario }>,
s07: () => import("@/data/scenarios/s07.json") as Promise<{ default: Scenario }>,
s08: () => import("@/data/scenarios/s08.json") as Promise<{ default: Scenario }>,
s09: () => import("@/data/scenarios/s09.json") as Promise<{ default: Scenario }>,
s10: () => import("@/data/scenarios/s10.json") as Promise<{ default: Scenario }>,
s11: () => import("@/data/scenarios/s11.json") as Promise<{ default: Scenario }>,
};
interface AgentLoopSimulatorProps {
version: string;
}
export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) {
const t = useTranslations("version");
const [scenario, setScenario] = useState<Scenario | null>(null);
const scrollRef = useRef<HTMLDivElement>(null);
useEffect(() => {
const loader = scenarioModules[version];
if (loader) {
loader().then((mod) => setScenario(mod.default));
}
}, [version]);
const sim = useSimulator(scenario?.steps ?? []);
useEffect(() => {
if (scrollRef.current) {
scrollRef.current.scrollTo({
top: scrollRef.current.scrollHeight,
behavior: "smooth",
});
}
}, [sim.visibleSteps.length]);
if (!scenario) return null;
return (
<section>
<h2 className="mb-2 text-xl font-semibold">{t("simulator")}</h2>
<p className="mb-4 text-sm text-[var(--color-text-secondary)]">
{scenario.description}
</p>
<div className="overflow-hidden rounded-xl border border-[var(--color-border)]">
<div className="border-b border-[var(--color-border)] bg-zinc-50 px-4 py-3 dark:bg-zinc-900">
<SimulatorControls
isPlaying={sim.isPlaying}
isComplete={sim.isComplete}
currentIndex={sim.currentIndex}
totalSteps={sim.totalSteps}
speed={sim.speed}
onPlay={sim.play}
onPause={sim.pause}
onStep={sim.stepForward}
onReset={sim.reset}
onSpeedChange={sim.setSpeed}
/>
</div>
<div
ref={scrollRef}
className="flex max-h-[500px] min-h-[200px] flex-col gap-3 overflow-y-auto p-4"
>
{sim.visibleSteps.length === 0 && (
<div className="flex flex-1 items-center justify-center text-sm text-[var(--color-text-secondary)]">
Press Play or Step to begin
</div>
)}
<AnimatePresence mode="popLayout">
{sim.visibleSteps.map((step, i) => (
<SimulatorMessage key={i} step={step} index={i} />
))}
</AnimatePresence>
</div>
</div>
</section>
);
}

View File

@ -0,0 +1,99 @@
"use client";
import { useTranslations } from "@/lib/i18n";
import { Play, Pause, SkipForward, RotateCcw } from "lucide-react";
import { cn } from "@/lib/utils";
interface SimulatorControlsProps {
isPlaying: boolean;
isComplete: boolean;
currentIndex: number;
totalSteps: number;
speed: number;
onPlay: () => void;
onPause: () => void;
onStep: () => void;
onReset: () => void;
onSpeedChange: (speed: number) => void;
}
const SPEEDS = [0.5, 1, 2, 4];
export function SimulatorControls({
isPlaying,
isComplete,
currentIndex,
totalSteps,
speed,
onPlay,
onPause,
onStep,
onReset,
onSpeedChange,
}: SimulatorControlsProps) {
const t = useTranslations("sim");
return (
<div className="flex flex-wrap items-center gap-3">
<div className="flex items-center gap-1.5">
{isPlaying ? (
<button
onClick={onPause}
className="flex h-9 w-9 items-center justify-center rounded-lg bg-zinc-900 text-white transition-colors hover:bg-zinc-700 dark:bg-white dark:text-zinc-900 dark:hover:bg-zinc-200"
title={t("pause")}
>
<Pause size={16} />
</button>
) : (
<button
onClick={onPlay}
disabled={isComplete}
className="flex h-9 w-9 items-center justify-center rounded-lg bg-zinc-900 text-white transition-colors hover:bg-zinc-700 disabled:opacity-40 dark:bg-white dark:text-zinc-900 dark:hover:bg-zinc-200"
title={t("play")}
>
<Play size={16} />
</button>
)}
<button
onClick={onStep}
disabled={isComplete}
className="flex h-9 w-9 items-center justify-center rounded-lg border border-[var(--color-border)] transition-colors hover:bg-zinc-100 disabled:opacity-40 dark:hover:bg-zinc-800"
title={t("step")}
>
<SkipForward size={16} />
</button>
<button
onClick={onReset}
className="flex h-9 w-9 items-center justify-center rounded-lg border border-[var(--color-border)] transition-colors hover:bg-zinc-100 dark:hover:bg-zinc-800"
title={t("reset")}
>
<RotateCcw size={16} />
</button>
</div>
<div className="flex items-center gap-1.5">
<span className="text-xs text-[var(--color-text-secondary)]">
{t("speed")}:
</span>
{SPEEDS.map((s) => (
<button
key={s}
onClick={() => onSpeedChange(s)}
className={cn(
"rounded px-2 py-1 text-xs font-medium transition-colors",
speed === s
? "bg-zinc-900 text-white dark:bg-white dark:text-zinc-900"
: "text-[var(--color-text-secondary)] hover:text-[var(--color-text)]"
)}
>
{s}x
</button>
))}
</div>
<span className="ml-auto text-xs tabular-nums text-[var(--color-text-secondary)]">
{Math.max(0, currentIndex + 1)} {t("step_of")} {totalSteps}
</span>
</div>
);
}

Some files were not shown because too many files have changed in this diff Show More