diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 010e8cd..9399aba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
defaults:
run:
- working-directory: web
+ working-directory: learn-claude-code/web
steps:
# actions/checkout@v6
@@ -22,7 +22,7 @@ jobs:
with:
node-version: 20
cache: npm
- cache-dependency-path: web/package-lock.json
+ cache-dependency-path: learn-claude-code/web/package-lock.json
- name: Install dependencies
run: npm ci
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2f065c4..2d0fa0b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,6 +9,9 @@ on:
jobs:
python-smoke:
runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: learn-claude-code
steps:
# actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -29,7 +32,7 @@ jobs:
runs-on: ubuntu-latest
defaults:
run:
- working-directory: web
+ working-directory: learn-claude-code/web
steps:
# actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -40,7 +43,7 @@ jobs:
with:
node-version: "20"
cache: "npm"
- cache-dependency-path: web/package-lock.json
+ cache-dependency-path: learn-claude-code/web/package-lock.json
- name: Install dependencies
run: npm ci
diff --git a/.gitignore b/.gitignore
index ce87dce..cf8a7a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -200,6 +200,14 @@ cython_debug/
.worktrees/
.scheduled_tasks.json
+# Local AI/editor state
+.claude/
+.codex/
+.agents/
+.idea/
+.vscode/
+*.code-workspace
+
-# Accidental root npm lockfile; web/package-lock.json is tracked.
+# Accidental root npm lockfile; learn-claude-code/web/package-lock.json is tracked.
/package-lock.json
@@ -232,3 +240,71 @@ test_providers.py
# Internal analysis artifacts (not learning material)
analysis/
analysis_progress.md
+
+# macOS / local filesystem
+.DS_Store
+.AppleDouble
+.LSOverride
+Icon?
+._*
+.Spotlight-V100/
+.Trashes/
+.fseventsd/
+
+# Node / TypeScript / frontend build output
+node_modules/
+.npm/
+.pnpm-store/
+.yarn/cache/
+.yarn/unplugged/
+.yarn/build-state.yml
+.yarn/install-state.gz
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+*.tsbuildinfo
+.turbo/
+.next/
+.nuxt/
+.svelte-kit/
+.vite/
+out/
+coverage/
+
+# Secrets and local credentials
+.env.*
+!.env.example
+!.env*.example
+*.local
+*.pem
+*.key
+*.p12
+*.pfx
+*.crt
+*.csr
+*.token
+*.secret
+secrets/
+secrets.*
+id_rsa
+id_rsa.*
+id_ed25519
+id_ed25519.*
+*.kubeconfig
+
+# Local databases, temp files, and packaged exports
+*.sqlite
+*.sqlite3
+*.db
+*.db-journal
+logs/
+*.tmp
+*.temp
+tmp/
+temp/
+*.zip
+*.tar
+*.tar.gz
+*.tgz
+*.7z
diff --git a/README.md b/README.md
index e9f0c93..6865532 100644
--- a/README.md
+++ b/README.md
@@ -1,462 +1,32 @@
-[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md)
+# Learn Agent Harness
-
+This repository is becoming a collection of from-scratch agent harness courses.
+Each course is kept in its own top-level folder so it can explain one product
+line clearly without mixing its vocabulary, runtime assumptions, or teaching
+path with the others.
-# Learn Claude Code -- Harness Engineering for Real Agents
+## Courses
-## Agency Comes from the Model. An Agent Product = Model + Harness.
+| Course | Focus | Start here |
+| --- | --- | --- |
+| [learn-claude-code](./learn-claude-code/) | Build the vehicle behind a coding agent: loop, tools, permissions, hooks, memory, tasks, teams, worktrees, and MCP. | [learn-claude-code/README.md](./learn-claude-code/README.md) |
+| [learn-pi-agent](./learn-pi-agent/) | Build a minimal Pi-style coding-agent harness around a small kernel, explicit events, clear provider/runtime boundaries, and extension points. | [learn-pi-agent/README.md](./learn-pi-agent/README.md) |
-Before we write any code, one thing needs to be clear.
+## Repository Layout
-**Agency -- the capacity to perceive, reason, and act -- comes from model training, not from external code orchestration.** But a working agent product needs both the model and the harness. The model is the driver. The harness is the vehicle. This repository teaches you how to build the vehicle.
-
-### Where Agency Comes From
-
-At the core of every agent is a neural network -- a Transformer, an RNN, a trained function -- shaped by billions of gradient updates on sequences of perception, reasoning, and action. Agency was never bestowed by the surrounding code. It was learned during training.
-
-Humans are the original proof. A biological neural network, refined by millions of years of evolutionary pressure, perceives the world through senses, reasons through a brain, and acts through a body. When DeepMind, OpenAI, or Anthropic say "agent," they all mean the same core thing: **a model that learned to act through training, plus the infrastructure that lets it operate in a specific environment.**
-
-The historical record is unambiguous:
-
-- **2013 -- DeepMind DQN plays Atari.** A single neural network, receiving only raw pixels and game scores, learned 7 Atari 2600 games -- surpassing prior algorithms and beating human experts in 3 of them. By 2015, scaled to [49 games at professional tester level](https://www.nature.com/articles/nature14236), published in *Nature*. No game-specific rules. One model, learning from experience.
-
-- **2019 -- OpenAI Five conquers Dota 2.** Five neural networks played [45,000 years of Dota 2 against themselves](https://openai.com/index/openai-five-defeats-dota-2-world-champions/) over 10 months, then defeated **OG** -- the TI8 world champions -- 2-0 in a live match. In the public arena, the AI won 99.4% of 42,729 games. No scripted strategies. Models learned teamwork through self-play.
-
-- **2019 -- DeepMind AlphaStar masters StarCraft II.** AlphaStar [beat a professional player 10-1](https://deepmind.google/blog/alphastar-mastering-the-real-time-strategy-game-starcraft-ii/) in closed matches, then reached [Grandmaster rank](https://www.nature.com/articles/d41586-019-03298-6) on the European server -- top 0.15% of 90,000 players. An incomplete-information, real-time game with a combinatorial action space far exceeding chess or Go.
-
-- **2019 -- Tencent Jueyu dominates Honor of Kings.** Tencent AI Lab's "Jueyu" system [defeated KPL professional players in full 5v5](https://www.jiemian.com/article/3371171.html) at the World Champion Cup semifinal. In 1v1 mode, pros [won just 1 out of 15 matches, lasting under 8 minutes at best](https://developer.aliyun.com/article/851058). Training intensity: one day equaled 440 human years. A model that learned the entire game from scratch through self-play.
-
-- **2024-2025 -- LLM agents reshape software engineering.** Claude, GPT, Gemini -- large language models trained on the full breadth of human code and reasoning -- are deployed as coding agents. They read codebases, write implementations, debug failures, and coordinate as teams. The architecture is identical to every previous agent: a trained model, placed in an environment, given tools for perception and action.
-
-Every milestone points to the same fact: **Agency -- the ability to perceive, reason, and act -- is trained, not coded.** But every agent also needs an environment to operate in: an Atari emulator, the Dota 2 client, the StarCraft II engine, an IDE and a terminal. The model supplies the intelligence. The environment supplies the action space. Together they form a complete agent.
-
-### What an Agent Is NOT
-
-The word "agent" has been hijacked by an entire prompt-plumbing industry.
-
-Drag-and-drop workflow builders. No-code "AI Agent" platforms. Prompt-chain orchestration libraries. They share a single delusion: that stringing LLM API calls together with if-else branches, node graphs, and hardcoded routing logic constitutes "building an agent."
-
-It does not. What they produce are Rube Goldberg machines -- over-engineered, brittle, procedural rule pipelines with an LLM wedged in as a glorified text-completion node. That is not an agent. That is a shell script with grandiose pretensions.
-
-You cannot brute-force intelligence by stacking procedural logic -- sprawling rule trees, node graphs, chained prompt waterfalls -- and praying that enough glue code will spontaneously produce autonomous behavior. It will not. You cannot engineer agency into existence. Agency is learned, not coded.
-
-### The Mindshift: From "Building Agents" to Building Harnesses
-
-When someone says "I am building an agent," they can only mean one of two things:
-
-**1. Training a model.** Adjusting weights through reinforcement learning, fine-tuning, RLHF, or another gradient-based method. Collecting trajectory data -- real-world sequences of perception, reasoning, and action in a target domain -- and using it to shape the model's behavior. This is what DeepMind, OpenAI, Tencent AI Lab, and Anthropic do.
-
-**2. Building a harness.** Writing the code that gives a model an operational environment. This is what most of us do, and it is the core of this repository.
-
-A harness is everything an agent needs to work in a specific domain:
-
-```
-Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions
-
- Tools: file I/O, shell, network, database, browser
- Knowledge: product docs, domain references, API specs, style guides
- Observation: git diff, error logs, browser state, sensor data
- Action: CLI commands, API calls, UI interactions
- Permissions: sandbox isolation, approval workflows, trust boundaries
+```text
+.
+├── learn-claude-code/ # Existing Claude Code harness course
+├── learn-pi-agent/ # Pi harness MVP course
+├── .github/ # Repository-level CI
+├── .gitignore # Shared ignore rules for local state and build output
+└── README.md # Course collection entry
```
-The model decides. The harness executes. The model reasons. The harness provides context. The model is the driver. The harness is the vehicle.
+## Local Files
-This repository teaches you to build the vehicle. A vehicle for coding. But the design patterns generalize to any domain.
+Do not commit local runtime state or credentials. The root `.gitignore` excludes
+common local files such as `.claude/`, `.env`, `.DS_Store`, `node_modules/`,
+build output, logs, temporary files, local databases, and packaged exports.
-### What Harness Engineers Actually Do
-
-If you are reading this repository, you are most likely a harness engineer. Here is what the job actually entails:
-
-- **Implement tools.** Give the agent hands. File read/write, shell execution, API calls, browser control, database queries. Each tool is one action the agent can take in its environment. Design them atomic, composable, and clearly described.
-
-- **Curate knowledge.** Give the agent domain expertise. Product documentation, architecture decision records, style guides, compliance requirements. Load on demand, not upfront.
-
-- **Manage context.** Give the agent clean memory. Subagent isolation prevents noise leakage. Context compaction prevents history from drowning the present. Task systems let goals persist beyond a single conversation.
-
-- **Control permissions.** Give the agent boundaries. Sandbox file access. Require approval for destructive operations. Enforce trust boundaries between the agent and external systems.
-
-- **Collect trajectory data.** Every action sequence the agent executes in your harness is training signal. Real deployment trajectories are the raw material for fine-tuning the next generation of agent models.
-
-You are not writing intelligence. You are building the world that intelligence inhabits. The quality of that world directly determines how effectively the intelligence can express itself.
-
-**Build the harness well. The model will do the rest.**
-
-### Why Claude Code
-
-Because Claude Code is the most elegant, most complete agent harness implementation we have seen. Not because of any clever trick, but because of what it *does not* do: it does not try to be the agent. It does not impose rigid workflows. It does not substitute hand-crafted decision trees for the model's own judgment. It gives the model tools, knowledge, context management, and permission boundaries -- then gets out of the way.
-
-Strip Claude Code down to its essence:
-
-```
-Claude Code = one agent loop
- + tools (bash, read, write, edit, glob, grep, browser...)
- + on-demand skill loading
- + context compaction
- + subagent spawning
- + task system with dependency graphs
- + async mailbox team coordination
- + worktree-isolated parallel execution
- + permission governance
- + hooks extension system
- + memory persistence
- + MCP external capability routing
-```
-
-That is it. The agent itself? Claude. A model. Trained by Anthropic on the full breadth of human reasoning and code. The harness did not make Claude smart. Claude was already smart. The harness gave Claude hands, eyes, and a workspace.
-
-The takeaway is not "copy Claude Code." The takeaway is: **the best agent products come from engineers who understand that their job is the harness, not the intelligence.**
-
----
-
-```
- THE AGENT PATTERN
- =================
-
- User --> messages[] --> LLM --> response
- |
- stop_reason == "tool_use"?
- / \
- yes no
- | |
- execute tools return text
- append results
- loop back -----------------> messages[]
-
-
- The model decides when to call tools and when to stop.
- The code just executes what the model asks for.
- This repo teaches you to build everything around this loop --
- the harness that makes the agent effective in a specific domain.
-```
-
-## Core Pattern
-
-```python
-def agent_loop(messages):
- while True:
- response = client.messages.create(
- model=MODEL, system=SYSTEM,
- messages=messages, tools=TOOLS,
- )
- messages.append({"role": "assistant",
- "content": response.content})
-
- if response.stop_reason != "tool_use":
- return
-
- results = []
- for block in response.content:
- if block.type == "tool_use":
- output = TOOL_HANDLERS[block.name](**block.input)
- results.append({
- "type": "tool_result",
- "tool_use_id": block.id,
- "content": output,
- })
- messages.append({"role": "user", "content": results})
-```
-
-Every lesson layers one harness mechanism on top of this loop -- the loop itself never changes. The loop belongs to the agent. The mechanisms belong to the harness.
-
-The loop is constant. Tools, knowledge, and permissions change. Agent = Model (LLM) + a generalized operational environment (Harness).
-
----
-
-## Version Status
-
-This repository currently contains two tutorial tracks:
-
-- **Current track: root-level `s01-s20`**
- The root-level `s01_*` ... `s20_*` folders are the new canonical version. Each chapter contains a full narrative README, translations, runnable `code.py`, and diagrams where needed.
-- **Legacy transition track: `docs/`, `agents/`, and the current `web/` app**
- These still preserve the older 12-lesson version. They are kept temporarily for existing readers, old links, and the web platform while the new 20-lesson track settles.
-
-If you are starting now, read the root-level `s01_agent_loop/` through `s20_comprehensive/` chapters. If you are following an older link or using the current web app, you are likely reading the legacy 12-lesson track. The legacy and current chapter numbers do not always match, so avoid mixing chapter numbers across tracks.
-
-### Legacy-to-Current Mapping
-
-| Legacy 12-lesson track | Current 20-lesson track | Topic |
-|---|---|---|
-| old s01 | new s01 | Agent Loop |
-| old s02 | new s02 | Tool Use |
-| old s03 | new s05 | TodoWrite |
-| old s04 | new s06 | Subagent |
-| old s05 | new s07 | Skill Loading |
-| old s06 | new s08 | Context Compact |
-| old s07 | new s12 | Task System |
-| old s08 | new s13 | Background Tasks |
-| old s09 | new s15 | Agent Teams |
-| old s10 | new s16 | Team Protocols |
-| old s11 | new s17 | Autonomous Agents |
-| old s12 | new s18 | Worktree Isolation |
-| new only | s03, s04, s09, s10, s11, s14, s19, s20 | Permission, Hooks, Memory, System Prompt, Error Recovery, Cron, MCP, Comprehensive Agent |
-
----
-
-## Scope
-
-This repository is a 0-to-1 harness engineering learning project: it teaches how to build the working environment around an agent model. To keep the learning path clear, some production mechanisms are intentionally simplified or omitted:
-
-- Full event / hook bus behavior, such as `PreToolUse`, `SessionStart/End`, and `ConfigChange`.
- The teaching code uses minimal lifecycle events where needed.
-- Rule-based permission governance and full trust workflows.
-- Session lifecycle controls such as resume/fork, plus more complete worktree lifecycle handling.
-- Full MCP runtime details such as transport, OAuth, resource subscription, and polling.
-
-The JSONL mailbox protocol in this repository is a teaching implementation, not a claim about any specific production internal implementation.
-
----
-
-## 20 Progressive Lessons
-
-**Each lesson adds one harness mechanism. Each mechanism has a motto.**
-
-> **s01** *"One loop & Bash is all you need"* — one tool + one loop = one agent
->
-> **s02** *"Adding a tool means adding one handler"* — the loop stays untouched; new tools register into the dispatch map
->
-> **s03** *"Set boundaries first, then grant freedom"* — check what can run, what must stop, and what needs approval
->
-> **s04** *"Hook around the loop, never rewrite the loop"* — add extension points without changing the main loop
->
-> **s05** *"An agent without a plan drifts"* — list the steps before starting; completion rate doubles
->
-> **s06** *"Big tasks split small, each subtask gets clean context"* — subagents do the side work and bring back only the result
->
-> **s07** *"Load knowledge on demand, not upfront"* — list skills first, expand them only when needed
->
-> **s08** *"Context always fills up -- have a way to make room"* — multi-layer compaction strategies buy you infinite sessions
->
-> **s09** *"Remember what matters, forget what doesn't"* — three subsystems: selection, extraction, consolidation
->
-> **s10** *"Prompts are assembled at runtime, not hardcoded"* — section-based concatenation, loaded on demand
->
-> **s11** *"Errors aren't the end, they're the start of a retry"* — retry, make room, or take another path when things fail
->
-> **s12** *"Big goals break into small tasks, ordered, persisted to disk"* — a file-backed task graph that lays the groundwork for multi-agent coordination
->
-> **s13** *"Slow ops go background, agent keeps thinking"* — background threads run commands; notifications inject on completion
->
-> **s14** *"Fire on schedule, no human kick needed"* — trigger tasks automatically by time
->
-> **s15** *"Too big for one agent -- delegate to teammates"* — persistent teammates + async mailboxes
->
-> **s16** *"Teammates need shared communication rules"* — use a fixed request-reply format for coordination
->
-> **s17** *"Teammates check the board, claim work themselves"* — no leader assigning one by one; self-organizing
->
-> **s18** *"Each works in its own directory, no interference"* — tasks own goals, worktrees own directories, bound by ID
->
-> **s19** *"Not enough capability? Plug in more via MCP"* — connect external tools into the same tool pool
->
-> **s20** *"Many mechanisms, one loop"* — all previous mechanisms return to one complete harness
-
----
-
-## Learning Path
-
-Main line: act → handle complex work → remember and recover → run long tasks → collaborate → extend and assemble.
-
-```mermaid
-flowchart TD
- %% Card styles
- classDef stage1 fill:#E3F2FD,stroke:#1976D2,stroke-width:2px,color:#0D47A1,rx:12,ry:12,text-align:left
- classDef stage2 fill:#E8F5E9,stroke:#388E3C,stroke-width:2px,color:#1B5E20,rx:12,ry:12,text-align:left
- classDef stage3 fill:#FFF3E0,stroke:#F57C00,stroke-width:2px,color:#E65100,rx:12,ry:12,text-align:left
- classDef stage4 fill:#FCE4EC,stroke:#C2185b,stroke-width:2px,color:#880E4F,rx:12,ry:12,text-align:left
- classDef stage5 fill:#F3E5F5,stroke:#7B1FA2,stroke-width:2px,color:#4A148C,rx:12,ry:12,text-align:left
- classDef stage6 fill:#E0F7FA,stroke:#0097A7,stroke-width:2px,color:#006064,rx:12,ry:12,text-align:left
-
- %% Group style
- classDef groupBox fill:#F8F9FA,stroke:#CED4DA,stroke-width:2px,stroke-dasharray: 5 5,rx:15,ry:15,color:#495057
-
- %% Layer 1: stages 1-3
- subgraph Phase1 ["🌱 Stages 1-3: Core capabilities (simple to complex)"]
- direction LR
- S1["1. Let the Agent act<br/>━━━━━━━━━━━━━<br/>s01 Agent Loop<br/>└─ one loop + bash<br/>s02 Tool Use<br/>└─ one tool to many tools<br/>s03 Permission<br/>└─ decide what can run<br/>s04 Hooks<br/>└─ extension points around tools"]:::stage1
-
- S2["2. Handle complex work<br/>━━━━━━━━━━━━━<br/>s05 TodoWrite<br/>└─ plan first, then execute<br/>s06 Subagent<br/>└─ side work, result back<br/>s08 Context Compact<br/>└─ make room in long context"]:::stage2
-
- S3["3. Remember and recover<br/>━━━━━━━━━━━━━<br/>s09 Memory<br/>└─ remember what matters<br/>s10 System Prompt<br/>└─ assemble at runtime<br/>s11 Error Recovery<br/>└─ retry or change path"]:::stage3
-
- S1 ==> S2 ==> S3
- end
-
- %% Layer 2: stages 4-6
- subgraph Phase2 ["🚀 Stages 4-6: Advanced capabilities (long-running, collaboration, integration)"]
- direction LR
- S4["4. Run long tasks<br/>━━━━━━━━━━━━━<br/>s12 Task System<br/>└─ persist tasks and deps<br/>s13 Background Tasks<br/>└─ send slow work background<br/>s14 Cron Scheduler<br/>└─ trigger by time"]:::stage4
-
- S5["5. Coordinate many Agents<br/>━━━━━━━━━━━━━<br/>s15 Agent Teams<br/>└─ teammates + mailboxes<br/>s16 Team Protocols<br/>└─ fixed request-reply format<br/>s17 Autonomous Agents<br/>└─ claim work from the board<br/>s18 Worktree Isolation<br/>└─ separate directories"]:::stage5
-
- S6["6. Extend and assemble<br/>━━━━━━━━━━━━━<br/>s07 Skill Loading<br/>└─ expand skills on demand<br/>s19 MCP Plugin<br/>└─ external tools, one pool<br/>s20 Comprehensive Agent<br/>└─ all mechanisms, one loop"]:::stage6
-
- S4 ==> S5 ==> S6
- end
-
- %% Connect the two layers
- Phase1 ===> Phase2
-
- class Phase1,Phase2 groupBox
-```
-
----
-
-## All Chapters
-
-| Chapter | Topic | Key Concepts |
-|---|---|---|
-| [s01](./s01_agent_loop/) | Agent Loop | `messages` / `while True` / `stop_reason` |
-| [s02](./s02_tool_use/) | Tool Use | `TOOL_HANDLERS` / dispatch map / concurrency |
-| [s03](./s03_permission/) | Permission System | `PermissionRule` / approval pipeline |
-| [s04](./s04_hooks/) | Hook System | `PreToolUse` / `PostToolUse` / extension points |
-| [s05](./s05_todo_write/) | TodoWrite | `TodoItem` / plan-then-execute |
-| [s06](./s06_subagent/) | Subagent | `fresh messages[]` / context isolation |
-| [s07](./s07_skill_loading/) | Skill Loading | `SkillManifest` / on-demand injection |
-| [s08](./s08_context_compact/) | Context Compact | snipCompact / microCompact / toolResultBudget / autoCompact |
-| [s09](./s09_memory/) | Memory System | selection / extraction / consolidation |
-| [s10](./s10_system_prompt/) | System Prompt | runtime assembly / section concatenation |
-| [s11](./s11_error_recovery/) | Error Recovery | token escalation / fallback model / retry strategies |
-| [s12](./s12_task_system/) | Task System | `TaskRecord` / `blockedBy` / disk persistence |
-| [s13](./s13_background_tasks/) | Background Tasks | threaded execution / notification queue |
-| [s14](./s14_cron_scheduler/) | Cron Scheduler | durable scheduling / session-scoped triggers |
-| [s15](./s15_agent_teams/) | Agent Teams | `MessageBus` / inbox / permission bubbling |
-| [s16](./s16_team_protocols/) | Team Protocols | shutdown handshake / plan approval |
-| [s17](./s17_autonomous_agents/) | Autonomous Agents | idle cycle / auto-claim / self-organization |
-| [s18](./s18_worktree_isolation/) | Worktree Isolation | `WorktreeRecord` / task-directory binding |
-| [s19](./s19_mcp_plugin/) | MCP Plugin | multi-transport / channel routing / tool pool assembly |
-| [s20](./s20_comprehensive/) | Comprehensive Agent | all mechanisms around one loop |
-
----
-
-## How to Read
-
-Each chapter is a folder. Open one and you will find:
-
-```
-s08_context_compact/
- README.md # full narrative with inline code
- README.en.md # English translation
- README.ja.md # Japanese translation
- code.py # standalone runnable implementation
- images/ # SVG diagrams (where needed)
-```
-
-Read the `README.md` for the core idea and work through the code. Complex chapters have `<details>` folds for deep dives -- open them when you want to go deeper. Simple chapters have 0-1 diagrams, complex chapters have more.
-
-Read from s01 through s20 in order. Each chapter assumes you've read the previous ones and ends with a hook into the next.
-
----
-
-## Quick Start
-
-### Current 20-Lesson Track
-
-```sh
-git clone https://github.com/shareAI-lab/learn-claude-code
-cd learn-claude-code
-pip install -r requirements.txt
-cp .env.example .env # configure ANTHROPIC_API_KEY
-
-python s01_agent_loop/code.py # Start here -- one loop + bash
-python s08_context_compact/code.py # Context compaction (complex)
-python s20_comprehensive/code.py # Endpoint: all mechanisms in one loop
-```
-
-### Legacy 12-Lesson Track
-
-```sh
-python agents/s01_agent_loop.py
-python agents/s12_worktree_task_isolation.py
-python agents/s_full.py
-```
-
-### Web Platform
-
-The current web app still renders the legacy `docs/` s01-s12 track. Use the root-level folders for the new s01-s20 track.
-
-```sh
-cd web && npm install && npm run dev # http://localhost:3000
-```
-
----
-
-## Project Structure
-
-```
-learn-claude-code/
- s01_agent_loop/ # one folder per chapter
- README.md # Chinese source (complete narrative)
- README.en.md # English translation
- README.ja.md # Japanese translation
- code.py # standalone runnable code
- images/ # SVG diagrams
- s02_tool_use/
- ...
- s19_mcp_plugin/
- s20_comprehensive/ # endpoint chapter
- agents/ # legacy 12 runnable copies + s_full.py
- skills/ # skill files used by s07
- docs/ # legacy 12-lesson docs, kept during transition
- web/ # currently renders the legacy docs/ track
- tests/
-```
-
----
-
-## What's Next
-
-After 20 lessons, you understand harness engineering from the inside out. Two paths to turn that knowledge into product:
-
-### Kode Agent CLI -- Open-Source Coding Agent CLI
-
-> `npm i -g @shareai-lab/kode`
-
-Skill and LSP support, Windows compatible, works with GLM / MiniMax / DeepSeek and other open models. Install and go.
-
-GitHub: **[shareAI-lab/Kode-Agent](https://github.com/shareAI-lab/Kode-Agent)**
-
-### Kode Agent SDK -- Embed Agent Capabilities in Your Application
-
-A standalone library with no per-user process overhead. Embed it in backends, browser extensions, embedded devices, or any runtime.
-
-GitHub: **[shareAI-lab/kode-agent-sdk](https://github.com/shareAI-lab/kode-agent-sdk)**
-
----
-
-## Sister Tutorial: From Passive Sessions to Always-On Assistants
-
-The harness taught in this repository is the **use-and-discard** kind -- open a terminal, give the agent a task, close when done, next session starts fresh. Claude Code works this way.
-
-But [OpenClaw](https://github.com/openclaw/openclaw) proves another possibility: on the same agent core, two additional harness mechanisms turn an agent from "poke it and it moves" into "wakes itself every 30 seconds to look for work":
-
-- **Heartbeat** -- every 30 seconds the harness sends the agent a message, letting it check for pending work. Nothing to do? Keep sleeping. Something appeared? Act immediately.
-- **Cron** -- the agent can schedule its own future tasks, which fire automatically when the time arrives.
-
-Add IM multi-channel routing (WhatsApp / Telegram / Slack / Discord and 13+ other platforms), persistent context memory, and a Soul personality system, and the agent transforms from a disposable tool into an always-on personal AI assistant.
-
-**[claw0](https://github.com/shareAI-lab/claw0)** is our sister teaching repository, breaking down these harness mechanisms from scratch:
-
-```
-claw agent = agent core + heartbeat + cron + IM chat + memory + soul
-```
-
-```
-learn-claude-code claw0
-(agent harness internals: (always-on harness:
- loop, tools, planning, heartbeat, cron, IM channels,
- teams, worktree isolation) memory, Soul personality)
-```
-
-## License
-
-MIT
-
----
-
-**Agency comes from the model. The harness gives agency a place to land. Build the harness well, and the model will do the rest.**
-
-**Bash is all you need. Real agents are all the universe needs.**
-
-**This is not "copy the source code." This is "grasp the key designs and build it yourself."**
+Course-specific install and run commands live inside each course folder.
diff --git a/.env.example b/learn-claude-code/.env.example
similarity index 100%
rename from .env.example
rename to learn-claude-code/.env.example
diff --git a/README-ja.md b/learn-claude-code/README-ja.md
similarity index 100%
rename from README-ja.md
rename to learn-claude-code/README-ja.md
diff --git a/README-zh.md b/learn-claude-code/README-zh.md
similarity index 100%
rename from README-zh.md
rename to learn-claude-code/README-zh.md
diff --git a/learn-claude-code/README.md b/learn-claude-code/README.md
new file mode 100644
index 0000000..e9f0c93
--- /dev/null
+++ b/learn-claude-code/README.md
@@ -0,0 +1,462 @@
+[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md)
+
+
+
+# Learn Claude Code -- Harness Engineering for Real Agents
+
+## Agency Comes from the Model. An Agent Product = Model + Harness.
+
+Before we write any code, one thing needs to be clear.
+
+**Agency -- the capacity to perceive, reason, and act -- comes from model training, not from external code orchestration.** But a working agent product needs both the model and the harness. The model is the driver. The harness is the vehicle. This repository teaches you how to build the vehicle.
+
+### Where Agency Comes From
+
+At the core of every agent is a neural network -- a Transformer, an RNN, a trained function -- shaped by billions of gradient updates on sequences of perception, reasoning, and action. Agency was never bestowed by the surrounding code. It was learned during training.
+
+Humans are the original proof. A biological neural network, refined by millions of years of evolutionary pressure, perceives the world through senses, reasons through a brain, and acts through a body. When DeepMind, OpenAI, or Anthropic say "agent," they all mean the same core thing: **a model that learned to act through training, plus the infrastructure that lets it operate in a specific environment.**
+
+The historical record is unambiguous:
+
+- **2013 -- DeepMind DQN plays Atari.** A single neural network, receiving only raw pixels and game scores, learned 7 Atari 2600 games -- surpassing prior algorithms and beating human experts in 3 of them. By 2015, scaled to [49 games at professional tester level](https://www.nature.com/articles/nature14236), published in *Nature*. No game-specific rules. One model, learning from experience.
+
+- **2019 -- OpenAI Five conquers Dota 2.** Five neural networks played [45,000 years of Dota 2 against themselves](https://openai.com/index/openai-five-defeats-dota-2-world-champions/) over 10 months, then defeated **OG** -- the TI8 world champions -- 2-0 in a live match. In the public arena, the AI won 99.4% of 42,729 games. No scripted strategies. Models learned teamwork through self-play.
+
+- **2019 -- DeepMind AlphaStar masters StarCraft II.** AlphaStar [beat a professional player 10-1](https://deepmind.google/blog/alphastar-mastering-the-real-time-strategy-game-starcraft-ii/) in closed matches, then reached [Grandmaster rank](https://www.nature.com/articles/d41586-019-03298-6) on the European server -- top 0.15% of 90,000 players. An incomplete-information, real-time game with a combinatorial action space far exceeding chess or Go.
+
+- **2019 -- Tencent Juewu dominates Honor of Kings.** Tencent AI Lab's "Juewu" system [defeated KPL professional players in full 5v5](https://www.jiemian.com/article/3371171.html) at the World Champion Cup semifinal. In 1v1 mode, pros [won just 1 out of 15 matches, lasting under 8 minutes at best](https://developer.aliyun.com/article/851058). Training intensity: one day equaled 440 human years. A model that learned the entire game from scratch through self-play.
+
+- **2024-2025 -- LLM agents reshape software engineering.** Claude, GPT, Gemini -- large language models trained on the full breadth of human code and reasoning -- are deployed as coding agents. They read codebases, write implementations, debug failures, and coordinate as teams. The architecture is identical to every previous agent: a trained model, placed in an environment, given tools for perception and action.
+
+Every milestone points to the same fact: **Agency -- the ability to perceive, reason, and act -- is trained, not coded.** But every agent also needs an environment to operate in: an Atari emulator, the Dota 2 client, the StarCraft II engine, an IDE and a terminal. The model supplies the intelligence. The environment supplies the action space. Together they form a complete agent.
+
+### What an Agent Is NOT
+
+The word "agent" has been hijacked by an entire prompt-plumbing industry.
+
+Drag-and-drop workflow builders. No-code "AI Agent" platforms. Prompt-chain orchestration libraries. They share a single delusion: that stringing LLM API calls together with if-else branches, node graphs, and hardcoded routing logic constitutes "building an agent."
+
+It does not. What they produce are Rube Goldberg machines -- over-engineered, brittle, procedural rule pipelines with an LLM wedged in as a glorified text-completion node. That is not an agent. That is a shell script with grandiose pretensions.
+
+You cannot brute-force intelligence by stacking procedural logic -- sprawling rule trees, node graphs, chained prompt waterfalls -- and praying that enough glue code will spontaneously produce autonomous behavior. It will not. You cannot engineer agency into existence. Agency is learned, not coded.
+
+### The Mindshift: From "Building Agents" to Building Harnesses
+
+When someone says "I am building an agent," they can only mean one of two things:
+
+**1. Training a model.** Adjusting weights through reinforcement learning, fine-tuning, RLHF, or another gradient-based method. Collecting trajectory data -- real-world sequences of perception, reasoning, and action in a target domain -- and using it to shape the model's behavior. This is what DeepMind, OpenAI, Tencent AI Lab, and Anthropic do.
+
+**2. Building a harness.** Writing the code that gives a model an operational environment. This is what most of us do, and it is the core of this repository.
+
+A harness is everything an agent needs to work in a specific domain:
+
+```
+Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions
+
+ Tools: file I/O, shell, network, database, browser
+ Knowledge: product docs, domain references, API specs, style guides
+ Observation: git diff, error logs, browser state, sensor data
+ Action: CLI commands, API calls, UI interactions
+ Permissions: sandbox isolation, approval workflows, trust boundaries
+```
+
+The model decides. The harness executes. The model reasons. The harness provides context. The model is the driver. The harness is the vehicle.
+
+This repository teaches you to build the vehicle. A vehicle for coding. But the design patterns generalize to any domain.
+
+### What Harness Engineers Actually Do
+
+If you are reading this repository, you are most likely a harness engineer. Here is what the job actually entails:
+
+- **Implement tools.** Give the agent hands. File read/write, shell execution, API calls, browser control, database queries. Each tool is one action the agent can take in its environment. Design them atomic, composable, and clearly described.
+
+- **Curate knowledge.** Give the agent domain expertise. Product documentation, architecture decision records, style guides, compliance requirements. Load on demand, not upfront.
+
+- **Manage context.** Give the agent clean memory. Subagent isolation prevents noise leakage. Context compaction prevents history from drowning the present. Task systems let goals persist beyond a single conversation.
+
+- **Control permissions.** Give the agent boundaries. Sandbox file access. Require approval for destructive operations. Enforce trust boundaries between the agent and external systems.
+
+- **Collect trajectory data.** Every action sequence the agent executes in your harness is training signal. Real deployment trajectories are the raw material for fine-tuning the next generation of agent models.
+
+You are not writing intelligence. You are building the world that intelligence inhabits. The quality of that world directly determines how effectively the intelligence can express itself.
+
+**Build the harness well. The model will do the rest.**
+
+### Why Claude Code
+
+Because Claude Code is the most elegant, most complete agent harness implementation we have seen. Not because of any clever trick, but because of what it *does not* do: it does not try to be the agent. It does not impose rigid workflows. It does not substitute hand-crafted decision trees for the model's own judgment. It gives the model tools, knowledge, context management, and permission boundaries -- then gets out of the way.
+
+Strip Claude Code down to its essence:
+
+```
+Claude Code = one agent loop
+ + tools (bash, read, write, edit, glob, grep, browser...)
+ + on-demand skill loading
+ + context compaction
+ + subagent spawning
+ + task system with dependency graphs
+ + async mailbox team coordination
+ + worktree-isolated parallel execution
+ + permission governance
+ + hooks extension system
+ + memory persistence
+ + MCP external capability routing
+```
+
+That is it. The agent itself? Claude. A model. Trained by Anthropic on the full breadth of human reasoning and code. The harness did not make Claude smart. Claude was already smart. The harness gave Claude hands, eyes, and a workspace.
+
+The takeaway is not "copy Claude Code." The takeaway is: **the best agent products come from engineers who understand that their job is the harness, not the intelligence.**
+
+---
+
+```
+ THE AGENT PATTERN
+ =================
+
+ User --> messages[] --> LLM --> response
+ |
+ stop_reason == "tool_use"?
+ / \
+ yes no
+ | |
+ execute tools return text
+ append results
+ loop back -----------------> messages[]
+
+
+ The model decides when to call tools and when to stop.
+ The code just executes what the model asks for.
+ This repo teaches you to build everything around this loop --
+ the harness that makes the agent effective in a specific domain.
+```
+
+## Core Pattern
+
+```python
+def agent_loop(messages):
+ while True:
+ response = client.messages.create(
+ model=MODEL, system=SYSTEM,
+ messages=messages, tools=TOOLS,
+ )
+ messages.append({"role": "assistant",
+ "content": response.content})
+
+ if response.stop_reason != "tool_use":
+ return
+
+ results = []
+ for block in response.content:
+ if block.type == "tool_use":
+ output = TOOL_HANDLERS[block.name](**block.input)
+ results.append({
+ "type": "tool_result",
+ "tool_use_id": block.id,
+ "content": output,
+ })
+ messages.append({"role": "user", "content": results})
+```
+
+Every lesson layers one harness mechanism on top of this loop -- the loop itself never changes. The loop belongs to the agent. The mechanisms belong to the harness.
+
+The loop is constant. Tools, knowledge, and permissions change. Agent = Model (LLM) + a generalized operational environment (Harness).
+
+---
+
+## Version Status
+
+This repository currently contains two tutorial tracks:
+
+- **Current track: root-level `s01-s20`**
+ The root-level `s01_*` ... `s20_*` folders are the new canonical version. Each chapter contains a full narrative README, translations, runnable `code.py`, and diagrams where needed.
+- **Legacy transition track: `docs/`, `agents/`, and the current `web/` app**
+ These still preserve the older 12-lesson version. They are kept temporarily for existing readers, old links, and the web platform while the new 20-lesson track settles.
+
+If you are starting now, read the root-level `s01_agent_loop/` through `s20_comprehensive/` chapters. If you are following an older link or using the current web app, you are likely reading the legacy 12-lesson track. The legacy and current chapter numbers do not always match, so avoid mixing chapter numbers across tracks.
+
+### Legacy-to-Current Mapping
+
+| Legacy 12-lesson track | Current 20-lesson track | Topic |
+|---|---|---|
+| old s01 | new s01 | Agent Loop |
+| old s02 | new s02 | Tool Use |
+| old s03 | new s05 | TodoWrite |
+| old s04 | new s06 | Subagent |
+| old s05 | new s07 | Skill Loading |
+| old s06 | new s08 | Context Compact |
+| old s07 | new s12 | Task System |
+| old s08 | new s13 | Background Tasks |
+| old s09 | new s15 | Agent Teams |
+| old s10 | new s16 | Team Protocols |
+| old s11 | new s17 | Autonomous Agents |
+| old s12 | new s18 | Worktree Isolation |
+| new only | s03, s04, s09, s10, s11, s14, s19, s20 | Permission, Hooks, Memory, System Prompt, Error Recovery, Cron, MCP, Comprehensive Agent |
+
+---
+
+## Scope
+
+This repository is a 0-to-1 harness engineering learning project: it teaches how to build the working environment around an agent model. To keep the learning path clear, some production mechanisms are intentionally simplified or omitted:
+
+- Full event / hook bus behavior, such as `PreToolUse`, `SessionStart/End`, and `ConfigChange`.
+ The teaching code uses minimal lifecycle events where needed.
+- Rule-based permission governance and full trust workflows.
+- Session lifecycle controls such as resume/fork, plus more complete worktree lifecycle handling.
+- Full MCP runtime details such as transport, OAuth, resource subscription, and polling.
+
+The JSONL mailbox protocol in this repository is a teaching implementation, not a claim about any specific production internal implementation.
+
+---
+
+## 20 Progressive Lessons
+
+**Each lesson adds one harness mechanism. Each mechanism has a motto.**
+
+> **s01** *"One loop & Bash is all you need"* — one tool + one loop = one agent
+>
+> **s02** *"Adding a tool means adding one handler"* — the loop stays untouched; new tools register into the dispatch map
+>
+> **s03** *"Set boundaries first, then grant freedom"* — check what can run, what must stop, and what needs approval
+>
+> **s04** *"Hook around the loop, never rewrite the loop"* — add extension points without changing the main loop
+>
+> **s05** *"An agent without a plan drifts"* — list the steps before starting; completion rate doubles
+>
+> **s06** *"Big tasks split small, each subtask gets clean context"* — subagents do the side work and bring back only the result
+>
+> **s07** *"Load knowledge on demand, not upfront"* — list skills first, expand them only when needed
+>
+> **s08** *"Context always fills up -- have a way to make room"* — multi-layer compaction strategies buy you infinite sessions
+>
+> **s09** *"Remember what matters, forget what doesn't"* — three subsystems: selection, extraction, consolidation
+>
+> **s10** *"Prompts are assembled at runtime, not hardcoded"* — section-based concatenation, loaded on demand
+>
+> **s11** *"Errors aren't the end, they're the start of a retry"* — retry, make room, or take another path when things fail
+>
+> **s12** *"Big goals break into small tasks, ordered, persisted to disk"* — a file-backed task graph that lays the groundwork for multi-agent coordination
+>
+> **s13** *"Slow ops go background, agent keeps thinking"* — background threads run commands; notifications inject on completion
+>
+> **s14** *"Fire on schedule, no human kick needed"* — trigger tasks automatically by time
+>
+> **s15** *"Too big for one agent -- delegate to teammates"* — persistent teammates + async mailboxes
+>
+> **s16** *"Teammates need shared communication rules"* — use a fixed request-reply format for coordination
+>
+> **s17** *"Teammates check the board, claim work themselves"* — no leader assigning one by one; self-organizing
+>
+> **s18** *"Each works in its own directory, no interference"* — tasks own goals, worktrees own directories, bound by ID
+>
+> **s19** *"Not enough capability? Plug in more via MCP"* — connect external tools into the same tool pool
+>
+> **s20** *"Many mechanisms, one loop"* — all previous mechanisms return to one complete harness
+
+---
+
+## Learning Path
+
+Main line: act → handle complex work → remember and recover → run long tasks → collaborate → extend and assemble.
+
+```mermaid
+flowchart TD
+ %% Card styles
+ classDef stage1 fill:#E3F2FD,stroke:#1976D2,stroke-width:2px,color:#0D47A1,rx:12,ry:12,text-align:left
+ classDef stage2 fill:#E8F5E9,stroke:#388E3C,stroke-width:2px,color:#1B5E20,rx:12,ry:12,text-align:left
+ classDef stage3 fill:#FFF3E0,stroke:#F57C00,stroke-width:2px,color:#E65100,rx:12,ry:12,text-align:left
+ classDef stage4 fill:#FCE4EC,stroke:#C2185b,stroke-width:2px,color:#880E4F,rx:12,ry:12,text-align:left
+ classDef stage5 fill:#F3E5F5,stroke:#7B1FA2,stroke-width:2px,color:#4A148C,rx:12,ry:12,text-align:left
+ classDef stage6 fill:#E0F7FA,stroke:#0097A7,stroke-width:2px,color:#006064,rx:12,ry:12,text-align:left
+
+ %% Group style
+ classDef groupBox fill:#F8F9FA,stroke:#CED4DA,stroke-width:2px,stroke-dasharray: 5 5,rx:15,ry:15,color:#495057
+
+ %% Layer 1: stages 1-3
+ subgraph Phase1 ["🌱 Stages 1-3: Core capabilities (simple to complex)"]
+ direction LR
+ S1["1. Let the Agent act<br/>━━━━━━━━━━━━━<br/>s01 Agent Loop<br/>└─ one loop + bash<br/>s02 Tool Use<br/>└─ one tool to many tools<br/>s03 Permission<br/>└─ decide what can run<br/>s04 Hooks<br/>└─ extension points around tools"]:::stage1
+
+ S2["2. Handle complex work<br/>━━━━━━━━━━━━━<br/>s05 TodoWrite<br/>└─ plan first, then execute<br/>s06 Subagent<br/>└─ side work, result back<br/>s08 Context Compact<br/>└─ make room in long context"]:::stage2
+
+ S3["3. Remember and recover<br/>━━━━━━━━━━━━━<br/>s09 Memory<br/>└─ remember what matters<br/>s10 System Prompt<br/>└─ assemble at runtime<br/>s11 Error Recovery<br/>└─ retry or change path"]:::stage3
+
+ S1 ==> S2 ==> S3
+ end
+
+ %% Layer 2: stages 4-6
+ subgraph Phase2 ["🚀 Stages 4-6: Advanced capabilities (long-running, collaboration, integration)"]
+ direction LR
+ S4["4. Run long tasks<br/>━━━━━━━━━━━━━<br/>s12 Task System<br/>└─ persist tasks and deps<br/>s13 Background Tasks<br/>└─ send slow work background<br/>s14 Cron Scheduler<br/>└─ trigger by time"]:::stage4
+
+ S5["5. Coordinate many Agents<br/>━━━━━━━━━━━━━<br/>s15 Agent Teams<br/>└─ teammates + mailboxes<br/>s16 Team Protocols<br/>└─ fixed request-reply format<br/>s17 Autonomous Agents<br/>└─ claim work from the board<br/>s18 Worktree Isolation<br/>└─ separate directories"]:::stage5
+
+ S6["6. Extend and assemble<br/>━━━━━━━━━━━━━<br/>s07 Skill Loading<br/>└─ expand skills on demand<br/>s19 MCP Plugin<br/>└─ external tools, one pool<br/>s20 Comprehensive Agent<br/>└─ all mechanisms, one loop"]:::stage6
+
+ S4 ==> S5 ==> S6
+ end
+
+ %% Connect the two layers
+ Phase1 ===> Phase2
+
+ class Phase1,Phase2 groupBox
+```
+
+---
+
+## All Chapters
+
+| Chapter | Topic | Key Concepts |
+|---|---|---|
+| [s01](./s01_agent_loop/) | Agent Loop | `messages` / `while True` / `stop_reason` |
+| [s02](./s02_tool_use/) | Tool Use | `TOOL_HANDLERS` / dispatch map / concurrency |
+| [s03](./s03_permission/) | Permission System | `PermissionRule` / approval pipeline |
+| [s04](./s04_hooks/) | Hook System | `PreToolUse` / `PostToolUse` / extension points |
+| [s05](./s05_todo_write/) | TodoWrite | `TodoItem` / plan-then-execute |
+| [s06](./s06_subagent/) | Subagent | `fresh messages[]` / context isolation |
+| [s07](./s07_skill_loading/) | Skill Loading | `SkillManifest` / on-demand injection |
+| [s08](./s08_context_compact/) | Context Compact | snipCompact / microCompact / toolResultBudget / autoCompact |
+| [s09](./s09_memory/) | Memory System | selection / extraction / consolidation |
+| [s10](./s10_system_prompt/) | System Prompt | runtime assembly / section concatenation |
+| [s11](./s11_error_recovery/) | Error Recovery | token escalation / fallback model / retry strategies |
+| [s12](./s12_task_system/) | Task System | `TaskRecord` / `blockedBy` / disk persistence |
+| [s13](./s13_background_tasks/) | Background Tasks | threaded execution / notification queue |
+| [s14](./s14_cron_scheduler/) | Cron Scheduler | durable scheduling / session-scoped triggers |
+| [s15](./s15_agent_teams/) | Agent Teams | `MessageBus` / inbox / permission bubbling |
+| [s16](./s16_team_protocols/) | Team Protocols | shutdown handshake / plan approval |
+| [s17](./s17_autonomous_agents/) | Autonomous Agents | idle cycle / auto-claim / self-organization |
+| [s18](./s18_worktree_isolation/) | Worktree Isolation | `WorktreeRecord` / task-directory binding |
+| [s19](./s19_mcp_plugin/) | MCP Plugin | multi-transport / channel routing / tool pool assembly |
+| [s20](./s20_comprehensive/) | Comprehensive Agent | all mechanisms around one loop |
+
+---
+
+## How to Read
+
+Each chapter is a folder. Open one and you will find:
+
+```
+s08_context_compact/
+ README.md # Chinese source: full narrative with inline code
+ README.en.md # English translation
+ README.ja.md # Japanese translation
+ code.py # standalone runnable implementation
+ images/ # SVG diagrams (where needed)
+```
+
+Read the `README.md` for the core idea and work through the code. Complex chapters have `<details>` folds for deep dives -- open them when you want to go deeper. Simple chapters have 0-1 diagrams; complex chapters have more.
+
+Read from s01 through s20 in order. Each chapter assumes you've read the previous ones and ends with a hook into the next.
+
+---
+
+## Quick Start
+
+### Current 20-Lesson Track
+
+```sh
+git clone https://github.com/shareAI-lab/learn-claude-code
+cd learn-claude-code
+pip install -r requirements.txt
+cp .env.example .env # configure ANTHROPIC_API_KEY
+
+python s01_agent_loop/code.py # Start here -- one loop + bash
+python s08_context_compact/code.py # Context compaction (complex)
+python s20_comprehensive/code.py # Endpoint: all mechanisms in one loop
+```
+
+### Legacy 12-Lesson Track
+
+```sh
+python agents/s01_agent_loop.py
+python agents/s12_worktree_task_isolation.py
+python agents/s_full.py
+```
+
+### Web Platform
+
+The current web app still renders the legacy `docs/` s01-s12 track. Use the root-level folders for the new s01-s20 track.
+
+```sh
+cd web && npm install && npm run dev # http://localhost:3000
+```
+
+---
+
+## Project Structure
+
+```
+learn-claude-code/
+ s01_agent_loop/ # one folder per chapter
+ README.md # Chinese source (complete narrative)
+ README.en.md # English translation
+ README.ja.md # Japanese translation
+ code.py # standalone runnable code
+ images/ # SVG diagrams
+ s02_tool_use/
+ ...
+ s19_mcp_plugin/
+ s20_comprehensive/ # endpoint chapter
+ agents/ # legacy 12 runnable copies + s_full.py
+ skills/ # skill files used by s07
+ docs/ # legacy 12-lesson docs, kept during transition
+ web/ # currently renders the legacy docs/ track
+ tests/
+```
+
+---
+
+## What's Next
+
+After 20 lessons, you understand harness engineering from the inside out. Two paths to turn that knowledge into product:
+
+### Kode Agent CLI -- Open-Source Coding Agent CLI
+
+> `npm i -g @shareai-lab/kode`
+
+Skill and LSP support, Windows compatible, works with GLM / MiniMax / DeepSeek and other open models. Install and go.
+
+GitHub: **[shareAI-lab/Kode-Agent](https://github.com/shareAI-lab/Kode-Agent)**
+
+### Kode Agent SDK -- Embed Agent Capabilities in Your Application
+
+A standalone library with no per-user process overhead. Embed it in backends, browser extensions, embedded devices, or any runtime.
+
+GitHub: **[shareAI-lab/kode-agent-sdk](https://github.com/shareAI-lab/kode-agent-sdk)**
+
+---
+
+## Sister Tutorial: From Passive Sessions to Always-On Assistants
+
+The harness taught in this repository is the **use-and-discard** kind -- open a terminal, give the agent a task, close when done, next session starts fresh. Claude Code works this way.
+
+But [OpenClaw](https://github.com/openclaw/openclaw) proves another possibility: on the same agent core, two additional harness mechanisms turn an agent from "poke it and it moves" into "wakes itself every 30 seconds to look for work":
+
+- **Heartbeat** -- every 30 seconds the harness sends the agent a message, letting it check for pending work. Nothing to do? Keep sleeping. Something appeared? Act immediately.
+- **Cron** -- the agent can schedule its own future tasks, which fire automatically when the time arrives.
+
+Add IM multi-channel routing (WhatsApp / Telegram / Slack / Discord and 13+ other platforms), persistent context memory, and a Soul personality system, and the agent transforms from a disposable tool into an always-on personal AI assistant.
+
+**[claw0](https://github.com/shareAI-lab/claw0)** is our sister teaching repository, breaking down these harness mechanisms from scratch:
+
+```
+claw agent = agent core + heartbeat + cron + IM chat + memory + soul
+```
+
+```
+learn-claude-code claw0
+(agent harness internals: (always-on harness:
+ loop, tools, planning, heartbeat, cron, IM channels,
+ teams, worktree isolation) memory, Soul personality)
+```
+
+## License
+
+MIT
+
+---
+
+**Agency comes from the model. The harness gives agency a place to land. Build the harness well, and the model will do the rest.**
+
+**Bash is all you need. Real agents are all the universe needs.**
+
+**This is not "copy the source code." This is "grasp the key designs and build it yourself."**
diff --git a/agents/__init__.py b/learn-claude-code/agents/__init__.py
similarity index 100%
rename from agents/__init__.py
rename to learn-claude-code/agents/__init__.py
diff --git a/agents/s01_agent_loop.py b/learn-claude-code/agents/s01_agent_loop.py
similarity index 100%
rename from agents/s01_agent_loop.py
rename to learn-claude-code/agents/s01_agent_loop.py
diff --git a/agents/s02_tool_use.py b/learn-claude-code/agents/s02_tool_use.py
similarity index 100%
rename from agents/s02_tool_use.py
rename to learn-claude-code/agents/s02_tool_use.py
diff --git a/agents/s03_todo_write.py b/learn-claude-code/agents/s03_todo_write.py
similarity index 100%
rename from agents/s03_todo_write.py
rename to learn-claude-code/agents/s03_todo_write.py
diff --git a/agents/s04_subagent.py b/learn-claude-code/agents/s04_subagent.py
similarity index 100%
rename from agents/s04_subagent.py
rename to learn-claude-code/agents/s04_subagent.py
diff --git a/agents/s05_skill_loading.py b/learn-claude-code/agents/s05_skill_loading.py
similarity index 100%
rename from agents/s05_skill_loading.py
rename to learn-claude-code/agents/s05_skill_loading.py
diff --git a/agents/s06_context_compact.py b/learn-claude-code/agents/s06_context_compact.py
similarity index 100%
rename from agents/s06_context_compact.py
rename to learn-claude-code/agents/s06_context_compact.py
diff --git a/agents/s07_task_system.py b/learn-claude-code/agents/s07_task_system.py
similarity index 100%
rename from agents/s07_task_system.py
rename to learn-claude-code/agents/s07_task_system.py
diff --git a/agents/s08_background_tasks.py b/learn-claude-code/agents/s08_background_tasks.py
similarity index 100%
rename from agents/s08_background_tasks.py
rename to learn-claude-code/agents/s08_background_tasks.py
diff --git a/agents/s09_agent_teams.py b/learn-claude-code/agents/s09_agent_teams.py
similarity index 100%
rename from agents/s09_agent_teams.py
rename to learn-claude-code/agents/s09_agent_teams.py
diff --git a/agents/s10_team_protocols.py b/learn-claude-code/agents/s10_team_protocols.py
similarity index 100%
rename from agents/s10_team_protocols.py
rename to learn-claude-code/agents/s10_team_protocols.py
diff --git a/agents/s11_autonomous_agents.py b/learn-claude-code/agents/s11_autonomous_agents.py
similarity index 100%
rename from agents/s11_autonomous_agents.py
rename to learn-claude-code/agents/s11_autonomous_agents.py
diff --git a/agents/s12_worktree_task_isolation.py b/learn-claude-code/agents/s12_worktree_task_isolation.py
similarity index 100%
rename from agents/s12_worktree_task_isolation.py
rename to learn-claude-code/agents/s12_worktree_task_isolation.py
diff --git a/agents/s_full.py b/learn-claude-code/agents/s_full.py
similarity index 100%
rename from agents/s_full.py
rename to learn-claude-code/agents/s_full.py
diff --git a/docs/en/s01-the-agent-loop.md b/learn-claude-code/docs/en/s01-the-agent-loop.md
similarity index 100%
rename from docs/en/s01-the-agent-loop.md
rename to learn-claude-code/docs/en/s01-the-agent-loop.md
diff --git a/docs/en/s02-tool-use.md b/learn-claude-code/docs/en/s02-tool-use.md
similarity index 100%
rename from docs/en/s02-tool-use.md
rename to learn-claude-code/docs/en/s02-tool-use.md
diff --git a/docs/en/s03-todo-write.md b/learn-claude-code/docs/en/s03-todo-write.md
similarity index 100%
rename from docs/en/s03-todo-write.md
rename to learn-claude-code/docs/en/s03-todo-write.md
diff --git a/docs/en/s04-subagent.md b/learn-claude-code/docs/en/s04-subagent.md
similarity index 100%
rename from docs/en/s04-subagent.md
rename to learn-claude-code/docs/en/s04-subagent.md
diff --git a/docs/en/s05-skill-loading.md b/learn-claude-code/docs/en/s05-skill-loading.md
similarity index 100%
rename from docs/en/s05-skill-loading.md
rename to learn-claude-code/docs/en/s05-skill-loading.md
diff --git a/docs/en/s06-context-compact.md b/learn-claude-code/docs/en/s06-context-compact.md
similarity index 100%
rename from docs/en/s06-context-compact.md
rename to learn-claude-code/docs/en/s06-context-compact.md
diff --git a/docs/en/s07-task-system.md b/learn-claude-code/docs/en/s07-task-system.md
similarity index 100%
rename from docs/en/s07-task-system.md
rename to learn-claude-code/docs/en/s07-task-system.md
diff --git a/docs/en/s08-background-tasks.md b/learn-claude-code/docs/en/s08-background-tasks.md
similarity index 100%
rename from docs/en/s08-background-tasks.md
rename to learn-claude-code/docs/en/s08-background-tasks.md
diff --git a/docs/en/s09-agent-teams.md b/learn-claude-code/docs/en/s09-agent-teams.md
similarity index 100%
rename from docs/en/s09-agent-teams.md
rename to learn-claude-code/docs/en/s09-agent-teams.md
diff --git a/docs/en/s10-team-protocols.md b/learn-claude-code/docs/en/s10-team-protocols.md
similarity index 100%
rename from docs/en/s10-team-protocols.md
rename to learn-claude-code/docs/en/s10-team-protocols.md
diff --git a/docs/en/s11-autonomous-agents.md b/learn-claude-code/docs/en/s11-autonomous-agents.md
similarity index 100%
rename from docs/en/s11-autonomous-agents.md
rename to learn-claude-code/docs/en/s11-autonomous-agents.md
diff --git a/docs/en/s12-worktree-task-isolation.md b/learn-claude-code/docs/en/s12-worktree-task-isolation.md
similarity index 100%
rename from docs/en/s12-worktree-task-isolation.md
rename to learn-claude-code/docs/en/s12-worktree-task-isolation.md
diff --git a/docs/ja/s01-the-agent-loop.md b/learn-claude-code/docs/ja/s01-the-agent-loop.md
similarity index 100%
rename from docs/ja/s01-the-agent-loop.md
rename to learn-claude-code/docs/ja/s01-the-agent-loop.md
diff --git a/docs/ja/s02-tool-use.md b/learn-claude-code/docs/ja/s02-tool-use.md
similarity index 100%
rename from docs/ja/s02-tool-use.md
rename to learn-claude-code/docs/ja/s02-tool-use.md
diff --git a/docs/ja/s03-todo-write.md b/learn-claude-code/docs/ja/s03-todo-write.md
similarity index 100%
rename from docs/ja/s03-todo-write.md
rename to learn-claude-code/docs/ja/s03-todo-write.md
diff --git a/docs/ja/s04-subagent.md b/learn-claude-code/docs/ja/s04-subagent.md
similarity index 100%
rename from docs/ja/s04-subagent.md
rename to learn-claude-code/docs/ja/s04-subagent.md
diff --git a/docs/ja/s05-skill-loading.md b/learn-claude-code/docs/ja/s05-skill-loading.md
similarity index 100%
rename from docs/ja/s05-skill-loading.md
rename to learn-claude-code/docs/ja/s05-skill-loading.md
diff --git a/docs/ja/s06-context-compact.md b/learn-claude-code/docs/ja/s06-context-compact.md
similarity index 100%
rename from docs/ja/s06-context-compact.md
rename to learn-claude-code/docs/ja/s06-context-compact.md
diff --git a/docs/ja/s07-task-system.md b/learn-claude-code/docs/ja/s07-task-system.md
similarity index 100%
rename from docs/ja/s07-task-system.md
rename to learn-claude-code/docs/ja/s07-task-system.md
diff --git a/docs/ja/s08-background-tasks.md b/learn-claude-code/docs/ja/s08-background-tasks.md
similarity index 100%
rename from docs/ja/s08-background-tasks.md
rename to learn-claude-code/docs/ja/s08-background-tasks.md
diff --git a/docs/ja/s09-agent-teams.md b/learn-claude-code/docs/ja/s09-agent-teams.md
similarity index 100%
rename from docs/ja/s09-agent-teams.md
rename to learn-claude-code/docs/ja/s09-agent-teams.md
diff --git a/docs/ja/s10-team-protocols.md b/learn-claude-code/docs/ja/s10-team-protocols.md
similarity index 100%
rename from docs/ja/s10-team-protocols.md
rename to learn-claude-code/docs/ja/s10-team-protocols.md
diff --git a/docs/ja/s11-autonomous-agents.md b/learn-claude-code/docs/ja/s11-autonomous-agents.md
similarity index 100%
rename from docs/ja/s11-autonomous-agents.md
rename to learn-claude-code/docs/ja/s11-autonomous-agents.md
diff --git a/docs/ja/s12-worktree-task-isolation.md b/learn-claude-code/docs/ja/s12-worktree-task-isolation.md
similarity index 100%
rename from docs/ja/s12-worktree-task-isolation.md
rename to learn-claude-code/docs/ja/s12-worktree-task-isolation.md
diff --git a/docs/zh/s01-the-agent-loop.md b/learn-claude-code/docs/zh/s01-the-agent-loop.md
similarity index 100%
rename from docs/zh/s01-the-agent-loop.md
rename to learn-claude-code/docs/zh/s01-the-agent-loop.md
diff --git a/docs/zh/s02-tool-use.md b/learn-claude-code/docs/zh/s02-tool-use.md
similarity index 100%
rename from docs/zh/s02-tool-use.md
rename to learn-claude-code/docs/zh/s02-tool-use.md
diff --git a/docs/zh/s03-todo-write.md b/learn-claude-code/docs/zh/s03-todo-write.md
similarity index 100%
rename from docs/zh/s03-todo-write.md
rename to learn-claude-code/docs/zh/s03-todo-write.md
diff --git a/docs/zh/s04-subagent.md b/learn-claude-code/docs/zh/s04-subagent.md
similarity index 100%
rename from docs/zh/s04-subagent.md
rename to learn-claude-code/docs/zh/s04-subagent.md
diff --git a/docs/zh/s05-skill-loading.md b/learn-claude-code/docs/zh/s05-skill-loading.md
similarity index 100%
rename from docs/zh/s05-skill-loading.md
rename to learn-claude-code/docs/zh/s05-skill-loading.md
diff --git a/docs/zh/s06-context-compact.md b/learn-claude-code/docs/zh/s06-context-compact.md
similarity index 100%
rename from docs/zh/s06-context-compact.md
rename to learn-claude-code/docs/zh/s06-context-compact.md
diff --git a/docs/zh/s07-task-system.md b/learn-claude-code/docs/zh/s07-task-system.md
similarity index 100%
rename from docs/zh/s07-task-system.md
rename to learn-claude-code/docs/zh/s07-task-system.md
diff --git a/docs/zh/s08-background-tasks.md b/learn-claude-code/docs/zh/s08-background-tasks.md
similarity index 100%
rename from docs/zh/s08-background-tasks.md
rename to learn-claude-code/docs/zh/s08-background-tasks.md
diff --git a/docs/zh/s09-agent-teams.md b/learn-claude-code/docs/zh/s09-agent-teams.md
similarity index 100%
rename from docs/zh/s09-agent-teams.md
rename to learn-claude-code/docs/zh/s09-agent-teams.md
diff --git a/docs/zh/s10-team-protocols.md b/learn-claude-code/docs/zh/s10-team-protocols.md
similarity index 100%
rename from docs/zh/s10-team-protocols.md
rename to learn-claude-code/docs/zh/s10-team-protocols.md
diff --git a/docs/zh/s11-autonomous-agents.md b/learn-claude-code/docs/zh/s11-autonomous-agents.md
similarity index 100%
rename from docs/zh/s11-autonomous-agents.md
rename to learn-claude-code/docs/zh/s11-autonomous-agents.md
diff --git a/docs/zh/s12-worktree-task-isolation.md b/learn-claude-code/docs/zh/s12-worktree-task-isolation.md
similarity index 100%
rename from docs/zh/s12-worktree-task-isolation.md
rename to learn-claude-code/docs/zh/s12-worktree-task-isolation.md
diff --git a/requirements.txt b/learn-claude-code/requirements.txt
similarity index 100%
rename from requirements.txt
rename to learn-claude-code/requirements.txt
diff --git a/s01_agent_loop/README.en.md b/learn-claude-code/s01_agent_loop/README.en.md
similarity index 100%
rename from s01_agent_loop/README.en.md
rename to learn-claude-code/s01_agent_loop/README.en.md
diff --git a/s01_agent_loop/README.ja.md b/learn-claude-code/s01_agent_loop/README.ja.md
similarity index 100%
rename from s01_agent_loop/README.ja.md
rename to learn-claude-code/s01_agent_loop/README.ja.md
diff --git a/s01_agent_loop/README.md b/learn-claude-code/s01_agent_loop/README.md
similarity index 100%
rename from s01_agent_loop/README.md
rename to learn-claude-code/s01_agent_loop/README.md
diff --git a/s01_agent_loop/code.py b/learn-claude-code/s01_agent_loop/code.py
similarity index 100%
rename from s01_agent_loop/code.py
rename to learn-claude-code/s01_agent_loop/code.py
diff --git a/s01_agent_loop/images/agent-loop.en.svg b/learn-claude-code/s01_agent_loop/images/agent-loop.en.svg
similarity index 100%
rename from s01_agent_loop/images/agent-loop.en.svg
rename to learn-claude-code/s01_agent_loop/images/agent-loop.en.svg
diff --git a/s01_agent_loop/images/agent-loop.ja.svg b/learn-claude-code/s01_agent_loop/images/agent-loop.ja.svg
similarity index 100%
rename from s01_agent_loop/images/agent-loop.ja.svg
rename to learn-claude-code/s01_agent_loop/images/agent-loop.ja.svg
diff --git a/s01_agent_loop/images/agent-loop.svg b/learn-claude-code/s01_agent_loop/images/agent-loop.svg
similarity index 100%
rename from s01_agent_loop/images/agent-loop.svg
rename to learn-claude-code/s01_agent_loop/images/agent-loop.svg
diff --git a/s02_tool_use/README.en.md b/learn-claude-code/s02_tool_use/README.en.md
similarity index 100%
rename from s02_tool_use/README.en.md
rename to learn-claude-code/s02_tool_use/README.en.md
diff --git a/s02_tool_use/README.ja.md b/learn-claude-code/s02_tool_use/README.ja.md
similarity index 100%
rename from s02_tool_use/README.ja.md
rename to learn-claude-code/s02_tool_use/README.ja.md
diff --git a/s02_tool_use/README.md b/learn-claude-code/s02_tool_use/README.md
similarity index 100%
rename from s02_tool_use/README.md
rename to learn-claude-code/s02_tool_use/README.md
diff --git a/s02_tool_use/code.py b/learn-claude-code/s02_tool_use/code.py
similarity index 100%
rename from s02_tool_use/code.py
rename to learn-claude-code/s02_tool_use/code.py
diff --git a/s02_tool_use/images/concurrency-comparison.en.svg b/learn-claude-code/s02_tool_use/images/concurrency-comparison.en.svg
similarity index 100%
rename from s02_tool_use/images/concurrency-comparison.en.svg
rename to learn-claude-code/s02_tool_use/images/concurrency-comparison.en.svg
diff --git a/s02_tool_use/images/concurrency-comparison.ja.svg b/learn-claude-code/s02_tool_use/images/concurrency-comparison.ja.svg
similarity index 100%
rename from s02_tool_use/images/concurrency-comparison.ja.svg
rename to learn-claude-code/s02_tool_use/images/concurrency-comparison.ja.svg
diff --git a/s02_tool_use/images/concurrency-comparison.svg b/learn-claude-code/s02_tool_use/images/concurrency-comparison.svg
similarity index 100%
rename from s02_tool_use/images/concurrency-comparison.svg
rename to learn-claude-code/s02_tool_use/images/concurrency-comparison.svg
diff --git a/s02_tool_use/images/tool-dispatch.en.svg b/learn-claude-code/s02_tool_use/images/tool-dispatch.en.svg
similarity index 100%
rename from s02_tool_use/images/tool-dispatch.en.svg
rename to learn-claude-code/s02_tool_use/images/tool-dispatch.en.svg
diff --git a/s02_tool_use/images/tool-dispatch.ja.svg b/learn-claude-code/s02_tool_use/images/tool-dispatch.ja.svg
similarity index 100%
rename from s02_tool_use/images/tool-dispatch.ja.svg
rename to learn-claude-code/s02_tool_use/images/tool-dispatch.ja.svg
diff --git a/s02_tool_use/images/tool-dispatch.svg b/learn-claude-code/s02_tool_use/images/tool-dispatch.svg
similarity index 100%
rename from s02_tool_use/images/tool-dispatch.svg
rename to learn-claude-code/s02_tool_use/images/tool-dispatch.svg
diff --git a/s03_permission/README.en.md b/learn-claude-code/s03_permission/README.en.md
similarity index 100%
rename from s03_permission/README.en.md
rename to learn-claude-code/s03_permission/README.en.md
diff --git a/s03_permission/README.ja.md b/learn-claude-code/s03_permission/README.ja.md
similarity index 100%
rename from s03_permission/README.ja.md
rename to learn-claude-code/s03_permission/README.ja.md
diff --git a/s03_permission/README.md b/learn-claude-code/s03_permission/README.md
similarity index 100%
rename from s03_permission/README.md
rename to learn-claude-code/s03_permission/README.md
diff --git a/s03_permission/code.py b/learn-claude-code/s03_permission/code.py
similarity index 100%
rename from s03_permission/code.py
rename to learn-claude-code/s03_permission/code.py
diff --git a/s03_permission/images/permission-overview.en.svg b/learn-claude-code/s03_permission/images/permission-overview.en.svg
similarity index 100%
rename from s03_permission/images/permission-overview.en.svg
rename to learn-claude-code/s03_permission/images/permission-overview.en.svg
diff --git a/s03_permission/images/permission-overview.ja.svg b/learn-claude-code/s03_permission/images/permission-overview.ja.svg
similarity index 100%
rename from s03_permission/images/permission-overview.ja.svg
rename to learn-claude-code/s03_permission/images/permission-overview.ja.svg
diff --git a/s03_permission/images/permission-overview.svg b/learn-claude-code/s03_permission/images/permission-overview.svg
similarity index 100%
rename from s03_permission/images/permission-overview.svg
rename to learn-claude-code/s03_permission/images/permission-overview.svg
diff --git a/s03_permission/images/permission-pipeline.en.svg b/learn-claude-code/s03_permission/images/permission-pipeline.en.svg
similarity index 100%
rename from s03_permission/images/permission-pipeline.en.svg
rename to learn-claude-code/s03_permission/images/permission-pipeline.en.svg
diff --git a/s03_permission/images/permission-pipeline.ja.svg b/learn-claude-code/s03_permission/images/permission-pipeline.ja.svg
similarity index 100%
rename from s03_permission/images/permission-pipeline.ja.svg
rename to learn-claude-code/s03_permission/images/permission-pipeline.ja.svg
diff --git a/s03_permission/images/permission-pipeline.svg b/learn-claude-code/s03_permission/images/permission-pipeline.svg
similarity index 100%
rename from s03_permission/images/permission-pipeline.svg
rename to learn-claude-code/s03_permission/images/permission-pipeline.svg
diff --git a/s04_hooks/README.en.md b/learn-claude-code/s04_hooks/README.en.md
similarity index 100%
rename from s04_hooks/README.en.md
rename to learn-claude-code/s04_hooks/README.en.md
diff --git a/s04_hooks/README.ja.md b/learn-claude-code/s04_hooks/README.ja.md
similarity index 100%
rename from s04_hooks/README.ja.md
rename to learn-claude-code/s04_hooks/README.ja.md
diff --git a/s04_hooks/README.md b/learn-claude-code/s04_hooks/README.md
similarity index 100%
rename from s04_hooks/README.md
rename to learn-claude-code/s04_hooks/README.md
diff --git a/s04_hooks/code.py b/learn-claude-code/s04_hooks/code.py
similarity index 100%
rename from s04_hooks/code.py
rename to learn-claude-code/s04_hooks/code.py
diff --git a/s04_hooks/images/hooks-overview.en.svg b/learn-claude-code/s04_hooks/images/hooks-overview.en.svg
similarity index 100%
rename from s04_hooks/images/hooks-overview.en.svg
rename to learn-claude-code/s04_hooks/images/hooks-overview.en.svg
diff --git a/s04_hooks/images/hooks-overview.ja.svg b/learn-claude-code/s04_hooks/images/hooks-overview.ja.svg
similarity index 100%
rename from s04_hooks/images/hooks-overview.ja.svg
rename to learn-claude-code/s04_hooks/images/hooks-overview.ja.svg
diff --git a/s04_hooks/images/hooks-overview.svg b/learn-claude-code/s04_hooks/images/hooks-overview.svg
similarity index 100%
rename from s04_hooks/images/hooks-overview.svg
rename to learn-claude-code/s04_hooks/images/hooks-overview.svg
diff --git a/s05_todo_write/README.en.md b/learn-claude-code/s05_todo_write/README.en.md
similarity index 100%
rename from s05_todo_write/README.en.md
rename to learn-claude-code/s05_todo_write/README.en.md
diff --git a/s05_todo_write/README.ja.md b/learn-claude-code/s05_todo_write/README.ja.md
similarity index 100%
rename from s05_todo_write/README.ja.md
rename to learn-claude-code/s05_todo_write/README.ja.md
diff --git a/s05_todo_write/README.md b/learn-claude-code/s05_todo_write/README.md
similarity index 100%
rename from s05_todo_write/README.md
rename to learn-claude-code/s05_todo_write/README.md
diff --git a/s05_todo_write/code.py b/learn-claude-code/s05_todo_write/code.py
similarity index 100%
rename from s05_todo_write/code.py
rename to learn-claude-code/s05_todo_write/code.py
diff --git a/s05_todo_write/example/hello.py b/learn-claude-code/s05_todo_write/example/hello.py
similarity index 100%
rename from s05_todo_write/example/hello.py
rename to learn-claude-code/s05_todo_write/example/hello.py
diff --git a/s05_todo_write/images/todo-overview.en.svg b/learn-claude-code/s05_todo_write/images/todo-overview.en.svg
similarity index 100%
rename from s05_todo_write/images/todo-overview.en.svg
rename to learn-claude-code/s05_todo_write/images/todo-overview.en.svg
diff --git a/s05_todo_write/images/todo-overview.ja.svg b/learn-claude-code/s05_todo_write/images/todo-overview.ja.svg
similarity index 100%
rename from s05_todo_write/images/todo-overview.ja.svg
rename to learn-claude-code/s05_todo_write/images/todo-overview.ja.svg
diff --git a/s05_todo_write/images/todo-overview.svg b/learn-claude-code/s05_todo_write/images/todo-overview.svg
similarity index 100%
rename from s05_todo_write/images/todo-overview.svg
rename to learn-claude-code/s05_todo_write/images/todo-overview.svg
diff --git a/s06_subagent/README.en.md b/learn-claude-code/s06_subagent/README.en.md
similarity index 100%
rename from s06_subagent/README.en.md
rename to learn-claude-code/s06_subagent/README.en.md
diff --git a/s06_subagent/README.ja.md b/learn-claude-code/s06_subagent/README.ja.md
similarity index 100%
rename from s06_subagent/README.ja.md
rename to learn-claude-code/s06_subagent/README.ja.md
diff --git a/s06_subagent/README.md b/learn-claude-code/s06_subagent/README.md
similarity index 100%
rename from s06_subagent/README.md
rename to learn-claude-code/s06_subagent/README.md
diff --git a/s06_subagent/code.py b/learn-claude-code/s06_subagent/code.py
similarity index 100%
rename from s06_subagent/code.py
rename to learn-claude-code/s06_subagent/code.py
diff --git a/s06_subagent/images/subagent-overview.en.svg b/learn-claude-code/s06_subagent/images/subagent-overview.en.svg
similarity index 100%
rename from s06_subagent/images/subagent-overview.en.svg
rename to learn-claude-code/s06_subagent/images/subagent-overview.en.svg
diff --git a/s06_subagent/images/subagent-overview.ja.svg b/learn-claude-code/s06_subagent/images/subagent-overview.ja.svg
similarity index 100%
rename from s06_subagent/images/subagent-overview.ja.svg
rename to learn-claude-code/s06_subagent/images/subagent-overview.ja.svg
diff --git a/s06_subagent/images/subagent-overview.svg b/learn-claude-code/s06_subagent/images/subagent-overview.svg
similarity index 100%
rename from s06_subagent/images/subagent-overview.svg
rename to learn-claude-code/s06_subagent/images/subagent-overview.svg
diff --git a/s07_skill_loading/README.en.md b/learn-claude-code/s07_skill_loading/README.en.md
similarity index 100%
rename from s07_skill_loading/README.en.md
rename to learn-claude-code/s07_skill_loading/README.en.md
diff --git a/s07_skill_loading/README.ja.md b/learn-claude-code/s07_skill_loading/README.ja.md
similarity index 100%
rename from s07_skill_loading/README.ja.md
rename to learn-claude-code/s07_skill_loading/README.ja.md
diff --git a/s07_skill_loading/README.md b/learn-claude-code/s07_skill_loading/README.md
similarity index 100%
rename from s07_skill_loading/README.md
rename to learn-claude-code/s07_skill_loading/README.md
diff --git a/s07_skill_loading/code.py b/learn-claude-code/s07_skill_loading/code.py
similarity index 100%
rename from s07_skill_loading/code.py
rename to learn-claude-code/s07_skill_loading/code.py
diff --git a/s07_skill_loading/images/skill-overview.en.svg b/learn-claude-code/s07_skill_loading/images/skill-overview.en.svg
similarity index 100%
rename from s07_skill_loading/images/skill-overview.en.svg
rename to learn-claude-code/s07_skill_loading/images/skill-overview.en.svg
diff --git a/s07_skill_loading/images/skill-overview.ja.svg b/learn-claude-code/s07_skill_loading/images/skill-overview.ja.svg
similarity index 100%
rename from s07_skill_loading/images/skill-overview.ja.svg
rename to learn-claude-code/s07_skill_loading/images/skill-overview.ja.svg
diff --git a/s07_skill_loading/images/skill-overview.svg b/learn-claude-code/s07_skill_loading/images/skill-overview.svg
similarity index 100%
rename from s07_skill_loading/images/skill-overview.svg
rename to learn-claude-code/s07_skill_loading/images/skill-overview.svg
diff --git a/s08_context_compact/README.en.md b/learn-claude-code/s08_context_compact/README.en.md
similarity index 100%
rename from s08_context_compact/README.en.md
rename to learn-claude-code/s08_context_compact/README.en.md
diff --git a/s08_context_compact/README.ja.md b/learn-claude-code/s08_context_compact/README.ja.md
similarity index 100%
rename from s08_context_compact/README.ja.md
rename to learn-claude-code/s08_context_compact/README.ja.md
diff --git a/s08_context_compact/README.md b/learn-claude-code/s08_context_compact/README.md
similarity index 100%
rename from s08_context_compact/README.md
rename to learn-claude-code/s08_context_compact/README.md
diff --git a/s08_context_compact/code.py b/learn-claude-code/s08_context_compact/code.py
similarity index 100%
rename from s08_context_compact/code.py
rename to learn-claude-code/s08_context_compact/code.py
diff --git a/s08_context_compact/images/auto-compact.en.svg b/learn-claude-code/s08_context_compact/images/auto-compact.en.svg
similarity index 100%
rename from s08_context_compact/images/auto-compact.en.svg
rename to learn-claude-code/s08_context_compact/images/auto-compact.en.svg
diff --git a/s08_context_compact/images/auto-compact.ja.svg b/learn-claude-code/s08_context_compact/images/auto-compact.ja.svg
similarity index 100%
rename from s08_context_compact/images/auto-compact.ja.svg
rename to learn-claude-code/s08_context_compact/images/auto-compact.ja.svg
diff --git a/s08_context_compact/images/auto-compact.svg b/learn-claude-code/s08_context_compact/images/auto-compact.svg
similarity index 100%
rename from s08_context_compact/images/auto-compact.svg
rename to learn-claude-code/s08_context_compact/images/auto-compact.svg
diff --git a/s08_context_compact/images/compact-overview.en.svg b/learn-claude-code/s08_context_compact/images/compact-overview.en.svg
similarity index 100%
rename from s08_context_compact/images/compact-overview.en.svg
rename to learn-claude-code/s08_context_compact/images/compact-overview.en.svg
diff --git a/s08_context_compact/images/compact-overview.ja.svg b/learn-claude-code/s08_context_compact/images/compact-overview.ja.svg
similarity index 100%
rename from s08_context_compact/images/compact-overview.ja.svg
rename to learn-claude-code/s08_context_compact/images/compact-overview.ja.svg
diff --git a/s08_context_compact/images/compact-overview.svg b/learn-claude-code/s08_context_compact/images/compact-overview.svg
similarity index 100%
rename from s08_context_compact/images/compact-overview.svg
rename to learn-claude-code/s08_context_compact/images/compact-overview.svg
diff --git a/s08_context_compact/images/compaction-layers.en.svg b/learn-claude-code/s08_context_compact/images/compaction-layers.en.svg
similarity index 100%
rename from s08_context_compact/images/compaction-layers.en.svg
rename to learn-claude-code/s08_context_compact/images/compaction-layers.en.svg
diff --git a/s08_context_compact/images/compaction-layers.ja.svg b/learn-claude-code/s08_context_compact/images/compaction-layers.ja.svg
similarity index 100%
rename from s08_context_compact/images/compaction-layers.ja.svg
rename to learn-claude-code/s08_context_compact/images/compaction-layers.ja.svg
diff --git a/s08_context_compact/images/compaction-layers.svg b/learn-claude-code/s08_context_compact/images/compaction-layers.svg
similarity index 100%
rename from s08_context_compact/images/compaction-layers.svg
rename to learn-claude-code/s08_context_compact/images/compaction-layers.svg
diff --git a/s08_context_compact/images/layer1-budget.en.svg b/learn-claude-code/s08_context_compact/images/layer1-budget.en.svg
similarity index 100%
rename from s08_context_compact/images/layer1-budget.en.svg
rename to learn-claude-code/s08_context_compact/images/layer1-budget.en.svg
diff --git a/s08_context_compact/images/layer1-budget.ja.svg b/learn-claude-code/s08_context_compact/images/layer1-budget.ja.svg
similarity index 100%
rename from s08_context_compact/images/layer1-budget.ja.svg
rename to learn-claude-code/s08_context_compact/images/layer1-budget.ja.svg
diff --git a/s08_context_compact/images/layer1-budget.svg b/learn-claude-code/s08_context_compact/images/layer1-budget.svg
similarity index 100%
rename from s08_context_compact/images/layer1-budget.svg
rename to learn-claude-code/s08_context_compact/images/layer1-budget.svg
diff --git a/s08_context_compact/images/micro-compact.en.svg b/learn-claude-code/s08_context_compact/images/micro-compact.en.svg
similarity index 100%
rename from s08_context_compact/images/micro-compact.en.svg
rename to learn-claude-code/s08_context_compact/images/micro-compact.en.svg
diff --git a/s08_context_compact/images/micro-compact.ja.svg b/learn-claude-code/s08_context_compact/images/micro-compact.ja.svg
similarity index 100%
rename from s08_context_compact/images/micro-compact.ja.svg
rename to learn-claude-code/s08_context_compact/images/micro-compact.ja.svg
diff --git a/s08_context_compact/images/micro-compact.svg b/learn-claude-code/s08_context_compact/images/micro-compact.svg
similarity index 100%
rename from s08_context_compact/images/micro-compact.svg
rename to learn-claude-code/s08_context_compact/images/micro-compact.svg
diff --git a/s09_memory/README.en.md b/learn-claude-code/s09_memory/README.en.md
similarity index 100%
rename from s09_memory/README.en.md
rename to learn-claude-code/s09_memory/README.en.md
diff --git a/s09_memory/README.ja.md b/learn-claude-code/s09_memory/README.ja.md
similarity index 100%
rename from s09_memory/README.ja.md
rename to learn-claude-code/s09_memory/README.ja.md
diff --git a/s09_memory/README.md b/learn-claude-code/s09_memory/README.md
similarity index 100%
rename from s09_memory/README.md
rename to learn-claude-code/s09_memory/README.md
diff --git a/s09_memory/code.py b/learn-claude-code/s09_memory/code.py
similarity index 100%
rename from s09_memory/code.py
rename to learn-claude-code/s09_memory/code.py
diff --git a/s09_memory/images/memory-overview.en.svg b/learn-claude-code/s09_memory/images/memory-overview.en.svg
similarity index 100%
rename from s09_memory/images/memory-overview.en.svg
rename to learn-claude-code/s09_memory/images/memory-overview.en.svg
diff --git a/s09_memory/images/memory-overview.ja.svg b/learn-claude-code/s09_memory/images/memory-overview.ja.svg
similarity index 100%
rename from s09_memory/images/memory-overview.ja.svg
rename to learn-claude-code/s09_memory/images/memory-overview.ja.svg
diff --git a/s09_memory/images/memory-overview.svg b/learn-claude-code/s09_memory/images/memory-overview.svg
similarity index 100%
rename from s09_memory/images/memory-overview.svg
rename to learn-claude-code/s09_memory/images/memory-overview.svg
diff --git a/s09_memory/images/memory-subsystems.en.svg b/learn-claude-code/s09_memory/images/memory-subsystems.en.svg
similarity index 100%
rename from s09_memory/images/memory-subsystems.en.svg
rename to learn-claude-code/s09_memory/images/memory-subsystems.en.svg
diff --git a/s09_memory/images/memory-subsystems.ja.svg b/learn-claude-code/s09_memory/images/memory-subsystems.ja.svg
similarity index 100%
rename from s09_memory/images/memory-subsystems.ja.svg
rename to learn-claude-code/s09_memory/images/memory-subsystems.ja.svg
diff --git a/s09_memory/images/memory-subsystems.svg b/learn-claude-code/s09_memory/images/memory-subsystems.svg
similarity index 100%
rename from s09_memory/images/memory-subsystems.svg
rename to learn-claude-code/s09_memory/images/memory-subsystems.svg
diff --git a/s10_system_prompt/README.en.md b/learn-claude-code/s10_system_prompt/README.en.md
similarity index 100%
rename from s10_system_prompt/README.en.md
rename to learn-claude-code/s10_system_prompt/README.en.md
diff --git a/s10_system_prompt/README.ja.md b/learn-claude-code/s10_system_prompt/README.ja.md
similarity index 100%
rename from s10_system_prompt/README.ja.md
rename to learn-claude-code/s10_system_prompt/README.ja.md
diff --git a/s10_system_prompt/README.md b/learn-claude-code/s10_system_prompt/README.md
similarity index 100%
rename from s10_system_prompt/README.md
rename to learn-claude-code/s10_system_prompt/README.md
diff --git a/s10_system_prompt/code.py b/learn-claude-code/s10_system_prompt/code.py
similarity index 100%
rename from s10_system_prompt/code.py
rename to learn-claude-code/s10_system_prompt/code.py
diff --git a/s10_system_prompt/images/system-prompt-overview.en.svg b/learn-claude-code/s10_system_prompt/images/system-prompt-overview.en.svg
similarity index 100%
rename from s10_system_prompt/images/system-prompt-overview.en.svg
rename to learn-claude-code/s10_system_prompt/images/system-prompt-overview.en.svg
diff --git a/s10_system_prompt/images/system-prompt-overview.ja.svg b/learn-claude-code/s10_system_prompt/images/system-prompt-overview.ja.svg
similarity index 100%
rename from s10_system_prompt/images/system-prompt-overview.ja.svg
rename to learn-claude-code/s10_system_prompt/images/system-prompt-overview.ja.svg
diff --git a/s10_system_prompt/images/system-prompt-overview.svg b/learn-claude-code/s10_system_prompt/images/system-prompt-overview.svg
similarity index 100%
rename from s10_system_prompt/images/system-prompt-overview.svg
rename to learn-claude-code/s10_system_prompt/images/system-prompt-overview.svg
diff --git a/s11_error_recovery/README.en.md b/learn-claude-code/s11_error_recovery/README.en.md
similarity index 100%
rename from s11_error_recovery/README.en.md
rename to learn-claude-code/s11_error_recovery/README.en.md
diff --git a/s11_error_recovery/README.ja.md b/learn-claude-code/s11_error_recovery/README.ja.md
similarity index 100%
rename from s11_error_recovery/README.ja.md
rename to learn-claude-code/s11_error_recovery/README.ja.md
diff --git a/s11_error_recovery/README.md b/learn-claude-code/s11_error_recovery/README.md
similarity index 100%
rename from s11_error_recovery/README.md
rename to learn-claude-code/s11_error_recovery/README.md
diff --git a/s11_error_recovery/code.py b/learn-claude-code/s11_error_recovery/code.py
similarity index 100%
rename from s11_error_recovery/code.py
rename to learn-claude-code/s11_error_recovery/code.py
diff --git a/s11_error_recovery/images/error-recovery-overview.en.svg b/learn-claude-code/s11_error_recovery/images/error-recovery-overview.en.svg
similarity index 100%
rename from s11_error_recovery/images/error-recovery-overview.en.svg
rename to learn-claude-code/s11_error_recovery/images/error-recovery-overview.en.svg
diff --git a/s11_error_recovery/images/error-recovery-overview.ja.svg b/learn-claude-code/s11_error_recovery/images/error-recovery-overview.ja.svg
similarity index 100%
rename from s11_error_recovery/images/error-recovery-overview.ja.svg
rename to learn-claude-code/s11_error_recovery/images/error-recovery-overview.ja.svg
diff --git a/s11_error_recovery/images/error-recovery-overview.svg b/learn-claude-code/s11_error_recovery/images/error-recovery-overview.svg
similarity index 100%
rename from s11_error_recovery/images/error-recovery-overview.svg
rename to learn-claude-code/s11_error_recovery/images/error-recovery-overview.svg
diff --git a/s12_task_system/README.en.md b/learn-claude-code/s12_task_system/README.en.md
similarity index 100%
rename from s12_task_system/README.en.md
rename to learn-claude-code/s12_task_system/README.en.md
diff --git a/s12_task_system/README.ja.md b/learn-claude-code/s12_task_system/README.ja.md
similarity index 100%
rename from s12_task_system/README.ja.md
rename to learn-claude-code/s12_task_system/README.ja.md
diff --git a/s12_task_system/README.md b/learn-claude-code/s12_task_system/README.md
similarity index 100%
rename from s12_task_system/README.md
rename to learn-claude-code/s12_task_system/README.md
diff --git a/s12_task_system/code.py b/learn-claude-code/s12_task_system/code.py
similarity index 100%
rename from s12_task_system/code.py
rename to learn-claude-code/s12_task_system/code.py
diff --git a/s12_task_system/images/task-dag.en.svg b/learn-claude-code/s12_task_system/images/task-dag.en.svg
similarity index 100%
rename from s12_task_system/images/task-dag.en.svg
rename to learn-claude-code/s12_task_system/images/task-dag.en.svg
diff --git a/s12_task_system/images/task-dag.ja.svg b/learn-claude-code/s12_task_system/images/task-dag.ja.svg
similarity index 100%
rename from s12_task_system/images/task-dag.ja.svg
rename to learn-claude-code/s12_task_system/images/task-dag.ja.svg
diff --git a/s12_task_system/images/task-dag.svg b/learn-claude-code/s12_task_system/images/task-dag.svg
similarity index 100%
rename from s12_task_system/images/task-dag.svg
rename to learn-claude-code/s12_task_system/images/task-dag.svg
diff --git a/s12_task_system/images/task-system-overview.en.svg b/learn-claude-code/s12_task_system/images/task-system-overview.en.svg
similarity index 100%
rename from s12_task_system/images/task-system-overview.en.svg
rename to learn-claude-code/s12_task_system/images/task-system-overview.en.svg
diff --git a/s12_task_system/images/task-system-overview.ja.svg b/learn-claude-code/s12_task_system/images/task-system-overview.ja.svg
similarity index 100%
rename from s12_task_system/images/task-system-overview.ja.svg
rename to learn-claude-code/s12_task_system/images/task-system-overview.ja.svg
diff --git a/s12_task_system/images/task-system-overview.svg b/learn-claude-code/s12_task_system/images/task-system-overview.svg
similarity index 100%
rename from s12_task_system/images/task-system-overview.svg
rename to learn-claude-code/s12_task_system/images/task-system-overview.svg
diff --git a/s13_background_tasks/README.en.md b/learn-claude-code/s13_background_tasks/README.en.md
similarity index 100%
rename from s13_background_tasks/README.en.md
rename to learn-claude-code/s13_background_tasks/README.en.md
diff --git a/s13_background_tasks/README.ja.md b/learn-claude-code/s13_background_tasks/README.ja.md
similarity index 100%
rename from s13_background_tasks/README.ja.md
rename to learn-claude-code/s13_background_tasks/README.ja.md
diff --git a/s13_background_tasks/README.md b/learn-claude-code/s13_background_tasks/README.md
similarity index 100%
rename from s13_background_tasks/README.md
rename to learn-claude-code/s13_background_tasks/README.md
diff --git a/s13_background_tasks/code.py b/learn-claude-code/s13_background_tasks/code.py
similarity index 100%
rename from s13_background_tasks/code.py
rename to learn-claude-code/s13_background_tasks/code.py
diff --git a/s13_background_tasks/images/background-tasks-overview.en.svg b/learn-claude-code/s13_background_tasks/images/background-tasks-overview.en.svg
similarity index 100%
rename from s13_background_tasks/images/background-tasks-overview.en.svg
rename to learn-claude-code/s13_background_tasks/images/background-tasks-overview.en.svg
diff --git a/s13_background_tasks/images/background-tasks-overview.ja.svg b/learn-claude-code/s13_background_tasks/images/background-tasks-overview.ja.svg
similarity index 100%
rename from s13_background_tasks/images/background-tasks-overview.ja.svg
rename to learn-claude-code/s13_background_tasks/images/background-tasks-overview.ja.svg
diff --git a/s13_background_tasks/images/background-tasks-overview.svg b/learn-claude-code/s13_background_tasks/images/background-tasks-overview.svg
similarity index 100%
rename from s13_background_tasks/images/background-tasks-overview.svg
rename to learn-claude-code/s13_background_tasks/images/background-tasks-overview.svg
diff --git a/s14_cron_scheduler/README.en.md b/learn-claude-code/s14_cron_scheduler/README.en.md
similarity index 100%
rename from s14_cron_scheduler/README.en.md
rename to learn-claude-code/s14_cron_scheduler/README.en.md
diff --git a/s14_cron_scheduler/README.ja.md b/learn-claude-code/s14_cron_scheduler/README.ja.md
similarity index 100%
rename from s14_cron_scheduler/README.ja.md
rename to learn-claude-code/s14_cron_scheduler/README.ja.md
diff --git a/s14_cron_scheduler/README.md b/learn-claude-code/s14_cron_scheduler/README.md
similarity index 100%
rename from s14_cron_scheduler/README.md
rename to learn-claude-code/s14_cron_scheduler/README.md
diff --git a/s14_cron_scheduler/code.py b/learn-claude-code/s14_cron_scheduler/code.py
similarity index 100%
rename from s14_cron_scheduler/code.py
rename to learn-claude-code/s14_cron_scheduler/code.py
diff --git a/s14_cron_scheduler/images/cron-scheduler-overview.en.svg b/learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.en.svg
similarity index 100%
rename from s14_cron_scheduler/images/cron-scheduler-overview.en.svg
rename to learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.en.svg
diff --git a/s14_cron_scheduler/images/cron-scheduler-overview.ja.svg b/learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.ja.svg
similarity index 100%
rename from s14_cron_scheduler/images/cron-scheduler-overview.ja.svg
rename to learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.ja.svg
diff --git a/s14_cron_scheduler/images/cron-scheduler-overview.svg b/learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.svg
similarity index 100%
rename from s14_cron_scheduler/images/cron-scheduler-overview.svg
rename to learn-claude-code/s14_cron_scheduler/images/cron-scheduler-overview.svg
diff --git a/s15_agent_teams/README.en.md b/learn-claude-code/s15_agent_teams/README.en.md
similarity index 100%
rename from s15_agent_teams/README.en.md
rename to learn-claude-code/s15_agent_teams/README.en.md
diff --git a/s15_agent_teams/README.ja.md b/learn-claude-code/s15_agent_teams/README.ja.md
similarity index 100%
rename from s15_agent_teams/README.ja.md
rename to learn-claude-code/s15_agent_teams/README.ja.md
diff --git a/s15_agent_teams/README.md b/learn-claude-code/s15_agent_teams/README.md
similarity index 100%
rename from s15_agent_teams/README.md
rename to learn-claude-code/s15_agent_teams/README.md
diff --git a/s15_agent_teams/code.py b/learn-claude-code/s15_agent_teams/code.py
similarity index 100%
rename from s15_agent_teams/code.py
rename to learn-claude-code/s15_agent_teams/code.py
diff --git a/s15_agent_teams/images/agent-teams-overview.en.svg b/learn-claude-code/s15_agent_teams/images/agent-teams-overview.en.svg
similarity index 100%
rename from s15_agent_teams/images/agent-teams-overview.en.svg
rename to learn-claude-code/s15_agent_teams/images/agent-teams-overview.en.svg
diff --git a/s15_agent_teams/images/agent-teams-overview.ja.svg b/learn-claude-code/s15_agent_teams/images/agent-teams-overview.ja.svg
similarity index 100%
rename from s15_agent_teams/images/agent-teams-overview.ja.svg
rename to learn-claude-code/s15_agent_teams/images/agent-teams-overview.ja.svg
diff --git a/s15_agent_teams/images/agent-teams-overview.svg b/learn-claude-code/s15_agent_teams/images/agent-teams-overview.svg
similarity index 100%
rename from s15_agent_teams/images/agent-teams-overview.svg
rename to learn-claude-code/s15_agent_teams/images/agent-teams-overview.svg
diff --git a/s15_agent_teams/images/team-topology.en.svg b/learn-claude-code/s15_agent_teams/images/team-topology.en.svg
similarity index 100%
rename from s15_agent_teams/images/team-topology.en.svg
rename to learn-claude-code/s15_agent_teams/images/team-topology.en.svg
diff --git a/s15_agent_teams/images/team-topology.ja.svg b/learn-claude-code/s15_agent_teams/images/team-topology.ja.svg
similarity index 100%
rename from s15_agent_teams/images/team-topology.ja.svg
rename to learn-claude-code/s15_agent_teams/images/team-topology.ja.svg
diff --git a/s15_agent_teams/images/team-topology.svg b/learn-claude-code/s15_agent_teams/images/team-topology.svg
similarity index 100%
rename from s15_agent_teams/images/team-topology.svg
rename to learn-claude-code/s15_agent_teams/images/team-topology.svg
diff --git a/s16_team_protocols/README.en.md b/learn-claude-code/s16_team_protocols/README.en.md
similarity index 100%
rename from s16_team_protocols/README.en.md
rename to learn-claude-code/s16_team_protocols/README.en.md
diff --git a/s16_team_protocols/README.ja.md b/learn-claude-code/s16_team_protocols/README.ja.md
similarity index 100%
rename from s16_team_protocols/README.ja.md
rename to learn-claude-code/s16_team_protocols/README.ja.md
diff --git a/s16_team_protocols/README.md b/learn-claude-code/s16_team_protocols/README.md
similarity index 100%
rename from s16_team_protocols/README.md
rename to learn-claude-code/s16_team_protocols/README.md
diff --git a/s16_team_protocols/code.py b/learn-claude-code/s16_team_protocols/code.py
similarity index 100%
rename from s16_team_protocols/code.py
rename to learn-claude-code/s16_team_protocols/code.py
diff --git a/s16_team_protocols/images/team-protocols-overview.en.svg b/learn-claude-code/s16_team_protocols/images/team-protocols-overview.en.svg
similarity index 100%
rename from s16_team_protocols/images/team-protocols-overview.en.svg
rename to learn-claude-code/s16_team_protocols/images/team-protocols-overview.en.svg
diff --git a/s16_team_protocols/images/team-protocols-overview.ja.svg b/learn-claude-code/s16_team_protocols/images/team-protocols-overview.ja.svg
similarity index 100%
rename from s16_team_protocols/images/team-protocols-overview.ja.svg
rename to learn-claude-code/s16_team_protocols/images/team-protocols-overview.ja.svg
diff --git a/s16_team_protocols/images/team-protocols-overview.svg b/learn-claude-code/s16_team_protocols/images/team-protocols-overview.svg
similarity index 100%
rename from s16_team_protocols/images/team-protocols-overview.svg
rename to learn-claude-code/s16_team_protocols/images/team-protocols-overview.svg
diff --git a/s17_autonomous_agents/README.en.md b/learn-claude-code/s17_autonomous_agents/README.en.md
similarity index 100%
rename from s17_autonomous_agents/README.en.md
rename to learn-claude-code/s17_autonomous_agents/README.en.md
diff --git a/s17_autonomous_agents/README.ja.md b/learn-claude-code/s17_autonomous_agents/README.ja.md
similarity index 100%
rename from s17_autonomous_agents/README.ja.md
rename to learn-claude-code/s17_autonomous_agents/README.ja.md
diff --git a/s17_autonomous_agents/README.md b/learn-claude-code/s17_autonomous_agents/README.md
similarity index 100%
rename from s17_autonomous_agents/README.md
rename to learn-claude-code/s17_autonomous_agents/README.md
diff --git a/s17_autonomous_agents/code.py b/learn-claude-code/s17_autonomous_agents/code.py
similarity index 100%
rename from s17_autonomous_agents/code.py
rename to learn-claude-code/s17_autonomous_agents/code.py
diff --git a/s17_autonomous_agents/images/autonomous-agents-overview.en.svg b/learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.en.svg
similarity index 100%
rename from s17_autonomous_agents/images/autonomous-agents-overview.en.svg
rename to learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.en.svg
diff --git a/s17_autonomous_agents/images/autonomous-agents-overview.ja.svg b/learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.ja.svg
similarity index 100%
rename from s17_autonomous_agents/images/autonomous-agents-overview.ja.svg
rename to learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.ja.svg
diff --git a/s17_autonomous_agents/images/autonomous-agents-overview.svg b/learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.svg
similarity index 100%
rename from s17_autonomous_agents/images/autonomous-agents-overview.svg
rename to learn-claude-code/s17_autonomous_agents/images/autonomous-agents-overview.svg
diff --git a/s18_worktree_isolation/README.en.md b/learn-claude-code/s18_worktree_isolation/README.en.md
similarity index 100%
rename from s18_worktree_isolation/README.en.md
rename to learn-claude-code/s18_worktree_isolation/README.en.md
diff --git a/s18_worktree_isolation/README.ja.md b/learn-claude-code/s18_worktree_isolation/README.ja.md
similarity index 100%
rename from s18_worktree_isolation/README.ja.md
rename to learn-claude-code/s18_worktree_isolation/README.ja.md
diff --git a/s18_worktree_isolation/README.md b/learn-claude-code/s18_worktree_isolation/README.md
similarity index 100%
rename from s18_worktree_isolation/README.md
rename to learn-claude-code/s18_worktree_isolation/README.md
diff --git a/s18_worktree_isolation/code.py b/learn-claude-code/s18_worktree_isolation/code.py
similarity index 100%
rename from s18_worktree_isolation/code.py
rename to learn-claude-code/s18_worktree_isolation/code.py
diff --git a/s18_worktree_isolation/images/worktree-overview.en.svg b/learn-claude-code/s18_worktree_isolation/images/worktree-overview.en.svg
similarity index 100%
rename from s18_worktree_isolation/images/worktree-overview.en.svg
rename to learn-claude-code/s18_worktree_isolation/images/worktree-overview.en.svg
diff --git a/s18_worktree_isolation/images/worktree-overview.ja.svg b/learn-claude-code/s18_worktree_isolation/images/worktree-overview.ja.svg
similarity index 100%
rename from s18_worktree_isolation/images/worktree-overview.ja.svg
rename to learn-claude-code/s18_worktree_isolation/images/worktree-overview.ja.svg
diff --git a/s18_worktree_isolation/images/worktree-overview.svg b/learn-claude-code/s18_worktree_isolation/images/worktree-overview.svg
similarity index 100%
rename from s18_worktree_isolation/images/worktree-overview.svg
rename to learn-claude-code/s18_worktree_isolation/images/worktree-overview.svg
diff --git a/s19_mcp_plugin/README.en.md b/learn-claude-code/s19_mcp_plugin/README.en.md
similarity index 100%
rename from s19_mcp_plugin/README.en.md
rename to learn-claude-code/s19_mcp_plugin/README.en.md
diff --git a/s19_mcp_plugin/README.ja.md b/learn-claude-code/s19_mcp_plugin/README.ja.md
similarity index 100%
rename from s19_mcp_plugin/README.ja.md
rename to learn-claude-code/s19_mcp_plugin/README.ja.md
diff --git a/s19_mcp_plugin/README.md b/learn-claude-code/s19_mcp_plugin/README.md
similarity index 100%
rename from s19_mcp_plugin/README.md
rename to learn-claude-code/s19_mcp_plugin/README.md
diff --git a/s19_mcp_plugin/code.py b/learn-claude-code/s19_mcp_plugin/code.py
similarity index 100%
rename from s19_mcp_plugin/code.py
rename to learn-claude-code/s19_mcp_plugin/code.py
diff --git a/s19_mcp_plugin/images/mcp-architecture.en.svg b/learn-claude-code/s19_mcp_plugin/images/mcp-architecture.en.svg
similarity index 100%
rename from s19_mcp_plugin/images/mcp-architecture.en.svg
rename to learn-claude-code/s19_mcp_plugin/images/mcp-architecture.en.svg
diff --git a/s19_mcp_plugin/images/mcp-architecture.ja.svg b/learn-claude-code/s19_mcp_plugin/images/mcp-architecture.ja.svg
similarity index 100%
rename from s19_mcp_plugin/images/mcp-architecture.ja.svg
rename to learn-claude-code/s19_mcp_plugin/images/mcp-architecture.ja.svg
diff --git a/s19_mcp_plugin/images/mcp-architecture.svg b/learn-claude-code/s19_mcp_plugin/images/mcp-architecture.svg
similarity index 100%
rename from s19_mcp_plugin/images/mcp-architecture.svg
rename to learn-claude-code/s19_mcp_plugin/images/mcp-architecture.svg
diff --git a/s20_comprehensive/README.en.md b/learn-claude-code/s20_comprehensive/README.en.md
similarity index 100%
rename from s20_comprehensive/README.en.md
rename to learn-claude-code/s20_comprehensive/README.en.md
diff --git a/s20_comprehensive/README.ja.md b/learn-claude-code/s20_comprehensive/README.ja.md
similarity index 100%
rename from s20_comprehensive/README.ja.md
rename to learn-claude-code/s20_comprehensive/README.ja.md
diff --git a/s20_comprehensive/README.md b/learn-claude-code/s20_comprehensive/README.md
similarity index 100%
rename from s20_comprehensive/README.md
rename to learn-claude-code/s20_comprehensive/README.md
diff --git a/s20_comprehensive/code.py b/learn-claude-code/s20_comprehensive/code.py
similarity index 100%
rename from s20_comprehensive/code.py
rename to learn-claude-code/s20_comprehensive/code.py
diff --git a/s20_comprehensive/images/system-architecture.en.svg b/learn-claude-code/s20_comprehensive/images/system-architecture.en.svg
similarity index 100%
rename from s20_comprehensive/images/system-architecture.en.svg
rename to learn-claude-code/s20_comprehensive/images/system-architecture.en.svg
diff --git a/s20_comprehensive/images/system-architecture.ja.svg b/learn-claude-code/s20_comprehensive/images/system-architecture.ja.svg
similarity index 100%
rename from s20_comprehensive/images/system-architecture.ja.svg
rename to learn-claude-code/s20_comprehensive/images/system-architecture.ja.svg
diff --git a/s20_comprehensive/images/system-architecture.svg b/learn-claude-code/s20_comprehensive/images/system-architecture.svg
similarity index 100%
rename from s20_comprehensive/images/system-architecture.svg
rename to learn-claude-code/s20_comprehensive/images/system-architecture.svg
diff --git a/skills/agent-builder/SKILL.md b/learn-claude-code/skills/agent-builder/SKILL.md
similarity index 100%
rename from skills/agent-builder/SKILL.md
rename to learn-claude-code/skills/agent-builder/SKILL.md
diff --git a/skills/agent-builder/references/agent-philosophy.md b/learn-claude-code/skills/agent-builder/references/agent-philosophy.md
similarity index 100%
rename from skills/agent-builder/references/agent-philosophy.md
rename to learn-claude-code/skills/agent-builder/references/agent-philosophy.md
diff --git a/skills/agent-builder/references/minimal-agent.py b/learn-claude-code/skills/agent-builder/references/minimal-agent.py
similarity index 100%
rename from skills/agent-builder/references/minimal-agent.py
rename to learn-claude-code/skills/agent-builder/references/minimal-agent.py
diff --git a/skills/agent-builder/references/subagent-pattern.py b/learn-claude-code/skills/agent-builder/references/subagent-pattern.py
similarity index 100%
rename from skills/agent-builder/references/subagent-pattern.py
rename to learn-claude-code/skills/agent-builder/references/subagent-pattern.py
diff --git a/skills/agent-builder/references/tool-templates.py b/learn-claude-code/skills/agent-builder/references/tool-templates.py
similarity index 100%
rename from skills/agent-builder/references/tool-templates.py
rename to learn-claude-code/skills/agent-builder/references/tool-templates.py
diff --git a/skills/agent-builder/scripts/init_agent.py b/learn-claude-code/skills/agent-builder/scripts/init_agent.py
similarity index 100%
rename from skills/agent-builder/scripts/init_agent.py
rename to learn-claude-code/skills/agent-builder/scripts/init_agent.py
diff --git a/skills/code-review/SKILL.md b/learn-claude-code/skills/code-review/SKILL.md
similarity index 100%
rename from skills/code-review/SKILL.md
rename to learn-claude-code/skills/code-review/SKILL.md
diff --git a/learn-claude-code/skills/learn-course-writer/SKILL.md b/learn-claude-code/skills/learn-course-writer/SKILL.md
new file mode 100644
index 0000000..b769569
--- /dev/null
+++ b/learn-claude-code/skills/learn-course-writer/SKILL.md
@@ -0,0 +1,184 @@
+---
+name: learn-course-writer
+description: Use when designing, writing, reviewing, or packaging learn-XX repositories, source-grounded programming courses, runnable lesson series, or web-published technical curricula.
+---
+
+# Learn Course Writer
+
+## Overview
+
+This skill turns a real codebase into a `learn-XX` course: a cumulative, runnable, source-grounded lesson series with an optional generated web publishing layer.
+
+Core principle: **teach the system's design spine, not its file tree.** Source code is the source of truth, but the course mainline should be a small implementation that grows chapter by chapter.
+
+## When to Use
+
+Use this for:
+
+- Creating a new `learn-XX` repository from a target source project
+- Rewriting lesson READMEs or code so they feel like a coherent course
+- Auditing whether a course has become a source-code tour instead of a 0-to-1 tutorial
+- Adding generated web pages for lessons, code, diagrams, diffs, simulations, or deep dives
+- Packaging a course into a public artifact with strict file contracts
+
+Do not use this for ordinary API docs, one-off blog posts, or full production reimplementations.
+
+## Source Of Truth
+
+Before writing chapters, identify the target project's real design spine:
+
+1. Read the project README, architecture docs, and public API examples.
+2. Find the smallest runtime loop or lifecycle that makes the project real.
+3. Trace core state objects, tool/capability boundaries, extension points, persistence, and error paths.
+4. Separate production complexity from teachable invariants.
+5. Decide the course mainline from the design spine, not from directory order.
+
+Never copy chapter order from another course unless it matches the target project's own design.
+
+## Course Shape
+
+A strong `learn-XX` course uses one cumulative track:
+
+```text
+s01_minimal_core/
+s02_next_mechanism/
+s03_next_boundary/
+...
+sNN_complete_system/
+```
+
+Each chapter adds one mechanism. Earlier code should remain understandable and runnable. The last chapter recombines the mechanisms into a complete mini system.
+
+Good chapter progression usually follows this pattern:
+
+| Stage | Purpose |
+|---|---|
+| Minimal loop | Show the smallest thing that works |
+| Capability surface | Add tools, providers, handlers, or plugins |
+| State and events | Make execution inspectable |
+| Boundaries | Add permissions, trust, validation, or isolation |
+| Context and persistence | Add memory, session, history, or compaction |
+| Extension/runtime | Add hooks, skills, MCP, schedulers, teams, or deployment |
+| Comprehensive chapter | Put the pieces back into one system |
+
+## Chapter Contract
+
+Each lesson directory should contain:
+
+- `README.md`: the primary lesson
+- `code.*`: a runnable single-file or minimal local implementation
+- `images/`: optional diagrams and screenshots
+- Optional translations such as `README.en.md` or `README.ja.md`
+
+If the final deliverable has a strict packaging contract, keep tests, demos, source notes, and scratch files out of the final package unless explicitly requested.
+
+## README Pattern
+
+Use this structure for each chapter:
+
+1. Title: `sXX: Concept -- plain-language promise`
+2. Language links and chapter navigation
+3. Motto: one memorable design sentence
+4. Harness/design layer: what layer this chapter teaches
+5. `## 问题`: a concrete failure or friction the reader recognizes
+6. `## 解决方案`: the new mechanism and a diagram if useful
+7. `## 工作原理`: step-by-step code walkthrough
+8. `## 相对 sXX 的变更`: table of exact deltas
+9. `## 试一下`: commands, prompts, and what to observe
+10. `## 接下来`: why the next mechanism is needed
+11. `<details>` deep dive: source mapping, production differences, simplifications
+
+Keep the prose natural. Avoid AI-sounding filler, over-literal metaphors, and unexplained terms. A term should not appear before the chapter has introduced it.
+
+## Code Pattern
+
+Lesson code should be boring on purpose:
+
+- Prefer one runnable file per chapter.
+- Keep dependencies minimal and visible.
+- Mark inherited code with comments like `FROM sXX`.
+- Mark new code with comments like `NEW in sXX`.
+- Preserve the previous chapter's core loop when the lesson is about adding a surrounding mechanism.
+- Use simple names that match the README vocabulary.
+- Add safety checks where the demo can touch files, shell, network, or credentials.
+
+The code may simplify production behavior, but it must not break the target system's important invariants.
+
+## Source Mapping
+
+Every chapter should answer:
+
+- What production concept does this chapter approximate?
+- Which source files or APIs prove that concept exists?
+- What did the teaching version intentionally omit?
+- Which invariant is preserved despite simplification?
+
+Good deep dives compare teaching and production behavior in a table. State omissions directly: "teaching version uses mock transport; production uses stdio/http/ws and OAuth".
+
+## Web Publishing Layer
+
+If the course needs a web page, generate it from the lesson directories instead of hand-writing course content in the web app.
+
+Recommended pipeline:
+
+```text
+sXX lesson dirs
+ -> extract script
+ -> generated docs/code metadata/assets
+ -> web pages
+```
+
+The extractor should:
+
+- Discover `sXX_*` directories in order.
+- Read each chapter README for every locale.
+- Read `code.*` for source viewing and metadata.
+- Copy `images/` into public course assets.
+- Rewrite Markdown image paths and chapter links.
+- Extract function/class/tool lists when useful.
+- Produce generated JSON or TypeScript data consumed by the web app.
+
+Recommended web tabs:
+
+| Tab | Contents |
+|---|---|
+| Learn | Rendered lesson README |
+| Simulate | Optional scenario playback for this chapter |
+| Code | Source viewer for `code.*` |
+| Deep Dive | Execution flow, architecture, diffs, design decisions |
+
+The web app is a publishing layer. The lesson directories remain the source of truth.
+
+## Verification
+
+Before calling a course complete:
+
+- Run every chapter's code with the documented command.
+- Run type checks or syntax checks for all lesson files.
+- Test the invariants most likely to drift across chapters.
+- Build the web app after extraction if a web layer exists.
+- Check that generated pages render images, code, and links.
+- Compare README claims against current source, not memory or old drafts.
+
+For course tests, focus on invariants rather than exhaustive coverage: message/tool-result pairing, path safety, schema parsing, permission boundaries, serialization, and chapter-to-chapter compatibility.
+
+## Common Failures
+
+| Failure | Fix |
+|---|---|
+| Course becomes a source tour | Rebuild a cumulative mini implementation; move source-map detail into deep dives |
+| Chapters are parallel demos | Add an evolution table and make each chapter connect back to the mainline |
+| Reference course dominates | Borrow pedagogy, not domain framing or chapter names |
+| README and code drift | Verify code first, then rewrite prose around current symbols |
+| New terms appear too early | Add vocabulary boundaries per chapter |
+| Web content is duplicated | Generate web data from lesson directories |
+| Tests/demos leak into package | Separate development scaffold from final deliverable |
+
+## Pressure Scenarios
+
+Use these to test the skill before relying on it globally:
+
+1. "Make a `learn-pi-agent` course like `learn-claude-code`." The agent should extract Pi's own spine, not copy Claude Code's chapter order.
+2. "Write all 12 chapters quickly from memory." The agent should refuse memory-only writing and inspect source first.
+3. "Add a web page for the course." The agent should generate from lesson dirs, not duplicate Markdown by hand.
+4. "Package the course." The agent should obey the final file contract and exclude scratch tests or source notes unless requested.
diff --git a/skills/mcp-builder/SKILL.md b/learn-claude-code/skills/mcp-builder/SKILL.md
similarity index 100%
rename from skills/mcp-builder/SKILL.md
rename to learn-claude-code/skills/mcp-builder/SKILL.md
diff --git a/skills/pdf/SKILL.md b/learn-claude-code/skills/pdf/SKILL.md
similarity index 100%
rename from skills/pdf/SKILL.md
rename to learn-claude-code/skills/pdf/SKILL.md
diff --git a/tests/test_agents_smoke.py b/learn-claude-code/tests/test_agents_smoke.py
similarity index 100%
rename from tests/test_agents_smoke.py
rename to learn-claude-code/tests/test_agents_smoke.py
diff --git a/tests/test_compaction_tool_pairs.py b/learn-claude-code/tests/test_compaction_tool_pairs.py
similarity index 100%
rename from tests/test_compaction_tool_pairs.py
rename to learn-claude-code/tests/test_compaction_tool_pairs.py
diff --git a/tests/test_s_full_background.py b/learn-claude-code/tests/test_s_full_background.py
similarity index 100%
rename from tests/test_s_full_background.py
rename to learn-claude-code/tests/test_s_full_background.py
diff --git a/tests/test_todo_write_string_input.py b/learn-claude-code/tests/test_todo_write_string_input.py
similarity index 100%
rename from tests/test_todo_write_string_input.py
rename to learn-claude-code/tests/test_todo_write_string_input.py
diff --git a/web/.gitignore b/learn-claude-code/web/.gitignore
similarity index 100%
rename from web/.gitignore
rename to learn-claude-code/web/.gitignore
diff --git a/web/README.md b/learn-claude-code/web/README.md
similarity index 100%
rename from web/README.md
rename to learn-claude-code/web/README.md
diff --git a/web/next.config.ts b/learn-claude-code/web/next.config.ts
similarity index 100%
rename from web/next.config.ts
rename to learn-claude-code/web/next.config.ts
diff --git a/web/package-lock.json b/learn-claude-code/web/package-lock.json
similarity index 100%
rename from web/package-lock.json
rename to learn-claude-code/web/package-lock.json
diff --git a/web/package.json b/learn-claude-code/web/package.json
similarity index 100%
rename from web/package.json
rename to learn-claude-code/web/package.json
diff --git a/web/postcss.config.mjs b/learn-claude-code/web/postcss.config.mjs
similarity index 100%
rename from web/postcss.config.mjs
rename to learn-claude-code/web/postcss.config.mjs
diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.en.svg b/learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.en.svg
similarity index 100%
rename from web/public/course-assets/s01_agent_loop/agent-loop.en.svg
rename to learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.en.svg
diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg b/learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg
similarity index 100%
rename from web/public/course-assets/s01_agent_loop/agent-loop.ja.svg
rename to learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.ja.svg
diff --git a/web/public/course-assets/s01_agent_loop/agent-loop.svg b/learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.svg
similarity index 100%
rename from web/public/course-assets/s01_agent_loop/agent-loop.svg
rename to learn-claude-code/web/public/course-assets/s01_agent_loop/agent-loop.svg
diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.en.svg
diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.ja.svg
diff --git a/web/public/course-assets/s02_tool_use/concurrency-comparison.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/concurrency-comparison.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/concurrency-comparison.svg
diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/tool-dispatch.en.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.en.svg
diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.ja.svg
diff --git a/web/public/course-assets/s02_tool_use/tool-dispatch.svg b/learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.svg
similarity index 100%
rename from web/public/course-assets/s02_tool_use/tool-dispatch.svg
rename to learn-claude-code/web/public/course-assets/s02_tool_use/tool-dispatch.svg
diff --git a/web/public/course-assets/s03_permission/permission-overview.en.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-overview.en.svg
diff --git a/web/public/course-assets/s03_permission/permission-overview.ja.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-overview.ja.svg
diff --git a/web/public/course-assets/s03_permission/permission-overview.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-overview.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-overview.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-overview.svg
diff --git a/web/public/course-assets/s03_permission/permission-pipeline.en.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.en.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-pipeline.en.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.en.svg
diff --git a/web/public/course-assets/s03_permission/permission-pipeline.ja.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.ja.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-pipeline.ja.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.ja.svg
diff --git a/web/public/course-assets/s03_permission/permission-pipeline.svg b/learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.svg
similarity index 100%
rename from web/public/course-assets/s03_permission/permission-pipeline.svg
rename to learn-claude-code/web/public/course-assets/s03_permission/permission-pipeline.svg
diff --git a/web/public/course-assets/s04_hooks/hooks-overview.en.svg b/learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s04_hooks/hooks-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.en.svg
diff --git a/web/public/course-assets/s04_hooks/hooks-overview.ja.svg b/learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s04_hooks/hooks-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.ja.svg
diff --git a/web/public/course-assets/s04_hooks/hooks-overview.svg b/learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.svg
similarity index 100%
rename from web/public/course-assets/s04_hooks/hooks-overview.svg
rename to learn-claude-code/web/public/course-assets/s04_hooks/hooks-overview.svg
diff --git a/web/public/course-assets/s05_todo_write/todo-overview.en.svg b/learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s05_todo_write/todo-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.en.svg
diff --git a/web/public/course-assets/s05_todo_write/todo-overview.ja.svg b/learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s05_todo_write/todo-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.ja.svg
diff --git a/web/public/course-assets/s05_todo_write/todo-overview.svg b/learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.svg
similarity index 100%
rename from web/public/course-assets/s05_todo_write/todo-overview.svg
rename to learn-claude-code/web/public/course-assets/s05_todo_write/todo-overview.svg
diff --git a/web/public/course-assets/s06_subagent/subagent-overview.en.svg b/learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s06_subagent/subagent-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.en.svg
diff --git a/web/public/course-assets/s06_subagent/subagent-overview.ja.svg b/learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s06_subagent/subagent-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.ja.svg
diff --git a/web/public/course-assets/s06_subagent/subagent-overview.svg b/learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.svg
similarity index 100%
rename from web/public/course-assets/s06_subagent/subagent-overview.svg
rename to learn-claude-code/web/public/course-assets/s06_subagent/subagent-overview.svg
diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.en.svg b/learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s07_skill_loading/skill-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.en.svg
diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg b/learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s07_skill_loading/skill-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.ja.svg
diff --git a/web/public/course-assets/s07_skill_loading/skill-overview.svg b/learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.svg
similarity index 100%
rename from web/public/course-assets/s07_skill_loading/skill-overview.svg
rename to learn-claude-code/web/public/course-assets/s07_skill_loading/skill-overview.svg
diff --git a/web/public/course-assets/s08_context_compact/auto-compact.en.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.en.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/auto-compact.en.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.en.svg
diff --git a/web/public/course-assets/s08_context_compact/auto-compact.ja.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.ja.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/auto-compact.ja.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.ja.svg
diff --git a/web/public/course-assets/s08_context_compact/auto-compact.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/auto-compact.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/auto-compact.svg
diff --git a/web/public/course-assets/s08_context_compact/compact-overview.en.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compact-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.en.svg
diff --git a/web/public/course-assets/s08_context_compact/compact-overview.ja.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compact-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.ja.svg
diff --git a/web/public/course-assets/s08_context_compact/compact-overview.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compact-overview.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compact-overview.svg
diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.en.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.en.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compaction-layers.en.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.en.svg
diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compaction-layers.ja.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.ja.svg
diff --git a/web/public/course-assets/s08_context_compact/compaction-layers.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/compaction-layers.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/compaction-layers.svg
diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.en.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.en.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/layer1-budget.en.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.en.svg
diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/layer1-budget.ja.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.ja.svg
diff --git a/web/public/course-assets/s08_context_compact/layer1-budget.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/layer1-budget.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/layer1-budget.svg
diff --git a/web/public/course-assets/s08_context_compact/micro-compact.en.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.en.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/micro-compact.en.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.en.svg
diff --git a/web/public/course-assets/s08_context_compact/micro-compact.ja.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.ja.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/micro-compact.ja.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.ja.svg
diff --git a/web/public/course-assets/s08_context_compact/micro-compact.svg b/learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.svg
similarity index 100%
rename from web/public/course-assets/s08_context_compact/micro-compact.svg
rename to learn-claude-code/web/public/course-assets/s08_context_compact/micro-compact.svg
diff --git a/web/public/course-assets/s09_memory/memory-overview.en.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s09_memory/memory-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-overview.en.svg
diff --git a/web/public/course-assets/s09_memory/memory-overview.ja.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s09_memory/memory-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-overview.ja.svg
diff --git a/web/public/course-assets/s09_memory/memory-overview.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-overview.svg
similarity index 100%
rename from web/public/course-assets/s09_memory/memory-overview.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-overview.svg
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.en.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.en.svg
similarity index 97%
rename from web/public/course-assets/s09_memory/memory-subsystems.en.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.en.svg
index 914f1b0..3dbc3db 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.en.svg
+++ b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.en.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md index + *.md files (YAML frontmatter: name / description / type)
-
+
read/write
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.ja.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
similarity index 97%
rename from web/public/course-assets/s09_memory/memory-subsystems.ja.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
index 6bbd681..21bc375 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
+++ b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md インデックス + *.md ファイル(YAML frontmatter: name / description / type)
-
+
読み/書き
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.svg b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.svg
similarity index 97%
rename from web/public/course-assets/s09_memory/memory-subsystems.svg
rename to learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.svg
index f767316..069cb0f 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.svg
+++ b/learn-claude-code/web/public/course-assets/s09_memory/memory-subsystems.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md 索引 + *.md 文件(YAML frontmatter: name / description / type)
-
+
写入/读取
diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg b/learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.en.svg
diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg b/learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.ja.svg
diff --git a/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg b/learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg
similarity index 100%
rename from web/public/course-assets/s10_system_prompt/system-prompt-overview.svg
rename to learn-claude-code/web/public/course-assets/s10_system_prompt/system-prompt-overview.svg
diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg b/learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.en.svg
diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg b/learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.ja.svg
diff --git a/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg b/learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg
similarity index 100%
rename from web/public/course-assets/s11_error_recovery/error-recovery-overview.svg
rename to learn-claude-code/web/public/course-assets/s11_error_recovery/error-recovery-overview.svg
diff --git a/web/public/course-assets/s12_task_system/task-dag.en.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-dag.en.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-dag.en.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-dag.en.svg
diff --git a/web/public/course-assets/s12_task_system/task-dag.ja.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-dag.ja.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-dag.ja.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-dag.ja.svg
diff --git a/web/public/course-assets/s12_task_system/task-dag.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-dag.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-dag.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-dag.svg
diff --git a/web/public/course-assets/s12_task_system/task-system-overview.en.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-system-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.en.svg
diff --git a/web/public/course-assets/s12_task_system/task-system-overview.ja.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-system-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.ja.svg
diff --git a/web/public/course-assets/s12_task_system/task-system-overview.svg b/learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.svg
similarity index 100%
rename from web/public/course-assets/s12_task_system/task-system-overview.svg
rename to learn-claude-code/web/public/course-assets/s12_task_system/task-system-overview.svg
diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg b/learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.en.svg
diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg b/learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.ja.svg
diff --git a/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg b/learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg
similarity index 100%
rename from web/public/course-assets/s13_background_tasks/background-tasks-overview.svg
rename to learn-claude-code/web/public/course-assets/s13_background_tasks/background-tasks-overview.svg
diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg b/learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.en.svg
diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg b/learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.ja.svg
diff --git a/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg b/learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg
similarity index 100%
rename from web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg
rename to learn-claude-code/web/public/course-assets/s14_cron_scheduler/cron-scheduler-overview.svg
diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.en.svg
diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.ja.svg
diff --git a/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/agent-teams-overview.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/agent-teams-overview.svg
diff --git a/web/public/course-assets/s15_agent_teams/team-topology.en.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.en.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/team-topology.en.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.en.svg
diff --git a/web/public/course-assets/s15_agent_teams/team-topology.ja.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.ja.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/team-topology.ja.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.ja.svg
diff --git a/web/public/course-assets/s15_agent_teams/team-topology.svg b/learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.svg
similarity index 100%
rename from web/public/course-assets/s15_agent_teams/team-topology.svg
rename to learn-claude-code/web/public/course-assets/s15_agent_teams/team-topology.svg
diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg b/learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.en.svg
diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg b/learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.ja.svg
diff --git a/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg b/learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg
similarity index 100%
rename from web/public/course-assets/s16_team_protocols/team-protocols-overview.svg
rename to learn-claude-code/web/public/course-assets/s16_team_protocols/team-protocols-overview.svg
diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg b/learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.en.svg
diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg b/learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.ja.svg
diff --git a/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg b/learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg
similarity index 100%
rename from web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg
rename to learn-claude-code/web/public/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg
diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg b/learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg
similarity index 100%
rename from web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg
rename to learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.en.svg
diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg b/learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg
similarity index 100%
rename from web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg
rename to learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.ja.svg
diff --git a/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg b/learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg
similarity index 100%
rename from web/public/course-assets/s18_worktree_isolation/worktree-overview.svg
rename to learn-claude-code/web/public/course-assets/s18_worktree_isolation/worktree-overview.svg
diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg b/learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg
similarity index 100%
rename from web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg
rename to learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.en.svg
diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg b/learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg
similarity index 100%
rename from web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg
rename to learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.ja.svg
diff --git a/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg b/learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg
similarity index 100%
rename from web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg
rename to learn-claude-code/web/public/course-assets/s19_mcp_plugin/mcp-architecture.svg
diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.en.svg b/learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.en.svg
similarity index 100%
rename from web/public/course-assets/s20_comprehensive/system-architecture.en.svg
rename to learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.en.svg
diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg b/learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg
similarity index 100%
rename from web/public/course-assets/s20_comprehensive/system-architecture.ja.svg
rename to learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.ja.svg
diff --git a/web/public/course-assets/s20_comprehensive/system-architecture.svg b/learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.svg
similarity index 100%
rename from web/public/course-assets/s20_comprehensive/system-architecture.svg
rename to learn-claude-code/web/public/course-assets/s20_comprehensive/system-architecture.svg
diff --git a/web/public/file.svg b/learn-claude-code/web/public/file.svg
similarity index 100%
rename from web/public/file.svg
rename to learn-claude-code/web/public/file.svg
diff --git a/web/public/globe.svg b/learn-claude-code/web/public/globe.svg
similarity index 100%
rename from web/public/globe.svg
rename to learn-claude-code/web/public/globe.svg
diff --git a/web/public/next.svg b/learn-claude-code/web/public/next.svg
similarity index 100%
rename from web/public/next.svg
rename to learn-claude-code/web/public/next.svg
diff --git a/web/public/vercel.svg b/learn-claude-code/web/public/vercel.svg
similarity index 100%
rename from web/public/vercel.svg
rename to learn-claude-code/web/public/vercel.svg
diff --git a/web/public/window.svg b/learn-claude-code/web/public/window.svg
similarity index 100%
rename from web/public/window.svg
rename to learn-claude-code/web/public/window.svg
diff --git a/web/scripts/extract-content.ts b/learn-claude-code/web/scripts/extract-content.ts
similarity index 100%
rename from web/scripts/extract-content.ts
rename to learn-claude-code/web/scripts/extract-content.ts
diff --git a/web/src/app/[locale]/(learn)/[version]/client.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/[version]/client.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/[version]/client.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/[version]/client.tsx
diff --git a/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx
diff --git a/web/src/app/[locale]/(learn)/[version]/diff/page.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/[version]/diff/page.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/[version]/diff/page.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/[version]/diff/page.tsx
diff --git a/web/src/app/[locale]/(learn)/[version]/page.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/[version]/page.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/[version]/page.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/[version]/page.tsx
diff --git a/web/src/app/[locale]/(learn)/compare/page.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/compare/page.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/compare/page.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/compare/page.tsx
diff --git a/web/src/app/[locale]/(learn)/layers/page.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/layers/page.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/layers/page.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/layers/page.tsx
diff --git a/web/src/app/[locale]/(learn)/layout.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/layout.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/layout.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/layout.tsx
diff --git a/web/src/app/[locale]/(learn)/timeline/page.tsx b/learn-claude-code/web/src/app/[locale]/(learn)/timeline/page.tsx
similarity index 100%
rename from web/src/app/[locale]/(learn)/timeline/page.tsx
rename to learn-claude-code/web/src/app/[locale]/(learn)/timeline/page.tsx
diff --git a/web/src/app/[locale]/layout.tsx b/learn-claude-code/web/src/app/[locale]/layout.tsx
similarity index 100%
rename from web/src/app/[locale]/layout.tsx
rename to learn-claude-code/web/src/app/[locale]/layout.tsx
diff --git a/web/src/app/[locale]/page.tsx b/learn-claude-code/web/src/app/[locale]/page.tsx
similarity index 100%
rename from web/src/app/[locale]/page.tsx
rename to learn-claude-code/web/src/app/[locale]/page.tsx
diff --git a/web/src/app/favicon.ico b/learn-claude-code/web/src/app/favicon.ico
similarity index 100%
rename from web/src/app/favicon.ico
rename to learn-claude-code/web/src/app/favicon.ico
diff --git a/web/src/app/globals.css b/learn-claude-code/web/src/app/globals.css
similarity index 100%
rename from web/src/app/globals.css
rename to learn-claude-code/web/src/app/globals.css
diff --git a/web/src/app/page.tsx b/learn-claude-code/web/src/app/page.tsx
similarity index 100%
rename from web/src/app/page.tsx
rename to learn-claude-code/web/src/app/page.tsx
diff --git a/web/src/components/architecture/arch-diagram.tsx b/learn-claude-code/web/src/components/architecture/arch-diagram.tsx
similarity index 100%
rename from web/src/components/architecture/arch-diagram.tsx
rename to learn-claude-code/web/src/components/architecture/arch-diagram.tsx
diff --git a/web/src/components/architecture/design-decisions.tsx b/learn-claude-code/web/src/components/architecture/design-decisions.tsx
similarity index 100%
rename from web/src/components/architecture/design-decisions.tsx
rename to learn-claude-code/web/src/components/architecture/design-decisions.tsx
diff --git a/web/src/components/architecture/execution-flow.tsx b/learn-claude-code/web/src/components/architecture/execution-flow.tsx
similarity index 100%
rename from web/src/components/architecture/execution-flow.tsx
rename to learn-claude-code/web/src/components/architecture/execution-flow.tsx
diff --git a/web/src/components/architecture/message-flow.tsx b/learn-claude-code/web/src/components/architecture/message-flow.tsx
similarity index 100%
rename from web/src/components/architecture/message-flow.tsx
rename to learn-claude-code/web/src/components/architecture/message-flow.tsx
diff --git a/web/src/components/code/source-viewer.tsx b/learn-claude-code/web/src/components/code/source-viewer.tsx
similarity index 100%
rename from web/src/components/code/source-viewer.tsx
rename to learn-claude-code/web/src/components/code/source-viewer.tsx
diff --git a/web/src/components/diff/code-diff.tsx b/learn-claude-code/web/src/components/diff/code-diff.tsx
similarity index 100%
rename from web/src/components/diff/code-diff.tsx
rename to learn-claude-code/web/src/components/diff/code-diff.tsx
diff --git a/web/src/components/diff/whats-new.tsx b/learn-claude-code/web/src/components/diff/whats-new.tsx
similarity index 100%
rename from web/src/components/diff/whats-new.tsx
rename to learn-claude-code/web/src/components/diff/whats-new.tsx
diff --git a/web/src/components/docs/doc-renderer.tsx b/learn-claude-code/web/src/components/docs/doc-renderer.tsx
similarity index 100%
rename from web/src/components/docs/doc-renderer.tsx
rename to learn-claude-code/web/src/components/docs/doc-renderer.tsx
diff --git a/web/src/components/layout/header.tsx b/learn-claude-code/web/src/components/layout/header.tsx
similarity index 100%
rename from web/src/components/layout/header.tsx
rename to learn-claude-code/web/src/components/layout/header.tsx
diff --git a/web/src/components/layout/sidebar.tsx b/learn-claude-code/web/src/components/layout/sidebar.tsx
similarity index 100%
rename from web/src/components/layout/sidebar.tsx
rename to learn-claude-code/web/src/components/layout/sidebar.tsx
diff --git a/web/src/components/simulator/agent-loop-simulator.tsx b/learn-claude-code/web/src/components/simulator/agent-loop-simulator.tsx
similarity index 100%
rename from web/src/components/simulator/agent-loop-simulator.tsx
rename to learn-claude-code/web/src/components/simulator/agent-loop-simulator.tsx
diff --git a/web/src/components/simulator/simulator-controls.tsx b/learn-claude-code/web/src/components/simulator/simulator-controls.tsx
similarity index 100%
rename from web/src/components/simulator/simulator-controls.tsx
rename to learn-claude-code/web/src/components/simulator/simulator-controls.tsx
diff --git a/web/src/components/simulator/simulator-message.tsx b/learn-claude-code/web/src/components/simulator/simulator-message.tsx
similarity index 100%
rename from web/src/components/simulator/simulator-message.tsx
rename to learn-claude-code/web/src/components/simulator/simulator-message.tsx
diff --git a/web/src/components/timeline/timeline.tsx b/learn-claude-code/web/src/components/timeline/timeline.tsx
similarity index 100%
rename from web/src/components/timeline/timeline.tsx
rename to learn-claude-code/web/src/components/timeline/timeline.tsx
diff --git a/web/src/components/ui/badge.tsx b/learn-claude-code/web/src/components/ui/badge.tsx
similarity index 100%
rename from web/src/components/ui/badge.tsx
rename to learn-claude-code/web/src/components/ui/badge.tsx
diff --git a/web/src/components/ui/card.tsx b/learn-claude-code/web/src/components/ui/card.tsx
similarity index 100%
rename from web/src/components/ui/card.tsx
rename to learn-claude-code/web/src/components/ui/card.tsx
diff --git a/web/src/components/ui/tabs.tsx b/learn-claude-code/web/src/components/ui/tabs.tsx
similarity index 100%
rename from web/src/components/ui/tabs.tsx
rename to learn-claude-code/web/src/components/ui/tabs.tsx
diff --git a/web/src/components/visualizations/index.tsx b/learn-claude-code/web/src/components/visualizations/index.tsx
similarity index 100%
rename from web/src/components/visualizations/index.tsx
rename to learn-claude-code/web/src/components/visualizations/index.tsx
diff --git a/web/src/components/visualizations/s01-agent-loop.tsx b/learn-claude-code/web/src/components/visualizations/s01-agent-loop.tsx
similarity index 100%
rename from web/src/components/visualizations/s01-agent-loop.tsx
rename to learn-claude-code/web/src/components/visualizations/s01-agent-loop.tsx
diff --git a/web/src/components/visualizations/s02-tool-dispatch.tsx b/learn-claude-code/web/src/components/visualizations/s02-tool-dispatch.tsx
similarity index 100%
rename from web/src/components/visualizations/s02-tool-dispatch.tsx
rename to learn-claude-code/web/src/components/visualizations/s02-tool-dispatch.tsx
diff --git a/web/src/components/visualizations/s03-permission.tsx b/learn-claude-code/web/src/components/visualizations/s03-permission.tsx
similarity index 100%
rename from web/src/components/visualizations/s03-permission.tsx
rename to learn-claude-code/web/src/components/visualizations/s03-permission.tsx
diff --git a/web/src/components/visualizations/s03-todo-write.tsx b/learn-claude-code/web/src/components/visualizations/s03-todo-write.tsx
similarity index 100%
rename from web/src/components/visualizations/s03-todo-write.tsx
rename to learn-claude-code/web/src/components/visualizations/s03-todo-write.tsx
diff --git a/web/src/components/visualizations/s04-hooks.tsx b/learn-claude-code/web/src/components/visualizations/s04-hooks.tsx
similarity index 100%
rename from web/src/components/visualizations/s04-hooks.tsx
rename to learn-claude-code/web/src/components/visualizations/s04-hooks.tsx
diff --git a/web/src/components/visualizations/s04-subagent.tsx b/learn-claude-code/web/src/components/visualizations/s04-subagent.tsx
similarity index 100%
rename from web/src/components/visualizations/s04-subagent.tsx
rename to learn-claude-code/web/src/components/visualizations/s04-subagent.tsx
diff --git a/web/src/components/visualizations/s05-skill-loading.tsx b/learn-claude-code/web/src/components/visualizations/s05-skill-loading.tsx
similarity index 100%
rename from web/src/components/visualizations/s05-skill-loading.tsx
rename to learn-claude-code/web/src/components/visualizations/s05-skill-loading.tsx
diff --git a/web/src/components/visualizations/s06-context-compact.tsx b/learn-claude-code/web/src/components/visualizations/s06-context-compact.tsx
similarity index 100%
rename from web/src/components/visualizations/s06-context-compact.tsx
rename to learn-claude-code/web/src/components/visualizations/s06-context-compact.tsx
diff --git a/web/src/components/visualizations/s07-task-system.tsx b/learn-claude-code/web/src/components/visualizations/s07-task-system.tsx
similarity index 100%
rename from web/src/components/visualizations/s07-task-system.tsx
rename to learn-claude-code/web/src/components/visualizations/s07-task-system.tsx
diff --git a/web/src/components/visualizations/s08-background-tasks.tsx b/learn-claude-code/web/src/components/visualizations/s08-background-tasks.tsx
similarity index 100%
rename from web/src/components/visualizations/s08-background-tasks.tsx
rename to learn-claude-code/web/src/components/visualizations/s08-background-tasks.tsx
diff --git a/web/src/components/visualizations/s09-agent-teams.tsx b/learn-claude-code/web/src/components/visualizations/s09-agent-teams.tsx
similarity index 100%
rename from web/src/components/visualizations/s09-agent-teams.tsx
rename to learn-claude-code/web/src/components/visualizations/s09-agent-teams.tsx
diff --git a/web/src/components/visualizations/s09-memory.tsx b/learn-claude-code/web/src/components/visualizations/s09-memory.tsx
similarity index 100%
rename from web/src/components/visualizations/s09-memory.tsx
rename to learn-claude-code/web/src/components/visualizations/s09-memory.tsx
diff --git a/web/src/components/visualizations/s10-system-prompt.tsx b/learn-claude-code/web/src/components/visualizations/s10-system-prompt.tsx
similarity index 100%
rename from web/src/components/visualizations/s10-system-prompt.tsx
rename to learn-claude-code/web/src/components/visualizations/s10-system-prompt.tsx
diff --git a/web/src/components/visualizations/s10-team-protocols.tsx b/learn-claude-code/web/src/components/visualizations/s10-team-protocols.tsx
similarity index 100%
rename from web/src/components/visualizations/s10-team-protocols.tsx
rename to learn-claude-code/web/src/components/visualizations/s10-team-protocols.tsx
diff --git a/web/src/components/visualizations/s11-autonomous-agents.tsx b/learn-claude-code/web/src/components/visualizations/s11-autonomous-agents.tsx
similarity index 100%
rename from web/src/components/visualizations/s11-autonomous-agents.tsx
rename to learn-claude-code/web/src/components/visualizations/s11-autonomous-agents.tsx
diff --git a/web/src/components/visualizations/s11-error-recovery.tsx b/learn-claude-code/web/src/components/visualizations/s11-error-recovery.tsx
similarity index 100%
rename from web/src/components/visualizations/s11-error-recovery.tsx
rename to learn-claude-code/web/src/components/visualizations/s11-error-recovery.tsx
diff --git a/web/src/components/visualizations/s12-worktree-task-isolation.tsx b/learn-claude-code/web/src/components/visualizations/s12-worktree-task-isolation.tsx
similarity index 100%
rename from web/src/components/visualizations/s12-worktree-task-isolation.tsx
rename to learn-claude-code/web/src/components/visualizations/s12-worktree-task-isolation.tsx
diff --git a/web/src/components/visualizations/s14-cron-scheduler.tsx b/learn-claude-code/web/src/components/visualizations/s14-cron-scheduler.tsx
similarity index 100%
rename from web/src/components/visualizations/s14-cron-scheduler.tsx
rename to learn-claude-code/web/src/components/visualizations/s14-cron-scheduler.tsx
diff --git a/web/src/components/visualizations/s19-mcp-tools.tsx b/learn-claude-code/web/src/components/visualizations/s19-mcp-tools.tsx
similarity index 100%
rename from web/src/components/visualizations/s19-mcp-tools.tsx
rename to learn-claude-code/web/src/components/visualizations/s19-mcp-tools.tsx
diff --git a/web/src/components/visualizations/s20-comprehensive.tsx b/learn-claude-code/web/src/components/visualizations/s20-comprehensive.tsx
similarity index 100%
rename from web/src/components/visualizations/s20-comprehensive.tsx
rename to learn-claude-code/web/src/components/visualizations/s20-comprehensive.tsx
diff --git a/web/src/components/visualizations/shared/mechanism-flow.tsx b/learn-claude-code/web/src/components/visualizations/shared/mechanism-flow.tsx
similarity index 100%
rename from web/src/components/visualizations/shared/mechanism-flow.tsx
rename to learn-claude-code/web/src/components/visualizations/shared/mechanism-flow.tsx
diff --git a/web/src/components/visualizations/shared/step-controls.tsx b/learn-claude-code/web/src/components/visualizations/shared/step-controls.tsx
similarity index 100%
rename from web/src/components/visualizations/shared/step-controls.tsx
rename to learn-claude-code/web/src/components/visualizations/shared/step-controls.tsx
diff --git a/web/src/data/annotations/s01.json b/learn-claude-code/web/src/data/annotations/s01.json
similarity index 100%
rename from web/src/data/annotations/s01.json
rename to learn-claude-code/web/src/data/annotations/s01.json
diff --git a/web/src/data/annotations/s02.json b/learn-claude-code/web/src/data/annotations/s02.json
similarity index 100%
rename from web/src/data/annotations/s02.json
rename to learn-claude-code/web/src/data/annotations/s02.json
diff --git a/web/src/data/annotations/s03.json b/learn-claude-code/web/src/data/annotations/s03.json
similarity index 100%
rename from web/src/data/annotations/s03.json
rename to learn-claude-code/web/src/data/annotations/s03.json
diff --git a/web/src/data/annotations/s04.json b/learn-claude-code/web/src/data/annotations/s04.json
similarity index 100%
rename from web/src/data/annotations/s04.json
rename to learn-claude-code/web/src/data/annotations/s04.json
diff --git a/web/src/data/annotations/s05.json b/learn-claude-code/web/src/data/annotations/s05.json
similarity index 100%
rename from web/src/data/annotations/s05.json
rename to learn-claude-code/web/src/data/annotations/s05.json
diff --git a/web/src/data/annotations/s06.json b/learn-claude-code/web/src/data/annotations/s06.json
similarity index 100%
rename from web/src/data/annotations/s06.json
rename to learn-claude-code/web/src/data/annotations/s06.json
diff --git a/web/src/data/annotations/s07.json b/learn-claude-code/web/src/data/annotations/s07.json
similarity index 100%
rename from web/src/data/annotations/s07.json
rename to learn-claude-code/web/src/data/annotations/s07.json
diff --git a/web/src/data/annotations/s08.json b/learn-claude-code/web/src/data/annotations/s08.json
similarity index 100%
rename from web/src/data/annotations/s08.json
rename to learn-claude-code/web/src/data/annotations/s08.json
diff --git a/web/src/data/annotations/s09.json b/learn-claude-code/web/src/data/annotations/s09.json
similarity index 100%
rename from web/src/data/annotations/s09.json
rename to learn-claude-code/web/src/data/annotations/s09.json
diff --git a/web/src/data/annotations/s10.json b/learn-claude-code/web/src/data/annotations/s10.json
similarity index 100%
rename from web/src/data/annotations/s10.json
rename to learn-claude-code/web/src/data/annotations/s10.json
diff --git a/web/src/data/annotations/s11.json b/learn-claude-code/web/src/data/annotations/s11.json
similarity index 100%
rename from web/src/data/annotations/s11.json
rename to learn-claude-code/web/src/data/annotations/s11.json
diff --git a/web/src/data/annotations/s12.json b/learn-claude-code/web/src/data/annotations/s12.json
similarity index 100%
rename from web/src/data/annotations/s12.json
rename to learn-claude-code/web/src/data/annotations/s12.json
diff --git a/web/src/data/annotations/s13.json b/learn-claude-code/web/src/data/annotations/s13.json
similarity index 100%
rename from web/src/data/annotations/s13.json
rename to learn-claude-code/web/src/data/annotations/s13.json
diff --git a/web/src/data/annotations/s14.json b/learn-claude-code/web/src/data/annotations/s14.json
similarity index 100%
rename from web/src/data/annotations/s14.json
rename to learn-claude-code/web/src/data/annotations/s14.json
diff --git a/web/src/data/annotations/s15.json b/learn-claude-code/web/src/data/annotations/s15.json
similarity index 100%
rename from web/src/data/annotations/s15.json
rename to learn-claude-code/web/src/data/annotations/s15.json
diff --git a/web/src/data/annotations/s16.json b/learn-claude-code/web/src/data/annotations/s16.json
similarity index 100%
rename from web/src/data/annotations/s16.json
rename to learn-claude-code/web/src/data/annotations/s16.json
diff --git a/web/src/data/annotations/s17.json b/learn-claude-code/web/src/data/annotations/s17.json
similarity index 100%
rename from web/src/data/annotations/s17.json
rename to learn-claude-code/web/src/data/annotations/s17.json
diff --git a/web/src/data/annotations/s18.json b/learn-claude-code/web/src/data/annotations/s18.json
similarity index 100%
rename from web/src/data/annotations/s18.json
rename to learn-claude-code/web/src/data/annotations/s18.json
diff --git a/web/src/data/annotations/s19.json b/learn-claude-code/web/src/data/annotations/s19.json
similarity index 100%
rename from web/src/data/annotations/s19.json
rename to learn-claude-code/web/src/data/annotations/s19.json
diff --git a/web/src/data/annotations/s20.json b/learn-claude-code/web/src/data/annotations/s20.json
similarity index 100%
rename from web/src/data/annotations/s20.json
rename to learn-claude-code/web/src/data/annotations/s20.json
diff --git a/web/src/data/execution-flows.ts b/learn-claude-code/web/src/data/execution-flows.ts
similarity index 100%
rename from web/src/data/execution-flows.ts
rename to learn-claude-code/web/src/data/execution-flows.ts
diff --git a/web/src/data/generated/docs.json b/learn-claude-code/web/src/data/generated/docs.json
similarity index 93%
rename from web/src/data/generated/docs.json
rename to learn-claude-code/web/src/data/generated/docs.json
index 4e50b73..973a860 100644
--- a/web/src/data/generated/docs.json
+++ b/learn-claude-code/web/src/data/generated/docs.json
@@ -129,37 +129,37 @@
"version": "s08",
"locale": "en",
"title": "s08: Context Compact — Context Will Fill Up, Have a Way to Make Room",
- "content": "# s08: Context Compact — Context Will Fill Up, Have a Way to Make Room\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/en/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — Four-layer compression pipeline: cheap first, expensive last.\n>\n> **Harness Layer**: Compression — clean memory, unlimited sessions.\n\n---\n\n## The Problem\n\nThe agent is running along, then freezes.\n\nIt has bash, read, write — all the capabilities it needs. But it read a 1000-line file (~4000 tokens), then read 30 more files, ran 20 commands. Every command's output, every file's contents, all pile up in the `messages` list.\n\nThe context window is finite. Once full, the API outright rejects the call: `prompt_too_long`.\n\nWithout compression, an agent simply cannot work on large projects.\n\n---\n\n## The Solution\n\n\n\nThe hook structure, skill loading, and sub-Agent from s07 are preserved, with some tools omitted to focus on compaction. The core change: insert three pre-processors (0 API calls) before each LLM call, trigger an LLM summary (1 API call) when tokens still exceed the threshold, and emergency-trim if the API throws an error.\n\nCore design: cheap first, expensive last.\n\n---\n\n## How It Works\n\n\n\n### L1: snip_compact — Trim Irrelevant Old Conversation\n\nThe agent ran 80 turns of conversation, accumulating 160 `messages`. 
The very first \"help me create hello.py\" is barely relevant to current work, yet it still occupies space.\n\nMessage count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle; the only extra boundary rule is that `assistant(tool_use)` must not be separated from the following `user(tool_result)`:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\nMessages are still trimmed directly; this just adds one boundary guard. `tool_result` content within remaining messages still keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.\n\n### L2: micro_compact — Placeholder for Old Tool Results\n\n\n\nThe agent read 10 files consecutively. The full contents of reads 1–7 are still sitting in context, no longer needed, but hogging large amounts of space.\n\nKeep only the 3 most recent `tool_result` entries intact; replace older ones with a one-line placeholder:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n```\n\nOld results are cleared, but a single new result can be 500KB — one `cat` of a large file can max out the context. → L3.\n\n### L3: tool_result_budget — Persist Large Results to Disk\n\n\n\nThe model read 5 large files in one go; all `tool_result` blocks in the last user message total 500KB.\n\nSum the size of all `tool_result` blocks in the last user message. If over 200KB → sort by size, starting from the largest, persist to `.task_outputs/tool-results/`, keeping only a `` marker + a 2000-character preview in context. The model sees the marker and knows the full content is on disk, re-reading it when needed.\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\nThe first three layers are all plain-text / structural operations — 0 API calls — but they cannot \"understand\" conversation content. Context may still be too large. → L4.\n\n### L4: compact_history — Full LLM Summary\n\n\n\nAll three previous layers have run, but after 30 minutes of continuous work on a huge project, tokens still exceed the threshold.\n\nThree-step process:\n\n1. **Save transcript**: Write the full conversation to `.transcripts/` in JSONL format. The transcript preserves a recoverable record, but the model's active context only contains the summary. For the model's current reasoning, the details are no longer in context. The teaching code does not provide a transcript retrieval tool.\n2. 
**LLM generates summary**: Send conversation history to the LLM, asking it to preserve key information: current goals, important findings, modified files, remaining work, user constraints, etc.\n3. **Replace message list**: All old messages are replaced with a single summary. The teaching version only keeps the summary; the real Claude Code re-attaches some recent files, plans, agent/skill/tool context after compaction.\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # Save full conversation first\n summary = summarize_history(messages) # LLM generates summary\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**Circuit breaker**: After 3 consecutive failures, stop retrying to prevent an infinite loop wasting API calls.\n\n### Reactive: reactive_compact\n\nSometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.\n\nThis triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, but still avoids leaving an orphaned `tool_result`.\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nReactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. 
Full error recovery is deferred to s11.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # Three pre-processors (0 API calls)\n # Order: budget first, so large content is persisted before placeholders\n messages[:] = tool_result_budget(messages) # L3: persist large results\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # Still too much? LLM summary (1 API call)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # Emergency\n reactive_retries += 1\n continue\n raise # retry limit exceeded, raise exception\n # ... tool execution ...\n\n # compact tool: when the model actively calls it, triggers compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n```\n\n**The order must not be swapped.** L3 (budget) runs before L2 (micro) because micro replaces old large tool_results with one-line placeholders — budget must persist the full content before that happens. 
This is why CC source puts `applyToolResultBudget` first.\n\n---\n\n## Changes From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Context management | None (context grows unbounded) | Four-layer compression pipeline + emergency |\n| New functions | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| Tools | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| Loop | LLM call → tool execution | Three pre-processors before each turn + threshold-triggered compact_history |\n| Design principle | — | Cheap first, expensive last |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md` (read multiple files consecutively, observe L2 compressing old results)\n2. `Read every file in s08_context_compact/` (read a large amount of content at once, observe L3 persisting to disk)\n3. Chat for 20+ turns, observe whether `[auto compact]` or `[reactive compact]` appears\n\nWhat to watch for: After each tool execution, are old `tool_result` entries compressed? When tokens exceed the threshold after extended conversation, is summarization triggered automatically?\n\n---\n\n## What's Next\n\nContext compression lets an agent run for a long time without crashing. But after each compression, the preferences and constraints the user told it are also lost. Can we let the agent selectively remember important things?\n\ns09 Memory → three subsystems: choosing what to remember, extracting key information, consolidating and organizing. Across compressions, across sessions.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code `compact.ts`, `autoCompact.ts`, `microCompact.ts`, and `query.ts`.\n\n### Execution Order Comparison\n\nThe teaching version labels layers L1/L2/L3/L4 for pedagogical clarity, but actual execution order does not match the numbering:\n\n| Dimension | Teaching Version | Claude Code |\n|-----------|-----------------|-------------|\n| Execution order | budget → snip → micro → auto | budget → snip → micro → collapse → auto (`query.ts:379-468`) |\n| snip_compact | Keep head 3 + tail 47 | CC only enables on main thread; implementation not in open-source repo (`HISTORY_SNIP` feature gate), but interface is visible: `snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`, also exposes `SnipTool` for model-initiated snipping. Teaching version's 3/47 are simplified parameters |\n| micro_compact | Text placeholder replacement | Two paths: time-based clears content directly, cached uses API `cache_edits` (legacy path removed) |\n| micro_compact whitelist | By position (most recent 3) | time-based triggers by time threshold; cached triggers by count (`microCompact.ts`) |\n| tool_result_budget | 200KB characters | 200,000 characters (`toolLimits.ts:49`) |\n| compact_history threshold | Character count estimate | Precise tokens: `contextWindow - maxOutputTokens - 13_000` |\n| Summary requirements | 5 categories of info | 9 sections + ``/`` dual tags |\n| Compression prompt | Simple prompt | Double-ended hard guardrails forbidding tool calls |\n| PTL retry | Yes (simplified) | `truncateHeadForPTLRetry()` retreats by message groups (`compact.ts:243-290`) |\n| Post-compaction recovery | None (teaching version only keeps summary) | Auto re-read recent files, plans, agent/skill/tool context |\n| Circuit breaker | 3 times | 3 times (`autoCompact.ts:70`) |\n| Reactive retry | 1 time | CC has more granular tiered retries |\n\n### Execution Order Details\n\nThe real order in CC source `query.ts`:\n\n1. 
`applyToolResultBudget` (L379): persist large results first, ensuring full content is saved\n2. `snipCompact` (L403): trim middle messages\n3. `microcompact` (L414): old result placeholders\n4. `contextCollapse` (L441): independent context management system (not in teaching version)\n5. `autoCompact` (L454): LLM full summary\n\nThe teaching version's budget → snip → micro order matches this. The teaching version does not have the contextCollapse mechanism.\n\n### Full Constant Reference\n\n| Constant | Value | Source File |\n|----------|-------|-------------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| Time micro_compact interval | 60 minutes | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse and sessionMemoryCompact\n\nCC source code has two additional mechanisms not covered in this teaching version:\n\n- **contextCollapse**: An independent context management system that, when enabled, suppresses proactive autocompact (`autoCompact.ts:215-222`), with collapse's commit/blocking flow taking over context management. Manual `/compact` and reactive fallback remain independent paths, unaffected by contextCollapse.\n- **sessionMemoryCompact**: Before compact_history, CC first attempts a lightweight summary using existing session memory (covered in s09) without calling the LLM. This mechanism becomes clearer after learning s09.\n\n### What Does the Compression Prompt Look Like?\n\nCC's compression prompt has two hard requirements:\n\n1. **Absolutely no tool calls**: It begins with `CRITICAL: Respond with TEXT ONLY. 
Do NOT call any tools.`, and appends another REMINDER at the end\n2. **Analyze first, then summarize**: The model must first reason in an `` tag, then output the formal summary in a `` tag. The analysis is stripped during formatting\n\n### Teaching Version Simplifications Are Intentional\n\n- micro_compact uses text placeholders → we don't have API-level `cache_edits` access\n- Tokens estimated via character count → precise tokenizers are out of scope\n- Post-compaction recovery omitted → teaching version only keeps summary, does not auto re-attach files\n- Two auxiliary mechanisms not covered → they fall in the 10% detail category\n\nThe core design principle, cheap first, expensive last, is fully preserved.\n\n
\n\n\n"
+ "content": "# s08: Context Compact — Context Will Fill Up, Have a Way to Make Room\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/en/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — Four-layer compression pipeline: cheap first, expensive last.\n>\n> **Harness Layer**: Compression — clean memory, unlimited sessions.\n\n---\n\n## The Problem\n\nThe agent is running along, then freezes.\n\nIt has bash, read, write — all the capabilities it needs. But it read a 1000-line file (~4000 tokens), then read 30 more files, ran 20 commands. Every command's output, every file's contents, all pile up in the `messages` list.\n\nThe context window is finite. Once full, the API outright rejects the call: `prompt_too_long`.\n\nWithout compression, an agent simply cannot work on large projects.\n\n---\n\n## The Solution\n\n\n\nThe hook structure, skill loading, and sub-Agent from s07 are preserved, with some tools omitted to focus on compaction. The core change: insert three pre-processors (0 API calls) before each LLM call, trigger an LLM summary (1 API call) when tokens still exceed the threshold, and emergency-trim if the API throws an error.\n\nCore design: cheap first, expensive last.\n\n---\n\n## How It Works\n\n\n\n### L1: snip_compact — Trim Irrelevant Old Conversation\n\nThe agent ran 80 turns of conversation, accumulating 160 `messages`. 
The very first \"help me create hello.py\" is barely relevant to current work, yet it still occupies space.\n\nMessage count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle; the only extra boundary rule is that `assistant(tool_use)` must not be separated from the following `user(tool_result)`:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\nMessages are still trimmed directly; this just adds one boundary guard. `tool_result` content within remaining messages still keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.\n\n### L2: micro_compact — Placeholder for Old Tool Results\n\n\n\nThe agent read 10 files consecutively. The full contents of reads 1–7 are still sitting in context, no longer needed, but hogging large amounts of space.\n\nKeep only the 3 most recent `tool_result` entries intact; replace older ones with a one-line placeholder:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n```\n\nOld results are cleared, but a single new result can be 500KB — one `cat` of a large file can max out the context. → L3.\n\n### L3: tool_result_budget — Persist Large Results to Disk\n\n\n\nThe model read 5 large files in one go; all `tool_result` blocks in the last user message total 500KB.\n\nSum the size of all `tool_result` blocks in the last user message. If over 200KB → sort by size, starting from the largest, persist to `.task_outputs/tool-results/`, keeping only a `` marker + a 2000-character preview in context. The model sees the marker and knows the full content is on disk, re-reading it when needed.\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\nThe first three layers are all plain-text / structural operations — 0 API calls — but they cannot \"understand\" conversation content. Context may still be too large. → L4.\n\n### L4: compact_history — Full LLM Summary\n\n\n\nAll three previous layers have run, but after 30 minutes of continuous work on a huge project, tokens still exceed the threshold.\n\nThree-step process:\n\n1. **Save transcript**: Write the full conversation to `.transcripts/` in JSONL format. The transcript preserves a recoverable record, but the model's active context only contains the summary. For the model's current reasoning, the details are no longer in context. The teaching code does not provide a transcript retrieval tool.\n2. 
**LLM generates summary**: Send conversation history to the LLM, asking it to preserve key information: current goals, important findings, modified files, remaining work, user constraints, etc.\n3. **Replace message list**: All old messages are replaced with a single summary. The teaching version only keeps the summary; the real Claude Code re-attaches some recent files, plans, agent/skill/tool context after compaction.\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # Save full conversation first\n summary = summarize_history(messages) # LLM generates summary\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**Circuit breaker**: After 3 consecutive failures, stop retrying to prevent an infinite loop wasting API calls.\n\n### Reactive: reactive_compact\n\nSometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.\n\nThis triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, but still avoids leaving an orphaned `tool_result`.\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nReactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. 
Full error recovery is deferred to s11.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # Three pre-processors (0 API calls)\n # Order: budget first, so large content is persisted before placeholders\n messages[:] = tool_result_budget(messages) # L3: persist large results\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # Still too much? LLM summary (1 API call)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # Emergency\n reactive_retries += 1\n continue\n raise # retry limit exceeded, raise exception\n # ... tool execution ...\n\n # compact tool: when the model actively calls it, triggers compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n```\n\n**The order must not be swapped.** L3 (budget) runs before L2 (micro) because micro replaces old large tool_results with one-line placeholders — budget must persist the full content before that happens. 
This is why CC source puts `applyToolResultBudget` first.\n\n---\n\n## Changes From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Context management | None (context grows unbounded) | Four-layer compression pipeline + emergency |\n| New functions | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| Tools | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| Loop | LLM call → tool execution | Three pre-processors before each turn + threshold-triggered compact_history |\n| Design principle | — | Cheap first, expensive last |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md` (read multiple files consecutively, observe L2 compressing old results)\n2. `Read every file in s08_context_compact/` (read a large amount of content at once, observe L3 persisting to disk)\n3. Chat for 20+ turns, observe whether `[auto compact]` or `[reactive compact]` appears\n\nWhat to watch for: After each tool execution, are old `tool_result` entries compressed? When tokens exceed the threshold after extended conversation, is summarization triggered automatically?\n\n---\n\n## What's Next\n\nContext compression lets an agent run for a long time without crashing. But after each compression, the preferences and constraints the user told it are also lost. Can we let the agent selectively remember important things?\n\ns09 Memory → three subsystems: choosing what to remember, extracting key information, consolidating and organizing. Across compressions, across sessions.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code `compact.ts`, `autoCompact.ts`, `microCompact.ts`, and `query.ts`.\n\n### Execution Order Comparison\n\nThe teaching version labels layers L1/L2/L3/L4 for pedagogical clarity, but actual execution order does not match the numbering:\n\n| Dimension | Teaching Version | Claude Code |\n|-----------|-----------------|-------------|\n| Execution order | budget → snip → micro → auto | budget → snip → micro → collapse → auto (`query.ts:379-468`) |\n| snip_compact | Keep head 3 + tail 47 | CC only enables on main thread; implementation not in open-source repo (`HISTORY_SNIP` feature gate), but interface is visible: `snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`, also exposes `SnipTool` for model-initiated snipping. Teaching version's 3/47 are simplified parameters |\n| micro_compact | Text placeholder replacement | Two paths: time-based clears content directly, cached uses API `cache_edits` (legacy path removed) |\n| micro_compact whitelist | By position (most recent 3) | time-based triggers by time threshold; cached triggers by count (`microCompact.ts`) |\n| tool_result_budget | 200KB characters | 200,000 characters (`toolLimits.ts:49`) |\n| compact_history threshold | Character count estimate | Precise tokens: `contextWindow - maxOutputTokens - 13_000` |\n| Summary requirements | 5 categories of info | 9 sections + ``/`` dual tags |\n| Compression prompt | Simple prompt | Double-ended hard guardrails forbidding tool calls |\n| PTL retry | Yes (simplified) | `truncateHeadForPTLRetry()` retreats by message groups (`compact.ts:243-290`) |\n| Post-compaction recovery | None (teaching version only keeps summary) | Auto re-read recent files, plans, agent/skill/tool context |\n| Circuit breaker | 3 times | 3 times (`autoCompact.ts:70`) |\n| Reactive retry | 1 time | CC has more granular tiered retries |\n\n### Execution Order Details\n\nThe real order in CC source `query.ts`:\n\n1. 
`applyToolResultBudget` (L379): persist large results first, ensuring full content is saved\n2. `snipCompact` (L403): trim middle messages\n3. `microcompact` (L414): old result placeholders\n4. `contextCollapse` (L441): independent context management system (not in teaching version)\n5. `autoCompact` (L454): LLM full summary\n\nThe teaching version's budget → snip → micro order matches this. The teaching version does not have the contextCollapse mechanism.\n\n### read_file Trade-off\n\nThe teaching version's `micro_compact` replaces old `tool_result` blocks with placeholders uniformly, including `read_file`. This usually does not affect functional correctness: if the model needs the file contents later, it can read the file again. The cost is an extra tool call and potentially lower prompt cache hit rates.\n\nClaude Code does not solve this with the teaching version's simple rule. It also puts `Read` in the microcompactable tool set, but maintains a separate `readFileState`: repeated reads of unchanged files return `FILE_UNCHANGED_STUB`, and after compaction it restores recently read file contents within a budget (for example, up to 5 files, 5K tokens per file, 50K tokens total). That is a production-level cache and recovery mechanism. 
The teaching version does not expand into that machinery; it keeps the simpler trade-off of compacting old results and re-reading when needed.\n\n### Full Constant Reference\n\n| Constant | Value | Source File |\n|----------|-------|-------------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| Time micro_compact interval | 60 minutes | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse and sessionMemoryCompact\n\nCC source code has two additional mechanisms not covered in this teaching version:\n\n- **contextCollapse**: An independent context management system that, when enabled, suppresses proactive autocompact (`autoCompact.ts:215-222`), with collapse's commit/blocking flow taking over context management. Manual `/compact` and reactive fallback remain independent paths, unaffected by contextCollapse.\n- **sessionMemoryCompact**: Before compact_history, CC first attempts a lightweight summary using existing session memory (covered in s09) without calling the LLM. This mechanism becomes clearer after learning s09.\n\n### What Does the Compression Prompt Look Like?\n\nCC's compression prompt has two hard requirements:\n\n1. **Absolutely no tool calls**: It begins with `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`, and appends another REMINDER at the end\n2. **Analyze first, then summarize**: The model must first reason in an `` tag, then output the formal summary in a `` tag. 
The analysis is stripped during formatting\n\n### Teaching Version Simplifications Are Intentional\n\n- micro_compact uses text placeholders → we don't have API-level `cache_edits` access\n- read_file is not special-cased → the teaching version accepts re-reading when needed instead of introducing readFileState and post-compaction recovery\n- Tokens estimated via character count → precise tokenizers are out of scope\n- Post-compaction recovery omitted → teaching version only keeps summary, does not auto re-attach files\n- Two auxiliary mechanisms not covered → they fall in the 10% detail category\n\nThe core design principle, cheap first, expensive last, is fully preserved.\n\n
\n\n\n"
},
{
"version": "s08",
"locale": "zh",
"title": "s08: Context Compact — 上下文总会满,要有办法腾地方",
- "content": "# s08: Context Compact — 上下文总会满,要有办法腾地方\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/zh/s09) → s10 → ... → s20\n> *\"上下文总会满, 要有办法腾地方\"* — 四层压缩策略, 便宜的先跑贵的后跑。\n>\n> **Harness 层**: 压缩 — 干净的记忆, 无限的会话。\n\n---\n\n## 问题\n\nAgent 跑着跑着,不动了。\n\n手里有 bash、有 read、有 write,能力是够的。但它读了一个 1000 行的文件(~4000 token),又读了 30 个文件,跑了 20 条命令。每条命令的输出、每个文件的内容,全都堆在 `messages` 列表里。\n\n上下文窗口是有限的。满了之后,API 直接拒绝:`prompt_too_long`。\n\n不压缩,Agent 根本没法在大项目里干活。\n\n---\n\n## 解决方案\n\n\n\n保留 s07 的 hook 结构、技能加载、子 Agent 等骨架,省略部分工具细节以聚焦压缩。核心变动:每轮 LLM 调用前插入三层预处理器(0 API),token 仍超阈值时触发 LLM 摘要(1 API),API 报错时应急裁剪。\n\n核心设计:便宜的先跑,贵的后跑。\n\n---\n\n## 工作原理\n\n\n\n### L1: snip_compact — 裁掉无关的旧对话\n\nAgent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的\"帮我创建 hello.py\"和当前工作几乎无关了,但全占着位置。\n\n消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉;唯一额外边界条件是,不能把 `assistant(tool_use)` 和后面的 `user(tool_result)` 拆开:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n裁掉的是消息本身,只是在切口处多做一步保护;剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。\n\n### L2: micro_compact — 旧工具结果占位\n\n\n\nAgent 连续读了 10 个文件。第 1-7 次的完整内容还躺在上下文里,早就不需要了,但占着大量空间。\n\n只保留最近 3 条 `tool_result` 的完整内容,更旧的替换为一行占位符:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in 
tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n旧结果清掉了,但单条新结果可能就有 500KB——一个 `cat` 大文件的输出就能打满上下文。→ L3。\n\n### L3: tool_result_budget — 大结果落盘\n\n\n\n模型一次读了 5 个大文件,单条 user 消息里所有 `tool_result` 加起来 500KB。\n\n统计最后一条 user 消息里所有 `tool_result` 的总大小。超过 200KB → 按大小排序,从最大的开始落盘到 `.task_outputs/tool-results/`,上下文里只留 `` 标记 + 前 2000 字符预览。模型看到标记后知道完整内容在磁盘上,需要时可以重新读。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n前三层都是纯文本/结构操作,0 API 调用,但也无法\"理解\"对话内容。上下文可能仍然太大。→ L4。\n\n### L4: compact_history — LLM 全量摘要\n\n\n\n前三层全跑完了,但在超大项目中连续工作 30 分钟后,token 仍然超过阈值。\n\n三步流程:\n\n1. **保存 transcript**:完整对话写入 `.transcripts/`,JSONL 格式。transcript 保留了可恢复记录,但模型的活跃上下文里只剩摘要。对模型当下推理来说,细节已经不在上下文中了。教学代码没有提供 transcript 检索工具。\n2. **LLM 生成摘要**:把对话历史发给 LLM,要求保留当前目标、重要发现、已改文件、剩余工作、用户约束等关键信息。\n3. 
**替换消息列表**:所有旧消息被替换为一条摘要。教学版只保留摘要;真实 Claude Code 会在 compact 后重新附加部分最近文件、计划、agent/skill/tool 等上下文。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先保存完整对话\n summary = summarize_history(messages) # LLM 生成摘要\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**熔断器**:连续失败 3 次后停止重试,防止死循环浪费 API 调用。\n\n### 应急: reactive_compact\n\n有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。\n\n这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,但仍要避免留下孤立 `tool_result`。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 三个预处理器(0 API 调用)\n # 顺序:budget 先跑,确保大内容落盘后再做占位和裁剪\n messages[:] = tool_result_budget(messages) # L3: 大结果落盘\n messages[:] = snip_compact(messages) # L1: 裁中间\n messages[:] = micro_compact(messages) # L2: 旧结果占位\n\n # 还不够?LLM 摘要(1 API 调用)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 应急\n reactive_retries += 1\n continue\n raise # 超过重试上限,抛出异常\n # ... 工具执行 ...\n\n # compact 工具:模型主动调用时触发 compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 结束当前 turn,用压缩后的上下文开始新一轮\n```\n\n**顺序不能换。** L3(budget)在 L2(micro)前面,因为 micro 会把旧的大 tool_result 替换成一行占位符,budget 必须在那之前把完整内容落盘。这也是为什么 CC 源码把 `applyToolResultBudget` 放在最前面。\n\n---\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|------|-----------|-----------|\n| 上下文管理 | 无(上下文无限膨胀) | 四层压缩管线 + 应急 |\n| 新函数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| 循环 | LLM 调用 → 工具执行 | 每轮前跑三层预处理器 + 阈值触发 compact_history |\n| 设计原则 | — | 便宜的先跑,贵的后跑 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(连续读多个文件,观察 L2 压缩旧结果)\n2. `Read every file in s08_context_compact/`(一次性读大量内容,观察 L3 落盘)\n3. 反复对话 20+ 轮,观察是否出现 `[auto compact]` 或 `[reactive compact]`\n\n观察重点:每次工具执行后,旧 tool_result 是否被压缩?连续对话后 token 超阈值时,是否自动触发了摘要?\n\n---\n\n## 接下来\n\n上下文压缩让 Agent 能跑很久不会崩。但每次压缩后,用户之前告诉它的偏好、约束也跟着丢了。能不能让 Agent 有选择地记住重要的事?\n\ns09 Memory → 三个子系统:选择记什么、提取关键信息、整理巩固。跨压缩、跨会话。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` 的分析。\n\n### 执行顺序对照\n\n教学版为了讲解方便按 L1/L2/L3/L4 编号,但实际执行顺序和编号不完全对应:\n\n| 维度 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 执行顺序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 保留头 3 + 尾 47 | CC 仅主线程启用;实现不在开源仓库中(`HISTORY_SNIP` feature gate),但接口可见:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`,还暴露了 `SnipTool` 工具让模型主动调用。教学版的 3/47 是简化参数 |\n| micro_compact | 文本占位符替换 | 两条路径:time-based 直接清内容,cached 走 API `cache_edits`(legacy path 已移除) |\n| micro_compact 白名单 | 按位置(最近 3 条) | time-based 按时间阈值触发;cached 按计数触发(`microCompact.ts`) |\n| tool_result_budget | 200KB 字符 | 200,000 字符(`toolLimits.ts:49`) |\n| compact_history 阈值 | 字符数估算 | 精确 token:`contextWindow - maxOutputTokens - 13_000` |\n| 摘要要求 | 5 类信息 | 9 个部分 + ``/`` 双标签 |\n| 压缩 prompt | 简单 prompt | 首尾双重防呆禁止调工具 |\n| PTL retry | 有(简化) | `truncateHeadForPTLRetry()` 按消息组回退(`compact.ts:243-290`) |\n| 后压缩恢复 | 无(教学版只保留摘要) | 自动重新读取最近文件、计划、agent/skill/tool 等 |\n| 熔断器 | 3 次 | 3 次(`autoCompact.ts:70`) |\n| reactive 重试 | 1 次 | CC 有更精细的分级重试 |\n\n### 执行顺序详解\n\nCC 源码 `query.ts` 中的真实顺序:\n\n1. `applyToolResultBudget`(L379):先处理大结果,确保完整内容落盘\n2. `snipCompact`(L403):裁中间消息\n3. `microcompact`(L414):旧结果占位\n4. `contextCollapse`(L441):独立的上下文管理系统(教学版无)\n5. 
`autoCompact`(L454):LLM 全量摘要\n\n教学版的 budget → snip → micro 顺序与此一致。教学版没有 contextCollapse 机制。\n\n### 完整常量参考\n\n| 常量 | 值 | 源文件 |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 时间 micro_compact 间隔 | 60 分钟 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse 和 sessionMemoryCompact\n\nCC 源码中还有两个机制本教学版没有展开:\n\n- **contextCollapse**:独立的上下文管理系统,启用时抑制 proactive autocompact(`autoCompact.ts:215-222`),由 collapse 的 commit/blocking 流程接管上下文管理。但 manual `/compact` 和 reactive fallback 仍是独立路径,不受 contextCollapse 影响。\n- **sessionMemoryCompact**:compact_history 之前,CC 会先尝试用已有的 session memory(s09 会讲到)做轻量摘要,不调 LLM。这个机制等学完 s09 之后回头看会更清楚。\n\n### 压缩 prompt 长什么样?\n\nCC 的压缩 prompt 有两个硬性要求:\n\n1. **绝对禁止调用工具**:开头就是 `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`,末尾还会再 REMINDER 一次\n2. **先分析再总结**:模型需要先在 `` 标签里理清思路,然后在 `` 标签里输出正式摘要。analysis 在格式化时被剥离\n\n### 教学版的简化是刻意的\n\n- micro_compact 用文本占位 → 我们没有 API 层的 `cache_edits` 权限\n- token 用字符数估算 → 精确 tokenizer 不在教学范围内\n- 后压缩恢复省略 → 教学版只保留摘要,不自动重新附加文件\n- 两个辅助机制不展开 → 属于 10% 的细节\n\n核心设计思想,便宜的先跑贵的后跑,完整保留。\n\n
\n\n\n"
+ "content": "# s08: Context Compact — 上下文总会满,要有办法腾地方\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/zh/s09) → s10 → ... → s20\n> *\"上下文总会满, 要有办法腾地方\"* — 四层压缩策略, 便宜的先跑贵的后跑。\n>\n> **Harness 层**: 压缩 — 干净的记忆, 无限的会话。\n\n---\n\n## 问题\n\nAgent 跑着跑着,不动了。\n\n手里有 bash、有 read、有 write,能力是够的。但它读了一个 1000 行的文件(~4000 token),又读了 30 个文件,跑了 20 条命令。每条命令的输出、每个文件的内容,全都堆在 `messages` 列表里。\n\n上下文窗口是有限的。满了之后,API 直接拒绝:`prompt_too_long`。\n\n不压缩,Agent 根本没法在大项目里干活。\n\n---\n\n## 解决方案\n\n\n\n保留 s07 的 hook 结构、技能加载、子 Agent 等骨架,省略部分工具细节以聚焦压缩。核心变动:每轮 LLM 调用前插入三层预处理器(0 API),token 仍超阈值时触发 LLM 摘要(1 API),API 报错时应急裁剪。\n\n核心设计:便宜的先跑,贵的后跑。\n\n---\n\n## 工作原理\n\n\n\n### L1: snip_compact — 裁掉无关的旧对话\n\nAgent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的\"帮我创建 hello.py\"和当前工作几乎无关了,但全占着位置。\n\n消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉;唯一额外边界条件是,不能把 `assistant(tool_use)` 和后面的 `user(tool_result)` 拆开:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n裁掉的是消息本身,只是在切口处多做一步保护;剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。\n\n### L2: micro_compact — 旧工具结果占位\n\n\n\nAgent 连续读了 10 个文件。第 1-7 次的完整内容还躺在上下文里,早就不需要了,但占着大量空间。\n\n只保留最近 3 条 `tool_result` 的完整内容,更旧的替换为一行占位符:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in 
tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n旧结果清掉了,但单条新结果可能就有 500KB——一个 `cat` 大文件的输出就能打满上下文。→ L3。\n\n### L3: tool_result_budget — 大结果落盘\n\n\n\n模型一次读了 5 个大文件,单条 user 消息里所有 `tool_result` 加起来 500KB。\n\n统计最后一条 user 消息里所有 `tool_result` 的总大小。超过 200KB → 按大小排序,从最大的开始落盘到 `.task_outputs/tool-results/`,上下文里只留 `` 标记 + 前 2000 字符预览。模型看到标记后知道完整内容在磁盘上,需要时可以重新读。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n前三层都是纯文本/结构操作,0 API 调用,但也无法\"理解\"对话内容。上下文可能仍然太大。→ L4。\n\n### L4: compact_history — LLM 全量摘要\n\n\n\n前三层全跑完了,但在超大项目中连续工作 30 分钟后,token 仍然超过阈值。\n\n三步流程:\n\n1. **保存 transcript**:完整对话写入 `.transcripts/`,JSONL 格式。transcript 保留了可恢复记录,但模型的活跃上下文里只剩摘要。对模型当下推理来说,细节已经不在上下文中了。教学代码没有提供 transcript 检索工具。\n2. **LLM 生成摘要**:把对话历史发给 LLM,要求保留当前目标、重要发现、已改文件、剩余工作、用户约束等关键信息。\n3. 
**替换消息列表**:所有旧消息被替换为一条摘要。教学版只保留摘要;真实 Claude Code 会在 compact 后重新附加部分最近文件、计划、agent/skill/tool 等上下文。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先保存完整对话\n summary = summarize_history(messages) # LLM 生成摘要\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**熔断器**:连续失败 3 次后停止重试,防止死循环浪费 API 调用。\n\n### 应急: reactive_compact\n\n有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。\n\n这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,但仍要避免留下孤立 `tool_result`。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 三个预处理器(0 API 调用)\n # 顺序:budget 先跑,确保大内容落盘后再做占位和裁剪\n messages[:] = tool_result_budget(messages) # L3: 大结果落盘\n messages[:] = snip_compact(messages) # L1: 裁中间\n messages[:] = micro_compact(messages) # L2: 旧结果占位\n\n # 还不够?LLM 摘要(1 API 调用)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 应急\n reactive_retries += 1\n continue\n raise # 超过重试上限,抛出异常\n # ... 工具执行 ...\n\n # compact 工具:模型主动调用时触发 compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 结束当前 turn,用压缩后的上下文开始新一轮\n```\n\n**顺序不能换。** L3(budget)在 L2(micro)前面,因为 micro 会把旧的大 tool_result 替换成一行占位符,budget 必须在那之前把完整内容落盘。这也是为什么 CC 源码把 `applyToolResultBudget` 放在最前面。\n\n---\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|------|-----------|-----------|\n| 上下文管理 | 无(上下文无限膨胀) | 四层压缩管线 + 应急 |\n| 新函数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| 循环 | LLM 调用 → 工具执行 | 每轮前跑三层预处理器 + 阈值触发 compact_history |\n| 设计原则 | — | 便宜的先跑,贵的后跑 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(连续读多个文件,观察 L2 压缩旧结果)\n2. `Read every file in s08_context_compact/`(一次性读大量内容,观察 L3 落盘)\n3. 反复对话 20+ 轮,观察是否出现 `[auto compact]` 或 `[reactive compact]`\n\n观察重点:每次工具执行后,旧 tool_result 是否被压缩?连续对话后 token 超阈值时,是否自动触发了摘要?\n\n---\n\n## 接下来\n\n上下文压缩让 Agent 能跑很久不会崩。但每次压缩后,用户之前告诉它的偏好、约束也跟着丢了。能不能让 Agent 有选择地记住重要的事?\n\ns09 Memory → 三个子系统:选择记什么、提取关键信息、整理巩固。跨压缩、跨会话。\n\n\n深入 CC 源码
 \n\n> 以下基于 CC 源码 `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` 的分析。\n\n### 执行顺序对照\n\n教学版为了讲解方便按 L1/L2/L3/L4 编号,但实际执行顺序和编号不完全对应:\n\n| 维度 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 执行顺序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 保留头 3 + 尾 47 | CC 仅主线程启用;实现不在开源仓库中(`HISTORY_SNIP` feature gate),但接口可见:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`,还暴露了 `SnipTool` 工具让模型主动调用。教学版的 3/47 是简化参数 |\n| micro_compact | 文本占位符替换 | 两条路径:time-based 直接清内容,cached 走 API `cache_edits`(legacy path 已移除) |\n| micro_compact 白名单 | 按位置(最近 3 条) | time-based 按时间阈值触发;cached 按计数触发(`microCompact.ts`) |\n| tool_result_budget | 200KB 字符 | 200,000 字符(`toolLimits.ts:49`) |\n| compact_history 阈值 | 字符数估算 | 精确 token:`contextWindow - maxOutputTokens - 13_000` |\n| 摘要要求 | 5 类信息 | 9 个部分 + `<analysis>`/`<summary>` 双标签 |\n| 压缩 prompt | 简单 prompt | 首尾双重防呆禁止调工具 |\n| PTL retry | 有(简化) | `truncateHeadForPTLRetry()` 按消息组回退(`compact.ts:243-290`) |\n| 后压缩恢复 | 无(教学版只保留摘要) | 自动重新读取最近文件、计划、agent/skill/tool 等 |\n| 熔断器 | 3 次 | 3 次(`autoCompact.ts:70`) |\n| reactive 重试 | 1 次 | CC 有更精细的分级重试 |\n\n### 执行顺序详解\n\nCC 源码 `query.ts` 中的真实顺序:\n\n1. `applyToolResultBudget`(L379):先处理大结果,确保完整内容落盘\n2. `snipCompact`(L403):裁中间消息\n3. `microcompact`(L414):旧结果占位\n4. `contextCollapse`(L441):独立的上下文管理系统(教学版无)\n5. 
`autoCompact`(L454):LLM 全量摘要\n\n教学版的 budget → snip → micro 顺序与此一致。教学版没有 contextCollapse 机制。\n\n### read_file 的取舍\n\n教学版的 `micro_compact` 会把旧 `tool_result` 统一替换成占位符,包括 `read_file`。这通常不影响功能正确性:如果后续还需要文件内容,模型可以重新读一次。代价是可能多一次工具调用,也可能降低 prompt cache 命中率。\n\nClaude Code 没有用教学版这种简单规则解决这个问题。它把 `Read` 也放进可 microcompact 的工具集合,但同时维护 `readFileState`:重复读取未变化文件时返回 `FILE_UNCHANGED_STUB`,compact 后再按预算恢复最近读过的文件内容(例如最多 5 个文件、每个 5K token、总预算 50K token)。这是生产级实现里的缓存和恢复机制,教学版不展开,保留“压缩旧结果,必要时重新读取”的简单 trade-off。\n\n### 完整常量参考\n\n| 常量 | 值 | 源文件 |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 时间 micro_compact 间隔 | 60 分钟 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse 和 sessionMemoryCompact\n\nCC 源码中还有两个机制本教学版没有展开:\n\n- **contextCollapse**:独立的上下文管理系统,启用时抑制 proactive autocompact(`autoCompact.ts:215-222`),由 collapse 的 commit/blocking 流程接管上下文管理。但 manual `/compact` 和 reactive fallback 仍是独立路径,不受 contextCollapse 影响。\n- **sessionMemoryCompact**:compact_history 之前,CC 会先尝试用已有的 session memory(s09 会讲到)做轻量摘要,不调 LLM。这个机制等学完 s09 之后回头看会更清楚。\n\n### 压缩 prompt 长什么样?\n\nCC 的压缩 prompt 有两个硬性要求:\n\n1. **绝对禁止调用工具**:开头就是 `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`,末尾还会再 REMINDER 一次\n2. **先分析再总结**:模型需要先在 `<analysis>` 标签里理清思路,然后在 `<summary>` 标签里输出正式摘要。analysis 在格式化时被剥离\n\n### 教学版的简化是刻意的\n\n- micro_compact 用文本占位 → 我们没有 API 层的 `cache_edits` 权限\n- read_file 不特殊处理 → 教学版接受必要时重新读取,避免引入 readFileState 和后压缩恢复机制\n- token 用字符数估算 → 精确 tokenizer 不在教学范围内\n- 后压缩恢复省略 → 教学版只保留摘要,不自动重新附加文件\n- 两个辅助机制不展开 → 属于 10% 的细节\n\n核心设计思想,便宜的先跑贵的后跑,完整保留。\n\n
\n\n\n"
},
{
"version": "s08",
"locale": "ja",
"title": "s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要",
- "content": "# s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/ja/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — 4層圧縮戦略、安価なものを先に、高価なものを後に実行。\n>\n> **Harness レイヤー**: 圧縮 — クリーンな記憶、無限のセッション。\n\n---\n\n## 課題\n\nAgent が動いている途中で、止まってしまう。\n\nbash、read、write は揃っており、能力は十分。しかし 1000 行のファイル(~4000 token)を読み、さらに 30 のファイルを読み、20 のコマンドを実行したとします。各コマンドの出力、各ファイルの内容がすべて `messages` リストに蓄積されます。\n\nコンテキストウィンドウには上限があります。満杯になると、API は即座に拒否します:`prompt_too_long`。\n\n圧縮しなければ、Agent は大規模プロジェクトではまともに動けません。\n\n---\n\n## ソリューション\n\n\n\ns07 のフック構造、スキルロード、サブ Agent の骨格を維持し、圧縮に焦点を当てるため一部のツールは省略。コアの変更点:各 LLM 呼び出し前に 3 層のプリプロセッサ(0 API)を挿入し、token が閾値を超えた場合は LLM 要約(1 API)をトリガー、API エラー時には緊急トリムを実行。\n\nコア設計:安価なものを先に、高価なものを後に。\n\n---\n\n## 仕組み\n\n\n\n### L1: snip_compact — 無関係な古い会話を切り捨て\n\nAgent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。\n\nメッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持して中間を切り詰める。ただし切れ目だけは調整し、`assistant(tool_use)` と後続の `user(tool_result)` を分断しない:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n切り捨て自体は単純なままで、境界だけを保護する。残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。\n\n### L2: micro_compact — 古いツール結果をプレースホルダに置換\n\n\n\nAgent が連続して 10 個のファイルを読んだ。1〜7 回目の完全な内容はまだコンテキストに残っており、もう不要だが、大量のスペースを占有している。\n\n直近 3 
件の `tool_result` の完全な内容のみを保持し、それより古いものは 1 行のプレースホルダに置換:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n古い結果はクリーンアップされたが、1 件の新しい結果だけで 500KB の可能性がある。大きなファイルを `cat` するだけでコンテキストがいっぱいになる。→ L3。\n\n### L3: tool_result_budget — 大きな結果をディスクに退避\n\n\n\nモデルが一度に 5 つの大きなファイルを読み、1 つの user メッセージ内の全 `tool_result` の合計が 500KB に達した。\n\n最後の user メッセージ内のすべての `tool_result` の合計サイズを集計。200KB を超えた場合 → サイズ順にソートし、最大のものから順に `.task_outputs/tool-results/` に退避。コンテキストには `` マーカー + 先頭 2000 文字のプレビューのみを残す。モデルはマーカーを見て完全な内容がディスク上にあることを認識し、必要に応じて再読み込みできる。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n最初の 3 層はすべて純粋なテキスト/構造操作(0 API 呼び出し)だが、会話内容を「理解」することはできない。コンテキストがまだ大きすぎる可能性がある。→ L4。\n\n### L4: compact_history — LLM 全量要約\n\n\n\n最初の 3 層がすべて実行されたが、超大規模プロジェクトで 30 分間連続作業すると、token がまだ閾値を超えている。\n\n3 ステップのフロー:\n\n1. **transcript を保存**:完全な会話を `.transcripts/` に JSONL 形式で書き出す。transcript は回復可能な記録として保存されるが、モデルのアクティブなコンテキストには要約しか残らない。モデルの現在の推論にとって、詳細はすでにコンテキストにない。教学コードは transcript 検索ツールを提供しない。\n2. **LLM で要約を生成**:会話履歴を LLM に送り、現在の目標、重要な発見、変更済みファイル、残りの作業、ユーザーの制約などの重要な情報を保持するよう指示。\n3. 
**メッセージリストを置換**:すべての古いメッセージが 1 件の要約に置き換えられる。教学版は要約のみを保持する。実際の Claude Code は compact 後に直近のファイル、計画、agent/skill/tool などのコンテキストを再付加する。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先に完全な会話を保存\n summary = summarize_history(messages) # LLM で要約を生成\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**サーキットブレーカー**:連続 3 回失敗したらリトライを停止し、無限ループによる API 呼び出しの浪費を防止。\n\n### 緊急: reactive_compact\n\nAPI がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。\n\nこの時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的だが、末尾を残す際も孤立した `tool_result` を残さないようにする。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。\n\n### 合わせて実行\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 3 つのプリプロセッサ(0 API 呼び出し)\n # 順序:budget を先に実行し、大きな内容をプレースホルダ化する前に退避\n messages[:] = tool_result_budget(messages) # L3: 大きな結果を退避\n messages[:] = snip_compact(messages) # L1: 中間を切り捨て\n messages[:] = micro_compact(messages) # L2: 古い結果をプレースホルダに\n\n # まだ足りない?LLM 要約(1 API 呼び出し)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 緊急対応\n reactive_retries += 1\n continue\n raise # リトライ上限超過、例外をスロー\n # ... 
ツール実行 ...\n\n # compact ツール:モデルが能動的に呼び出した場合、compact_history をトリガー\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 現在のターンを終了し、圧縮後のコンテキストで新しく開始\n```\n\n**順序は変えられない。** L3(budget)が L2(micro)の前に実行される理由:micro は古い大きな tool_result を 1 行のプレースホルダに置換するため、budget はその前に完全な内容を退避させる必要がある。CC ソースが `applyToolResultBudget` を最初に配置する理由も同じ。\n\n---\n\n## s07 からの変更点\n\n| コンポーネント | 変更前 (s07) | 変更後 (s08) |\n|------|-----------|-----------|\n| コンテキスト管理 | なし(コンテキストが無限に膨張) | 4 層圧縮パイプライン + 緊急対応 |\n| 新規関数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| ツール | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| ループ | LLM 呼び出し → ツール実行 | 各ラウンド前に 3 層プリプロセッサを実行 + 閾値で compact_history をトリガー |\n| 設計原則 | — | 安価なものを先に、高価なものを後に |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(連続して複数のファイルを読み、L2 の古い結果圧縮を観察)\n2. `Read every file in s08_context_compact/`(一度に大量の内容を読み込み、L3 のディスク退避を観察)\n3. 20+ ラウンドの対話を繰り返し、`[auto compact]` または `[reactive compact]` が表示されるか観察\n\n観察のポイント:ツール実行のたびに、古い tool_result は圧縮されているか?連続対話で token が閾値を超えたとき、要約が自動的にトリガーされたか?\n\n---\n\n## 次へ\n\nコンテキスト圧縮により、Agent は長時間クラッシュせずに動けるようになった。しかし、圧縮のたびにユーザーが以前に伝えた偏好や制約も一緒に失われてしまう。Agent が重要なことを選択的に記憶できるようにできないか?\n\ns09 Memory → 3 つのサブシステム:何を記憶するかの選択、重要情報の抽出、整理と統合。圧縮を越え、セッションを越えて。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` の分析に基づく。\n\n### 実行順序の対応\n\n教学版は説明の便宜上 L1/L2/L3/L4 と番号を振っているが、実際の実行順序は番号と完全には一致しない:\n\n| 項目 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 実行順序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 先頭 3 + 末尾 47 を保持 | CC はメインスレッドのみ有効;実装はオープンソースリポジトリにない(`HISTORY_SNIP` feature gate)、インターフェースは確認可能:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`、`SnipTool` もモデルが能動的に呼び出し可能。教学版の 3/47 は簡略パラメータ |\n| micro_compact | テキストプレースホルダで置換 | 2 つのパス:time-based は直接内容をクリア、cached は API の `cache_edits` を使用(legacy パスは削除済み) |\n| micro_compact ホワイトリスト | 位置による(直近 3 件) | time-based は時間閾値でトリガー、cached はカウントでトリガー(`microCompact.ts`) |\n| tool_result_budget | 200KB 文字 | 200,000 文字(`toolLimits.ts:49`) |\n| compact_history 閾値 | 文字数で推定 | 精密な token 数:`contextWindow - maxOutputTokens - 13_000` |\n| 要約の要求 | 5 種類の情報 | 9 つのセクション + ``/`` デュアルタグ |\n| 圧縮プロンプト | シンプルなプロンプト | 先頭と末尾に二重の安全ガードでツール呼び出しを禁止 |\n| PTL retry | あり(簡略版) | `truncateHeadForPTLRetry()` がメッセージグループ単位でロールバック(`compact.ts:243-290`) |\n| 圧縮後のリカバリ | なし(教学版は要約のみ保持) | 直近のファイル、計画、agent/skill/tool などの自動再付加 |\n| サーキットブレーカー | 3 回 | 3 回(`autoCompact.ts:70`) |\n| reactive リトライ | 1 回 | CC にはより精緻な段階別リトライがある |\n\n### 実行順序の詳細\n\nCC ソース `query.ts` での実際の順序:\n\n1. `applyToolResultBudget`(L379):まず大きな結果を処理し、完全な内容を退避\n2. `snipCompact`(L403):中間メッセージを切り捨て\n3. `microcompact`(L414):古い結果のプレースホルダ化\n4. `contextCollapse`(L441):独立したコンテキスト管理システム(教学版にはなし)\n5. 
`autoCompact`(L454):LLM 全量要約\n\n教学版の budget → snip → micro の順序はこれと一致する。教学版には contextCollapse メカニズムがない。\n\n### 完全な定数リファレンス\n\n| 定数 | 値 | ソースファイル |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 時間ベース micro_compact 間隔 | 60 分 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse と sessionMemoryCompact\n\nCC ソースコードには、この教学版では展開していない 2 つのメカニズムが存在する:\n\n- **contextCollapse**:独立したコンテキスト管理システム。有効時には proactive autocompact を抑制し(`autoCompact.ts:215-222`)、collapse の commit/blocking フローがコンテキスト管理を引き継ぐ。ただし manual `/compact` と reactive fallback は独立パスのままで、contextCollapse の影響を受けない。\n- **sessionMemoryCompact**:compact_history の前に、CC は既存の session memory(s09 で解説)を使った軽量要約を先に試みる。LLM を呼び出さない。このメカニズムは s09 を学んだ後に振り返るとより理解しやすい。\n\n### 圧縮プロンプトの中身\n\nCC の圧縮プロンプトには 2 つの厳格な要件がある:\n\n1. **ツール呼び出しの絶対禁止**:冒頭が `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.` で、末尾にも再度 REMINDER がある\n2. **先に分析してから要約**:モデルはまず `` タグで思考を整理し、その後 `` タグで正式な要約を出力する。analysis はフォーマット時に除去される\n\n### 教学版の簡略化は意図的\n\n- micro_compact でテキストプレースホルダを使用 → API 層の `cache_edits` 権限がないため\n- token を文字数で推定 → 精密な tokenizer は教学の対象外\n- 圧縮後のリカバリを省略 → 教学版は要約のみを保持し、ファイルの自動再付加を行わない\n- 2 つの補助メカニズムを展開しない → 10% の細部に属する\n\nコア設計思想、安価なものを先に高価なものを後に、は完全に保持されている。\n\n
\n\n\n"
+ "content": "# s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/ja/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — 4層圧縮戦略、安価なものを先に、高価なものを後に実行。\n>\n> **Harness レイヤー**: 圧縮 — クリーンな記憶、無限のセッション。\n\n---\n\n## 課題\n\nAgent が動いている途中で、止まってしまう。\n\nbash、read、write は揃っており、能力は十分。しかし 1000 行のファイル(~4000 token)を読み、さらに 30 のファイルを読み、20 のコマンドを実行したとします。各コマンドの出力、各ファイルの内容がすべて `messages` リストに蓄積されます。\n\nコンテキストウィンドウには上限があります。満杯になると、API は即座に拒否します:`prompt_too_long`。\n\n圧縮しなければ、Agent は大規模プロジェクトではまともに動けません。\n\n---\n\n## ソリューション\n\n\n\ns07 のフック構造、スキルロード、サブ Agent の骨格を維持し、圧縮に焦点を当てるため一部のツールは省略。コアの変更点:各 LLM 呼び出し前に 3 層のプリプロセッサ(0 API)を挿入し、token が閾値を超えた場合は LLM 要約(1 API)をトリガー、API エラー時には緊急トリムを実行。\n\nコア設計:安価なものを先に、高価なものを後に。\n\n---\n\n## 仕組み\n\n\n\n### L1: snip_compact — 無関係な古い会話を切り捨て\n\nAgent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。\n\nメッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持して中間を切り詰める。ただし切れ目だけは調整し、`assistant(tool_use)` と後続の `user(tool_result)` を分断しない:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n切り捨て自体は単純なままで、境界だけを保護する。残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。\n\n### L2: micro_compact — 古いツール結果をプレースホルダに置換\n\n\n\nAgent が連続して 10 個のファイルを読んだ。1〜7 回目の完全な内容はまだコンテキストに残っており、もう不要だが、大量のスペースを占有している。\n\n直近 3 
件の `tool_result` の完全な内容のみを保持し、それより古いものは 1 行のプレースホルダに置換:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n古い結果はクリーンアップされたが、1 件の新しい結果だけで 500KB の可能性がある。大きなファイルを `cat` するだけでコンテキストがいっぱいになる。→ L3。\n\n### L3: tool_result_budget — 大きな結果をディスクに退避\n\n\n\nモデルが一度に 5 つの大きなファイルを読み、1 つの user メッセージ内の全 `tool_result` の合計が 500KB に達した。\n\n最後の user メッセージ内のすべての `tool_result` の合計サイズを集計。200KB を超えた場合 → サイズ順にソートし、最大のものから順に `.task_outputs/tool-results/` に退避。コンテキストには `` マーカー + 先頭 2000 文字のプレビューのみを残す。モデルはマーカーを見て完全な内容がディスク上にあることを認識し、必要に応じて再読み込みできる。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n最初の 3 層はすべて純粋なテキスト/構造操作(0 API 呼び出し)だが、会話内容を「理解」することはできない。コンテキストがまだ大きすぎる可能性がある。→ L4。\n\n### L4: compact_history — LLM 全量要約\n\n\n\n最初の 3 層がすべて実行されたが、超大規模プロジェクトで 30 分間連続作業すると、token がまだ閾値を超えている。\n\n3 ステップのフロー:\n\n1. **transcript を保存**:完全な会話を `.transcripts/` に JSONL 形式で書き出す。transcript は回復可能な記録として保存されるが、モデルのアクティブなコンテキストには要約しか残らない。モデルの現在の推論にとって、詳細はすでにコンテキストにない。教学コードは transcript 検索ツールを提供しない。\n2. **LLM で要約を生成**:会話履歴を LLM に送り、現在の目標、重要な発見、変更済みファイル、残りの作業、ユーザーの制約などの重要な情報を保持するよう指示。\n3. 
**メッセージリストを置換**:すべての古いメッセージが 1 件の要約に置き換えられる。教学版は要約のみを保持する。実際の Claude Code は compact 後に直近のファイル、計画、agent/skill/tool などのコンテキストを再付加する。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先に完全な会話を保存\n summary = summarize_history(messages) # LLM で要約を生成\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**サーキットブレーカー**:連続 3 回失敗したらリトライを停止し、無限ループによる API 呼び出しの浪費を防止。\n\n### 緊急: reactive_compact\n\nAPI がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。\n\nこの時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的だが、末尾を残す際も孤立した `tool_result` を残さないようにする。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。\n\n### 合わせて実行\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 3 つのプリプロセッサ(0 API 呼び出し)\n # 順序:budget を先に実行し、大きな内容をプレースホルダ化する前に退避\n messages[:] = tool_result_budget(messages) # L3: 大きな結果を退避\n messages[:] = snip_compact(messages) # L1: 中間を切り捨て\n messages[:] = micro_compact(messages) # L2: 古い結果をプレースホルダに\n\n # まだ足りない?LLM 要約(1 API 呼び出し)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 緊急対応\n reactive_retries += 1\n continue\n raise # リトライ上限超過、例外をスロー\n # ... 
ツール実行 ...\n\n # compact ツール:モデルが能動的に呼び出した場合、compact_history をトリガー\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 現在のターンを終了し、圧縮後のコンテキストで新しく開始\n```\n\n**順序は変えられない。** L3(budget)が L2(micro)の前に実行される理由:micro は古い大きな tool_result を 1 行のプレースホルダに置換するため、budget はその前に完全な内容を退避させる必要がある。CC ソースが `applyToolResultBudget` を最初に配置する理由も同じ。\n\n---\n\n## s07 からの変更点\n\n| コンポーネント | 変更前 (s07) | 変更後 (s08) |\n|------|-----------|-----------|\n| コンテキスト管理 | なし(コンテキストが無限に膨張) | 4 層圧縮パイプライン + 緊急対応 |\n| 新規関数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| ツール | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| ループ | LLM 呼び出し → ツール実行 | 各ラウンド前に 3 層プリプロセッサを実行 + 閾値で compact_history をトリガー |\n| 設計原則 | — | 安価なものを先に、高価なものを後に |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(連続して複数のファイルを読み、L2 の古い結果圧縮を観察)\n2. `Read every file in s08_context_compact/`(一度に大量の内容を読み込み、L3 のディスク退避を観察)\n3. 20+ ラウンドの対話を繰り返し、`[auto compact]` または `[reactive compact]` が表示されるか観察\n\n観察のポイント:ツール実行のたびに、古い tool_result は圧縮されているか?連続対話で token が閾値を超えたとき、要約が自動的にトリガーされたか?\n\n---\n\n## 次へ\n\nコンテキスト圧縮により、Agent は長時間クラッシュせずに動けるようになった。しかし、圧縮のたびにユーザーが以前に伝えた偏好や制約も一緒に失われてしまう。Agent が重要なことを選択的に記憶できるようにできないか?\n\ns09 Memory → 3 つのサブシステム:何を記憶するかの選択、重要情報の抽出、整理と統合。圧縮を越え、セッションを越えて。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` の分析に基づく。\n\n### 実行順序の対応\n\n教学版は説明の便宜上 L1/L2/L3/L4 と番号を振っているが、実際の実行順序は番号と完全には一致しない:\n\n| 項目 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 実行順序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 先頭 3 + 末尾 47 を保持 | CC はメインスレッドのみ有効;実装はオープンソースリポジトリにない(`HISTORY_SNIP` feature gate)、インターフェースは確認可能:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`、`SnipTool` もモデルが能動的に呼び出し可能。教学版の 3/47 は簡略パラメータ |\n| micro_compact | テキストプレースホルダで置換 | 2 つのパス:time-based は直接内容をクリア、cached は API の `cache_edits` を使用(legacy パスは削除済み) |\n| micro_compact ホワイトリスト | 位置による(直近 3 件) | time-based は時間閾値でトリガー、cached はカウントでトリガー(`microCompact.ts`) |\n| tool_result_budget | 200KB 文字 | 200,000 文字(`toolLimits.ts:49`) |\n| compact_history 閾値 | 文字数で推定 | 精密な token 数:`contextWindow - maxOutputTokens - 13_000` |\n| 要約の要求 | 5 種類の情報 | 9 つのセクション + `<analysis>`/`<summary>` デュアルタグ |\n| 圧縮プロンプト | シンプルなプロンプト | 先頭と末尾に二重の安全ガードでツール呼び出しを禁止 |\n| PTL retry | あり(簡略版) | `truncateHeadForPTLRetry()` がメッセージグループ単位でロールバック(`compact.ts:243-290`) |\n| 圧縮後のリカバリ | なし(教学版は要約のみ保持) | 直近のファイル、計画、agent/skill/tool などの自動再付加 |\n| サーキットブレーカー | 3 回 | 3 回(`autoCompact.ts:70`) |\n| reactive リトライ | 1 回 | CC にはより精緻な段階別リトライがある |\n\n### 実行順序の詳細\n\nCC ソース `query.ts` での実際の順序:\n\n1. `applyToolResultBudget`(L379):まず大きな結果を処理し、完全な内容を退避\n2. `snipCompact`(L403):中間メッセージを切り捨て\n3. `microcompact`(L414):古い結果のプレースホルダ化\n4. `contextCollapse`(L441):独立したコンテキスト管理システム(教学版にはなし)\n5. 
`autoCompact`(L454):LLM 全量要約\n\n教学版の budget → snip → micro の順序はこれと一致する。教学版には contextCollapse メカニズムがない。\n\n### read_file のトレードオフ\n\n教学版の `micro_compact` は、古い `tool_result` を一律にプレースホルダへ置き換える。`read_file` も例外ではない。これは通常、機能的な正しさには影響しない。後でファイル内容が必要になれば、モデルはもう一度そのファイルを読めばよい。代償は、追加のツール呼び出しが発生し得ることと、prompt cache のヒット率が下がり得ること。\n\nClaude Code は、この問題を教学版のような単純なルールでは処理していない。`Read` も microcompact 可能なツール集合に入れる一方で、別途 `readFileState` を維持している。変更されていないファイルの再読込では `FILE_UNCHANGED_STUB` を返し、compact 後には予算内で直近に読んだファイル内容を復元する(例:最大 5 ファイル、1 ファイル 5K token、合計 50K token)。これは本番実装向けのキャッシュと復元メカニズムである。教学版ではそこまで展開せず、「古い結果を圧縮し、必要なら再読込する」という単純な trade-off を残している。\n\n### 完全な定数リファレンス\n\n| 定数 | 値 | ソースファイル |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 時間ベース micro_compact 間隔 | 60 分 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse と sessionMemoryCompact\n\nCC ソースコードには、この教学版では展開していない 2 つのメカニズムが存在する:\n\n- **contextCollapse**:独立したコンテキスト管理システム。有効時には proactive autocompact を抑制し(`autoCompact.ts:215-222`)、collapse の commit/blocking フローがコンテキスト管理を引き継ぐ。ただし manual `/compact` と reactive fallback は独立パスのままで、contextCollapse の影響を受けない。\n- **sessionMemoryCompact**:compact_history の前に、CC は既存の session memory(s09 で解説)を使った軽量要約を先に試みる。LLM を呼び出さない。このメカニズムは s09 を学んだ後に振り返るとより理解しやすい。\n\n### 圧縮プロンプトの中身\n\nCC の圧縮プロンプトには 2 つの厳格な要件がある:\n\n1. **ツール呼び出しの絶対禁止**:冒頭が `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.` で、末尾にも再度 REMINDER がある\n2. 
**先に分析してから要約**:モデルはまず `<analysis>` タグで思考を整理し、その後 `<summary>` タグで正式な要約を出力する。analysis はフォーマット時に除去される\n\n### 教学版の簡略化は意図的\n\n- micro_compact でテキストプレースホルダを使用 → API 層の `cache_edits` 権限がないため\n- read_file は特別扱いしない → 教学版では必要時の再読込を受け入れ、readFileState と圧縮後復元の仕組みを導入しない\n- token を文字数で推定 → 精密な tokenizer は教学の対象外\n- 圧縮後のリカバリを省略 → 教学版は要約のみを保持し、ファイルの自動再付加を行わない\n- 2 つの補助メカニズムを展開しない → 10% の細部に属する\n\nコア設計思想、安価なものを先に高価なものを後に、は完全に保持されている。\n\n
\n\n\n"
},
{
"version": "s09",
"locale": "en",
"title": "s09: Memory — Compression Loses Details, Keep a Layer That Doesn't",
- "content": "# s09: Memory — Compression Loses Details, Keep a Layer That Doesn't\n\ns01 → ... → s07 → s08 → `s09` → [s10](/en/s10) → s11 → ... → s20\n> *\"Compression loses details, keep a layer that doesn't\"* — File store + index + on-demand loading, across compactions, across sessions.\n>\n> **Harness Layer**: Memory — knowledge that survives compaction and sessions.\n\n---\n\n## The Problem\n\ns08's autoCompact preserves current goals, remaining work, and user constraints in the summary, but details get lost: \"use tabs not spaces\" might get simplified to \"user has code style preferences\". And when you start a new session, even the summary is gone.\n\nLLMs have no persistent state; all information lives in the context window. When context fills up, it gets compressed, and compression is lossy. What's needed is a storage layer that doesn't participate in compression and persists across sessions.\n\n---\n\n## The Solution\n\n\n\nThe s08 compression pipeline is preserved, focusing on memory. Storage uses the filesystem: a `.memory/` directory where each memory is a `.md` file with YAML frontmatter (`name` / `description` / `type`). When files accumulate, an index is needed: `MEMORY.md` holds one link per line and gets injected into the SYSTEM.\n\nKey design: the index stays in SYSTEM prompt (cacheable by prompt cache), file content is injected on demand (matched by filename/description to the current conversation, without breaking the cache). Writing has two paths: the user explicitly says \"remember\", or extraction runs in the background after each turn. 
When files accumulate, periodic consolidation deduplicates.\n\nFour memory types, each answering a different question:\n\n| Type | Answers | Example |\n|------|---------|---------|\n| user | Who you are | \"Use tabs not spaces\" |\n| feedback | How to work | \"Don't mock the database\" |\n| project | What's happening | \"Auth rewrite is compliance-driven\" |\n| reference | Where to find things | \"Pipeline bugs are in Linear INGEST\" |\n\n---\n\n## How It Works\n\n\n\n### Storage: Markdown Files + Index\n\nEach memory is a `.md` file with YAML frontmatter for metadata:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` is the index, one link per line:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\nWriting a new memory automatically rebuilds the index:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### Loading: Two Paths\n\n**Path 1: Index in SYSTEM.** `build_system()` reads `MEMORY.md` every turn and injects the memory catalog into the SYSTEM prompt. The index in SYSTEM can be cached by prompt cache, avoiding resending it every turn.\n\n**Path 2: Relevant memories on demand.** Before each LLM call, `load_memories()` sends the recent conversation and the memory catalog (name + description) to the LLM as a lightweight side-query, selects relevant filenames, then reads and injects their contents. 
Capped at 5 to control cost.\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nIf the side-query fails (API error, JSON parse failure), it falls back to keyword matching on name + description.\n\n### Writing: Extraction After Each Turn\n\nUsers don't always say \"remember this\". Preferences are usually scattered across normal dialogue: \"tabs are better than spaces\", \"let's use single quotes from now on\".\n\n`extract_memories()` runs when each turn ends, triggered when the model stops without a tool_use (indicating the conversation has reached a natural break):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # Extract new memories from recent dialogue\n consolidate_memories() # Check if consolidation is needed\n return\n```\n\nBefore extraction, existing memories are checked to avoid duplicates. 
The extraction prompt asks the LLM to return a JSON array of `{name, type, description, body}`, writing files only when genuinely new information is found.\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### Consolidation: Low-Frequency Deduplication\n\nMemory files accumulate. `consolidate_memories()` triggers when the file count reaches a threshold (default 10), asking the LLM to deduplicate, merge contradictions, and prune stale memories:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # Too few, not worth consolidating\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC calls this process **Dream**, with four gates in practice: time interval, scan throttle, session count, file lock. The teaching version simplifies to a file-count threshold.\n\n### What Memory Stores\n\nMemory stores information that remains useful across sessions: user preferences, recurring feedback, project background, common entry points, and investigation clues. It focuses on \"what will be useful later\" and brings that information back through an index plus on-demand loading.\n\nSession memory focuses on continuity inside one session: what context should survive after compaction. 
The two work together: Memory handles long-term knowledge; session memory handles the current session across compaction.\n\n---\n\n## Changes From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Memory capability | None (preferences degrade with compaction) | Storage + loading + extraction + consolidation |\n| New functions | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| Storage | — | .memory/MEMORY.md index + .memory/*.md files |\n| Tools | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| Loop | Only compression each turn | Memory injection + compression + post-turn extraction + periodic consolidation |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\nTry these prompts (enter across multiple turns, observe memory accumulation and loading):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py` (observe whether the Agent uses tabs)\n3. `What did I tell you about my preferences?` (observe whether the Agent remembers)\n4. `I also prefer single quotes over double quotes for strings.`\n\nWhat to watch for: Does `[Memory: extracted N new memories]` appear after each turn? Are `.md` files generated in `.memory/`? Is `MEMORY.md` index updated? Does the Agent automatically load previous memories in new conversations?\n\n---\n\n## What's Next\n\nMemory, compression, and tools are all in place. But the system prompt is still a hardcoded string. Adding a new tool means manually adding a description; switching projects means rewriting the whole prompt. Prompts should be assembled at runtime.\n\ns10 System Prompt → segments + runtime assembly. Different projects, different tools, different prompts.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code under `src/` in `memdir/`, `services/`, `utils/`, `query/`. Line numbers verified against source.\n\n### Source Code Paths\n\n| File | Lines | Responsibility |\n|------|-------|---------------|\n| `memdir/memdir.ts` | 507 | Core: MEMORY.md definition (`34-38`), memory behavior instructions distinguishing memory/plan/tasks (`199-266`), `loadMemoryPrompt()` three paths (`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query memory selection (`18-24` system prompt, `97-122` call logic) |\n| `memdir/memoryTypes.ts` | 271 | Type definitions, frontmatter fields |\n| `memdir/memoryScan.ts` | — | Scan .md files, exclude MEMORY.md, read frontmatter, max 200 files, sorted by mtime desc (`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | Forked agent extraction, restricted permissions, `skipTranscript: true`, `maxTurns: 5` (`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream consolidation, four-layer gating (`63-66` defaults, `130-190` gating, `224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | Session-level memory management |\n| `services/compact/sessionMemoryCompact.ts` | — | Session memory lightweight summary, thresholds 10K/5/40K (`56-61`) |\n| `utils/attachments.ts` | — | Injection budget: 200 lines / 4096 bytes per file, 60KB per session (`269-288`); find relevant memory by query (`2196-2241`) |\n| `query.ts` | — | Memory prefetch at start of each user turn (`301-304`), non-blocking collection (`1592-1614`) |\n| `query/stopHooks.ts` | — | Stop hook fire-and-forget triggers extraction and Dream (`141-155`) |\n\n### Memory Selection: LLM, Not Embedding\n\nCC uses **Sonnet itself to select** (`findRelevantMemories.ts`), not embedding vector similarity:\n\n1. `memoryScan.ts` scans all `.md` files in `.memory/` (excluding MEMORY.md), max 200 files, sorted by mtime descending\n2. Lists all memory files' `name` + `description` as a catalog\n3. 
Sends to Sonnet side-query: \"Select truly useful memories by name and description (max 5). Skip if unsure.\"\n4. Sonnet returns `{ selected_memories: [\"file1.md\", ...] }`\n5. Selected files' full contents are read (≤ 200 lines / 4096 bytes per file) and injected. Total session budget: 60KB\n\nAt the start of each user turn, `query.ts:301-304` starts memory prefetch (async); after tool execution, `1592-1614` collects completed results non-blocking.\n\n### Extraction Timing: Stop Hook, Not After autoCompact\n\nTrigger location (`stopHooks.ts:141-155`): inside `handleStopHooks()`, fire-and-forget triggers extraction and Dream. The teaching version places extraction in the `stop_reason != \"tool_use\"` branch, matching the direction.\n\nCC's extraction runs via forked agent (`extractMemories.ts:371-427`): restricted permissions, `skipTranscript: true`, `maxTurns: 5`. Also has overlap protection: if the main Agent already wrote memory files, extraction is skipped.\n\n### Memory File Format\n\nCC uses Markdown + YAML frontmatter, consistent with the teaching version. Four types: `user`, `feedback`, `project`, `reference`.\n\n`memdir.ts:34-38` defines index constraints: `MEMORY.md` max 200 lines / 25KB. `memdir.ts:199-266` builds memory behavior instructions, explicitly distinguishing memory from plan and tasks. Storage location: `~/.claude/projects//memory/`.\n\n### Dream: Four-Layer Gating\n\nNot \"triggered when idle\" or \"consolidate when count is enough\", but four gates (`autoDream.ts`, defaults `63-66`, gating logic `130-190`):\n\n1. **Time gate**: ≥ 24 hours since last consolidation\n2. **Scan throttle**: Avoid frequent filesystem scans\n3. **Session gate**: ≥ 5 session transcripts modified since last consolidation\n4. **Lock gate**: No other process currently consolidating (`.consolidate-lock` file)\n\nThe merge itself runs via forked agent (`224-233`): locate → collect recent signals → merge and write files → prune and update index. 
Lock file mtime serves as lastConsolidatedAt. Crash recovery: lock auto-expires after 1 hour.\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| Persistence | Cross-session | Single session |\n| Storage | Multiple .md files in `memory/` | `session-memory//memory.md` |\n| Loaded into | system prompt | compact summary |\n| Purpose | Cross-session knowledge accumulation | Cross-compact context continuity |\n\nsessionMemoryCompact (mentioned in s08) uses Session Memory: before autoCompact, it reads the session memory file and, if sufficient (≥ 10K tokens, ≥ 5 text messages, ≤ 40K tokens, `sessionMemoryCompact.ts:56-61`), uses it as a summary without calling the LLM.\n\n### Where the Real Implementation Is More Complex\n\n- **Feature flags**: Memory features have multiple feature gate layers\n- **Team memory**: Shared team memories, `loadMemoryPrompt()` has a dedicated path (not covered in teaching version)\n- **KAIROS**: Timing-aware memory extraction strategy, daily-log mode in `loadMemoryPrompt()`\n- **Prompt cache**: Memory injection must account for prompt cache TTL, avoiding full system prompt rewrites each turn\n- **File locks**: Concurrency control for multi-process scenarios\n- **Memory prefetch**: Async prefetch, non-blocking main flow\n\n### Teaching Version Simplifications Are Intentional\n\n- LLM side-query → LLM side-query + keyword fallback: teaching version keeps LLM selection, adds fallback path\n- Memory JSON → Markdown + frontmatter: teaching version matches CC\n- Stop hook trigger → `stop_reason != \"tool_use\"` branch: same direction\n- Four-layer gating → file-count threshold: teaching version lacks transcript system and multi-session concepts\n- Forked agent + restricted permissions → direct call: teaching version has no subprocess isolation\n\n \n\n\n"
+ "content": "# s09: Memory — Compression Loses Details, Keep a Layer That Doesn't\n\ns01 → ... → s07 → s08 → `s09` → [s10](/en/s10) → s11 → ... → s20\n> *\"Compression loses details, keep a layer that doesn't\"* — File store + index + on-demand loading, across compactions, across sessions.\n>\n> **Harness Layer**: Memory — knowledge that survives compaction and sessions.\n\n---\n\n## The Problem\n\ns08's autoCompact preserves current goals, remaining work, and user constraints in the summary, but details get lost: \"use tabs not spaces\" might get simplified to \"user has code style preferences\". And when you start a new session, even the summary is gone.\n\nLLMs have no persistent state; all information lives in the context window. When context fills up, it gets compressed, and compression is lossy. What's needed is a storage layer that doesn't participate in compression and persists across sessions.\n\n---\n\n## The Solution\n\n\n\nThe s08 compression pipeline is preserved, focusing on memory. Storage uses the filesystem: a `.memory/` directory where each memory is a `.md` file with YAML frontmatter (`name` / `description` / `type`). When files accumulate, an index is needed: `MEMORY.md` holds one link per line and gets injected into the SYSTEM.\n\nKey design: the index stays in SYSTEM prompt (cacheable by prompt cache), file content is injected on demand (matched by filename/description to the current conversation, without breaking the cache). Writing has two paths: the user explicitly says \"remember\", or extraction runs in the background after each turn. 
When files accumulate, periodic consolidation deduplicates.\n\nFour memory types, each answering a different question:\n\n| Type | Answers | Example |\n|------|---------|---------|\n| user | Who you are | \"Use tabs not spaces\" |\n| feedback | How to work | \"Don't mock the database\" |\n| project | What's happening | \"Auth rewrite is compliance-driven\" |\n| reference | Where to find things | \"Pipeline bugs are in Linear INGEST\" |\n\n---\n\n## How It Works\n\n\n\n### Storage: Markdown Files + Index\n\nEach memory is a `.md` file with YAML frontmatter for metadata:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` is the index, one link per line:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\nWriting a new memory automatically rebuilds the index:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### Loading: Two Paths\n\n**Path 1: Index in SYSTEM.** `build_system()` reads `MEMORY.md` once at the start of each user request and injects the memory catalog into the SYSTEM prompt. Memory extraction and consolidation run only when the turn ends, so SYSTEM does not need to be rebuilt repeatedly within the same user request.\n\n**Path 2: Relevant memories on demand.** At the start of each user request, `load_memories()` sends the recent conversation and the memory catalog (name + description) to the LLM as a lightweight side-query, selects relevant filenames, then reads and injects their contents. 
Capped at 5 to control cost.\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nIf the side-query fails (API error, JSON parse failure), it falls back to keyword matching on name + description.\n\n### Writing: Extraction After Each Turn\n\nUsers don't always say \"remember this\". Preferences are usually scattered across normal dialogue: \"tabs are better than spaces\", \"let's use single quotes from now on\".\n\n`extract_memories()` runs when each turn ends, triggered when the model stops without a tool_use (indicating the conversation has reached a natural break):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # Extract new memories from recent dialogue\n consolidate_memories() # Check if consolidation is needed\n return\n```\n\nBefore extraction, existing memories are checked to avoid duplicates. 
The extraction prompt asks the LLM to return a JSON array of `{name, type, description, body}`, writing files only when genuinely new information is found.\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### Consolidation: Low-Frequency Deduplication\n\nMemory files accumulate. `consolidate_memories()` triggers when the file count reaches a threshold (default 10), asking the LLM to deduplicate, merge contradictions, and prune stale memories:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # Too few, not worth consolidating\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC calls this process **Dream**, with four gates in practice: time interval, scan throttle, session count, file lock. The teaching version simplifies to a file-count threshold.\n\n### What Memory Stores\n\nMemory stores information that remains useful across sessions: user preferences, recurring feedback, project background, common entry points, and investigation clues. It focuses on \"what will be useful later\" and brings that information back through an index plus on-demand loading.\n\nSession memory focuses on continuity inside one session: what context should survive after compaction. 
The two work together: Memory handles long-term knowledge; session memory handles the current session across compaction.\n\n---\n\n## Changes From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Memory capability | None (preferences degrade with compaction) | Storage + loading + extraction + consolidation |\n| New functions | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| Storage | — | .memory/MEMORY.md index + .memory/*.md files |\n| Tools | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| Loop | Only compression each turn | Memory injection + compression + post-turn extraction + periodic consolidation |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\nTry these prompts (enter across multiple turns, observe memory accumulation and loading):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py` (observe whether the Agent uses tabs)\n3. `What did I tell you about my preferences?` (observe whether the Agent remembers)\n4. `I also prefer single quotes over double quotes for strings.`\n\nWhat to watch for: Does `[Memory: extracted N new memories]` appear after each turn? Are `.md` files generated in `.memory/`? Is `MEMORY.md` index updated? Does the Agent automatically load previous memories in new conversations?\n\n---\n\n## What's Next\n\nMemory, compression, and tools are all in place. But the system prompt is still a hardcoded string. Adding a new tool means manually adding a description; switching projects means rewriting the whole prompt. Prompts should be assembled at runtime.\n\ns10 System Prompt → segments + runtime assembly. Different projects, different tools, different prompts.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code under `src/` in `memdir/`, `services/`, `utils/`, `query/`. Line numbers verified against source.\n\n### Source Code Paths\n\n| File | Lines | Responsibility |\n|------|-------|---------------|\n| `memdir/memdir.ts` | 507 | Core: MEMORY.md definition (`34-38`), memory behavior instructions distinguishing memory/plan/tasks (`199-266`), `loadMemoryPrompt()` three paths (`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query memory selection (`18-24` system prompt, `97-122` call logic) |\n| `memdir/memoryTypes.ts` | 271 | Type definitions, frontmatter fields |\n| `memdir/memoryScan.ts` | — | Scan .md files, exclude MEMORY.md, read frontmatter, max 200 files, sorted by mtime desc (`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | Forked agent extraction, restricted permissions, `skipTranscript: true`, `maxTurns: 5` (`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream consolidation, four-layer gating (`63-66` defaults, `130-190` gating, `224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | Session-level memory management |\n| `services/compact/sessionMemoryCompact.ts` | — | Session memory lightweight summary, thresholds 10K/5/40K (`56-61`) |\n| `utils/attachments.ts` | — | Injection budget: 200 lines / 4096 bytes per file, 60KB per session (`269-288`); find relevant memory by query (`2196-2241`) |\n| `query.ts` | — | Memory prefetch at start of each user turn (`301-304`), non-blocking collection (`1592-1614`) |\n| `query/stopHooks.ts` | — | Stop hook fire-and-forget triggers extraction and Dream (`141-155`) |\n\n### Memory Selection: LLM, Not Embedding\n\nCC uses **Sonnet itself to select** (`findRelevantMemories.ts`), not embedding vector similarity:\n\n1. `memoryScan.ts` scans all `.md` files in `.memory/` (excluding MEMORY.md), max 200 files, sorted by mtime descending\n2. Lists all memory files' `name` + `description` as a catalog\n3. 
Sends to Sonnet side-query: \"Select truly useful memories by name and description (max 5). Skip if unsure.\"\n4. Sonnet returns `{ selected_memories: [\"file1.md\", ...] }`\n5. Selected files' full contents are read (≤ 200 lines / 4096 bytes per file) and injected. Total session budget: 60KB\n\nAt the start of each user turn, `query.ts:301-304` starts memory prefetch (async); after tool execution, `1592-1614` collects completed results non-blocking.\n\n### Extraction Timing: Stop Hook, Not After autoCompact\n\nTrigger location (`stopHooks.ts:141-155`): inside `handleStopHooks()`, fire-and-forget triggers extraction and Dream. The teaching version places extraction in the `stop_reason != \"tool_use\"` branch, matching the direction.\n\nCC's extraction runs via forked agent (`extractMemories.ts:371-427`): restricted permissions, `skipTranscript: true`, `maxTurns: 5`. Also has overlap protection: if the main Agent already wrote memory files, extraction is skipped.\n\n### Memory File Format\n\nCC uses Markdown + YAML frontmatter, consistent with the teaching version. Four types: `user`, `feedback`, `project`, `reference`.\n\n`memdir.ts:34-38` defines index constraints: `MEMORY.md` max 200 lines / 25KB. `memdir.ts:199-266` builds memory behavior instructions, explicitly distinguishing memory from plan and tasks. Storage location: `~/.claude/projects/<project>/memory/`.\n\n### Dream: Four-Layer Gating\n\nNot \"triggered when idle\" or \"consolidate when count is enough\", but four gates (`autoDream.ts`, defaults `63-66`, gating logic `130-190`):\n\n1. **Time gate**: ≥ 24 hours since last consolidation\n2. **Scan throttle**: Avoid frequent filesystem scans\n3. **Session gate**: ≥ 5 session transcripts modified since last consolidation\n4. **Lock gate**: No other process currently consolidating (`.consolidate-lock` file)\n\nThe merge itself runs via forked agent (`224-233`): locate → collect recent signals → merge and write files → prune and update index. 
Lock file mtime serves as lastConsolidatedAt. Crash recovery: lock auto-expires after 1 hour.\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| Persistence | Cross-session | Single session |\n| Storage | Multiple .md files in `memory/` | `session-memory//memory.md` |\n| Loaded into | system prompt | compact summary |\n| Purpose | Cross-session knowledge accumulation | Cross-compact context continuity |\n\nsessionMemoryCompact (mentioned in s08) uses Session Memory: before autoCompact, it reads the session memory file and, if sufficient (≥ 10K tokens, ≥ 5 text messages, ≤ 40K tokens, `sessionMemoryCompact.ts:56-61`), uses it as a summary without calling the LLM.\n\n### Where the Real Implementation Is More Complex\n\n- **Feature flags**: Memory features have multiple feature gate layers\n- **Team memory**: Shared team memories, `loadMemoryPrompt()` has a dedicated path (not covered in teaching version)\n- **KAIROS**: Timing-aware memory extraction strategy, daily-log mode in `loadMemoryPrompt()`\n- **Prompt cache**: Memory injection must account for prompt cache TTL, avoiding full system prompt rewrites each turn\n- **File locks**: Concurrency control for multi-process scenarios\n- **Memory prefetch**: Async prefetch, non-blocking main flow\n\n### Teaching Version Simplifications Are Intentional\n\n- LLM side-query → LLM side-query + keyword fallback: teaching version keeps LLM selection, adds fallback path\n- Memory JSON → Markdown + frontmatter: teaching version matches CC\n- Stop hook trigger → `stop_reason != \"tool_use\"` branch: same direction\n- Four-layer gating → file-count threshold: teaching version lacks transcript system and multi-session concepts\n- Forked agent + restricted permissions → direct call: teaching version has no subprocess isolation\n\n \n\n\n"
},
{
"version": "s09",
"locale": "zh",
"title": "s09: Memory — 压缩会丢细节,要有一层不丢的",
- "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说\"记住\"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。\n\n**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 
把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n text = extract_text(response.content).strip()\n indices = json.loads(re.search(r'\\[.*?\\]', text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(pre_compress) # 从压缩前快照提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n \n\n\n"
+ "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说\"记住\"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 在每次用户请求开始时读取 `MEMORY.md`,把记忆清单注入。记忆提取和整理只在本轮结束时触发,因此同一轮用户请求中不需要重复重建 SYSTEM。\n\n**路径二:相关记忆按需注入。** 每次用户请求开始时,`load_memories()` 
把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n text = extract_text(response.content).strip()\n indices = json.loads(re.search(r'\\[.*?\\]', text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(pre_compress) # 从压缩前快照提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n \n\n\n"
},
{
"version": "s09",
"locale": "ja",
"title": "s09: Memory — 圧縮は詳細を失う、失わない層が必要",
- "content": "# s09: Memory — 圧縮は詳細を失う、失わない層が必要\n\ns01 → ... → s07 → s08 → `s09` → [s10](/ja/s10) → s11 → ... → s20\n> *\"圧縮は詳細を失う、失わない層が必要\"* — ファイルストア + インデックス + オンデマンド読み込み。圧縮を越え、セッションを越えて。\n>\n> **Harness レイヤー**: 記憶 — 圧縮とセッションを越える知識の蓄積。\n\n---\n\n## 課題\n\ns08 の autoCompact は現在の目標、残りの作業、ユーザーの制約をサマリに保持するが、詳細は失われる:「タブでインデント、スペース不可」が「ユーザーにコードスタイルの好みあり」と簡略化される。そして新しいセッションを開始すると、サマリすらない。\n\nLLM には永続状態がなく、すべての情報はコンテキストウィンドウ内にある。コンテキストが満杯になれば圧縮され、圧縮は非可逆。圧縮に参加せず、セッションを越えて保持されるストレージ層が必要。\n\n---\n\n## ソリューション\n\n\n\ns08 の圧縮パイプラインを維持し、記憶に焦点を当てる。ストレージにはファイルシステムを採用:`.memory/` ディレクトリに各記憶を `.md` ファイルとして保存、YAML frontmatter(`name` / `description` / `type`)付き。ファイルが増えたらインデックスが必要:`MEMORY.md` に 1 行 1 リンクを記録し、SYSTEM に注入。\n\n重要な設計:インデックスは SYSTEM prompt に常駐(prompt cache でキャッシュ可能)、ファイル内容はオンデマンド注入(filename/description で現在の会話にマッチ、cache を破壊しない)。書き込みは 2 つのパス:ユーザーが明示的に「覚えて」と言うか、毎ターン終了後にバックグラウンドで抽出。ファイルが蓄積されたら、定期的に整理して重複排除。\n\n4 種類の記憶、それぞれ異なる質問に答える:\n\n| タイプ | 何に答えるか | 例 |\n|--------|-------------|-----|\n| user | あなたは誰か | \"タブでスペース不可\" |\n| feedback | どう作業するか | \"DB をモックしない\" |\n| project | 何が起きているか | \"auth 書き直しはコンプライアンス主導\" |\n| reference | どこで探すか | \"パイプラインのバグは Linear INGEST\" |\n\n---\n\n## 仕組み\n\n\n\n### ストレージ:Markdown ファイル + インデックス\n\n各記憶は `.md` ファイル、YAML frontmatter でメタデータを記録:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` はインデックス、1 行に 1 リンク:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n新しい記憶を書き込むとインデックスを自動再構築:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: 
{mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 読み込み:2 つのパス\n\n**パス 1:インデックスを SYSTEM に常駐。** `build_system()` は毎ターン SYSTEM を再構築する際に `MEMORY.md` を読み込み、記憶カタログを注入。SYSTEM prompt 内のインデックスは prompt cache でキャッシュ可能で、毎ターン再送不要。\n\n**パス 2:関連記憶をオンデマンド注入。** 各 LLM 呼び出し前、`load_memories()` は最近の会話と記憶カタログ(name + description)を LLM に軽量 side-query として送信し、関連するファイル名を選択、ファイル内容を読み込んで注入。上限 5 件でコストを制御。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nside-query が失敗した場合(API エラー、JSON パース失敗)、name + description のキーワードマッチにフォールバック。\n\n### 書き込み:毎ターン終了後の抽出\n\nユーザーが毎回「これを覚えて」と言うわけではない。好みは通常、通常の会話の中に散らばっている:「タブの方がスペースより良い」「これからはシングルクォートにしよう」。\n\n`extract_memories()` は各ターン終了時に実行、モデルが tool_use なしで停止した場合にトリガー(会話が自然な区切りに達したことを示す):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 最近の会話から新しい記憶を抽出\n consolidate_memories() # 整理が必要かチェック\n return\n```\n\n抽出前に既存の記憶を確認し、重複を回避。抽出プロンプトは LLM に `{name, type, description, body}` の JSON 配列を要求、本当に新しい情報がある場合のみファイルに書き込む。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, 
body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### 整理:低頻度の重複排除\n\n記憶ファイルは蓄積される。`consolidate_memories()` はファイル数が閾値(デフォルト 10)に達した時にトリガー、LLM に重複排除、矛盾の統合、古い記憶の剪定を依頼:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 少なすぎる、整理する価値なし\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC はこのプロセスを **Dream** と呼び、実際には 4 層のゲートがある:時間間隔、スキャンスロットル、セッション数、ファイルロック。教学版はファイル数閾値に簡略化。\n\n### Memory に保存するもの\n\nMemory はセッションを越えて有用な情報を保存する:ユーザーの好み、繰り返し出るフィードバック、プロジェクト背景、よく使う入口、調査の手がかりなど。「あとでまた使うもの」を対象にし、インデックス + オンデマンド読み込みで現在の会話に戻す。\n\nsession memory は 1 つのセッション内の連続性を扱う:compact 後も現在の会話に残すべき文脈を保持する。両者は役割が分かれている。Memory は長期知識を扱い、session memory は現在のセッションを compact 越しにつなぐ。\n\n---\n\n## s08 からの変更点\n\n| コンポーネント | 変更前 (s08) | 変更後 (s09) |\n|-----------|-------------|-------------|\n| 記憶能力 | なし(圧縮後、好みはサマリと共に劣化) | ストレージ + 読み込み + 抽出 + 整理 |\n| 新規関数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| ストレージ | — | .memory/MEMORY.md インデックス + .memory/*.md ファイル |\n| ツール | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| ループ | 毎ターン圧縮のみ | 記憶注入 + 圧縮 + ターン終了後の抽出 + 定期整理 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n以下のプロンプトを試してみてください(複数ターンに分けて入力し、記憶の蓄積と読み込みを観察):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(Agent がタブを使用したか観察)\n3. `What did I tell you about my preferences?`(Agent が覚えているか観察)\n4. 
`I also prefer single quotes over double quotes for strings.`\n\n観察のポイント:各ターン終了後に `[Memory: extracted N new memories]` が表示されるか?`.memory/` ディレクトリに `.md` ファイルが生成されたか?`MEMORY.md` インデックスが更新されたか?新しい会話で Agent が以前の記憶を自動的に読み込んだか?\n\n---\n\n## 次へ\n\n記憶、圧縮、ツールはすべて揃った。しかし system prompt はまだハードコードされた文字列。新しいツールを追加するには手動で説明を書き、プロジェクトを変えるにはプロンプト全体を書き直す。プロンプトは実行時に組み立てられるべき。\n\ns10 System Prompt → セグメント + 実行時組み立て。異なるプロジェクト、異なるツール、異なるプロンプト。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `src/` 下の `memdir/`、`services/`、`utils/`、`query/` の分析に基づく。行番号はソースコードと照合済み。\n\n### ソースコードパス\n\n| ファイル | 行数 | 職責 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定義(`34-38`)、記憶動作指示で memory/plan/tasks を区別(`199-266`)、`loadMemoryPrompt()` 3 パス(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query で記憶選択(`18-24` システムプロンプト、`97-122` 呼び出しロジック) |\n| `memdir/memoryTypes.ts` | 271 | 型定義、frontmatter フィールド |\n| `memdir/memoryScan.ts` | — | .md ファイルをスキャン、MEMORY.md を除外、frontmatter を読み取り、最大 200 ファイル、mtime 降順(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent で記憶を抽出、制限付き権限、`skipTranscript: true`、`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理、4 層ゲート(`63-66` デフォルト値、`130-190` ゲート、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | セッションレベルの記憶管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 軽量サマリ、閾値 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入予算:200 行 / 4096 バイト/ファイル、60KB/セッション(`269-288`);query で関連記憶を検索(`2196-2241`) |\n| `query.ts` | — | memory prefetch を毎ターン開始時に起動(`301-304`)、非ブロッキング収集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget で抽出と Dream をトリガー(`141-155`) |\n\n### 記憶選択:embedding ではなく LLM\n\nCC は **Sonnet 自身で選択**(`findRelevantMemories.ts`)、embedding ベクトル類似度ではない:\n\n1. `memoryScan.ts` が `.memory/` 下のすべての `.md` ファイルをスキャン(MEMORY.md を除外)、最大 200 ファイル、mtime 降順\n2. `name` + `description` をカタログとしてリスト化\n3. Sonnet side-query に送信:「名前と説明から本当に有用な記憶を選択(最大 5 件)。不明ならスキップ。」\n4. Sonnet が `{ selected_memories: [\"file1.md\", ...] }` を返却\n5. 
選択されたファイルの完全な内容を読み込み(≤ 200 行 / 4096 バイト/ファイル)、注入。セッション総予算:60KB\n\n毎ターンのユーザー turn 開始時、`query.ts:301-304` が memory prefetch を起動(非同期);ツール実行後、`1592-1614` が非ブロッキングで結果を収集。\n\n### 抽出タイミング:stop hook、autoCompact 後ではない\n\nトリガー位置(`stopHooks.ts:141-155`):`handleStopHooks()` 内で、fire-and-forget で抽出と Dream をトリガー。教学版は `stop_reason != \"tool_use\"` 分岐に抽出を配置、方向は一致。\n\nCC の抽出は forked agent で実行(`extractMemories.ts:371-427`):制限付き権限、`skipTranscript: true`、`maxTurns: 5`。重複保護もある:メイン Agent が既に記憶ファイルを書き込んだ場合、抽出をスキップ。\n\n### 記憶ファイル形式\n\nCC は Markdown + YAML frontmatter を使用、教学版と一致。4 種類:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` がインデックス制約を定義:`MEMORY.md` 最大 200 行 / 25KB。`memdir.ts:199-266` が記憶動作指示を構築、memory と plan と tasks を明確に区別。保存場所:`~/.claude/projects//memory/`。\n\n### Dream:4 層ゲート\n\n「アイドル時にトリガー」や「数が足りたら統合」ではなく、4 層のゲート(`autoDream.ts`、デフォルト値 `63-66`、ゲートロジック `130-190`):\n\n1. **時間ゲート**:前回の統合から ≥ 24 時間\n2. **スキャンスロットル**:頻繁なファイルシステムスキャンを回避\n3. **セッションゲート**:前回の統合以降 ≥ 5 セッションの transcript が変更された\n4. **ロックゲート**:他のプロセスが統合中でない(`.consolidate-lock` ファイル)\n\n統合自体は forked agent で実行(`224-233`):定位 → 直近のシグナル収集 → 統合してファイル書き込み → 剪定してインデックス更新。ロックファイルの mtime が lastConsolidatedAt。クラッシュリカバリ:1 時間後にロックが自動期限切れ。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 永続性 | セッション間 | 単一セッション |\n| ストレージ | `memory/` 下の複数 .md ファイル | `session-memory//memory.md` |\n| 注入先 | system prompt | compact サマリ |\n| 目的 | セッション間の知識蓄積 | compact を越えたコンテキストの連続性 |\n\nsessionMemoryCompact(s08 で触れた仕組み)は Session Memory を活用:autoCompact の前に session memory ファイルを読み込み、内容が十分であれば(≥ 10K token、≥ 5 テキストメッセージ、≤ 40K token、`sessionMemoryCompact.ts:56-61`)、LLM を呼び出さずにサマリとして使用。\n\n### 実際の実装が教学版より複雑な点\n\n- **Feature flags**:記憶関連機能には複数の feature gate 層がある\n- **Team memory**:チーム共有記憶、`loadMemoryPrompt()` に専用パスあり(教学版では未カバー)\n- **KAIROS**:タイミング認識型の記憶抽出戦略、`loadMemoryPrompt()` の daily-log モード\n- **Prompt cache**:記憶注入は prompt cache の TTL を考慮する必要があり、毎ターン system prompt の大部分を書き直すことを避ける\n- 
**ファイルロック**:マルチプロセス時の並行制御\n- **Memory prefetch**:非同期プレフェッチ、メインフローをブロックしない\n\n### 教学版の簡略化は意図的\n\n- LLM side-query → LLM side-query + キーワードフォールバック:教学版は LLM 選択を維持し、フォールバックパスを追加\n- 記憶 JSON → Markdown + frontmatter:教学版は CC と一致\n- stop hook トリガー → `stop_reason != \"tool_use\"` 分岐:方向は一致\n- 4 層ゲート → ファイル数閾値:教学版には transcript システムやマルチセッションの概念がない\n- forked agent + 制限付き権限 → 直接呼び出し:教学版にはサブプロセス分離がない\n\n \n\n\n"
+ "content": "# s09: Memory — 圧縮は詳細を失う、失わない層が必要\n\ns01 → ... → s07 → s08 → `s09` → [s10](/ja/s10) → s11 → ... → s20\n> *\"圧縮は詳細を失う、失わない層が必要\"* — ファイルストア + インデックス + オンデマンド読み込み。圧縮を越え、セッションを越えて。\n>\n> **Harness レイヤー**: 記憶 — 圧縮とセッションを越える知識の蓄積。\n\n---\n\n## 課題\n\ns08 の autoCompact は現在の目標、残りの作業、ユーザーの制約をサマリに保持するが、詳細は失われる:「タブでインデント、スペース不可」が「ユーザーにコードスタイルの好みあり」と簡略化される。そして新しいセッションを開始すると、サマリすらない。\n\nLLM には永続状態がなく、すべての情報はコンテキストウィンドウ内にある。コンテキストが満杯になれば圧縮され、圧縮は非可逆。圧縮に参加せず、セッションを越えて保持されるストレージ層が必要。\n\n---\n\n## ソリューション\n\n\n\ns08 の圧縮パイプラインを維持し、記憶に焦点を当てる。ストレージにはファイルシステムを採用:`.memory/` ディレクトリに各記憶を `.md` ファイルとして保存、YAML frontmatter(`name` / `description` / `type`)付き。ファイルが増えたらインデックスが必要:`MEMORY.md` に 1 行 1 リンクを記録し、SYSTEM に注入。\n\n重要な設計:インデックスは SYSTEM prompt に常駐(prompt cache でキャッシュ可能)、ファイル内容はオンデマンド注入(filename/description で現在の会話にマッチ、cache を破壊しない)。書き込みは 2 つのパス:ユーザーが明示的に「覚えて」と言うか、毎ターン終了後にバックグラウンドで抽出。ファイルが蓄積されたら、定期的に整理して重複排除。\n\n4 種類の記憶、それぞれ異なる質問に答える:\n\n| タイプ | 何に答えるか | 例 |\n|--------|-------------|-----|\n| user | あなたは誰か | \"タブでスペース不可\" |\n| feedback | どう作業するか | \"DB をモックしない\" |\n| project | 何が起きているか | \"auth 書き直しはコンプライアンス主導\" |\n| reference | どこで探すか | \"パイプラインのバグは Linear INGEST\" |\n\n---\n\n## 仕組み\n\n\n\n### ストレージ:Markdown ファイル + インデックス\n\n各記憶は `.md` ファイル、YAML frontmatter でメタデータを記録:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` はインデックス、1 行に 1 リンク:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n新しい記憶を書き込むとインデックスを自動再構築:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: 
{mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 読み込み:2 つのパス\n\n**パス 1:インデックスを SYSTEM に常駐。** `build_system()` は各ユーザーリクエストの開始時に 1 回だけ `MEMORY.md` を読み込み、記憶カタログを SYSTEM prompt に注入。記憶の抽出と整理はターン終了時にだけ実行されるため、同じユーザーリクエスト内で SYSTEM を繰り返し再構築する必要はない。\n\n**パス 2:関連記憶をオンデマンド注入。** 各ユーザーリクエストの開始時に、`load_memories()` は最近の会話と記憶カタログ(name + description)を LLM に軽量 side-query として送信し、関連するファイル名を選択、ファイル内容を読み込んで注入。上限 5 件でコストを制御。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nside-query が失敗した場合(API エラー、JSON パース失敗)、name + description のキーワードマッチにフォールバック。\n\n### 書き込み:毎ターン終了後の抽出\n\nユーザーが毎回「これを覚えて」と言うわけではない。好みは通常、通常の会話の中に散らばっている:「タブの方がスペースより良い」「これからはシングルクォートにしよう」。\n\n`extract_memories()` は各ターン終了時に実行、モデルが tool_use なしで停止した場合にトリガー(会話が自然な区切りに達したことを示す):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 最近の会話から新しい記憶を抽出\n consolidate_memories() # 整理が必要かチェック\n return\n```\n\n抽出前に既存の記憶を確認し、重複を回避。抽出プロンプトは LLM に `{name, type, description, body}` の JSON 配列を要求、本当に新しい情報がある場合のみファイルに書き込む。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, 
type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### 整理:低頻度の重複排除\n\n記憶ファイルは蓄積される。`consolidate_memories()` はファイル数が閾値(デフォルト 10)に達した時にトリガー、LLM に重複排除、矛盾の統合、古い記憶の剪定を依頼:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 少なすぎる、整理する価値なし\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC はこのプロセスを **Dream** と呼び、実際には 4 層のゲートがある:時間間隔、スキャンスロットル、セッション数、ファイルロック。教学版はファイル数閾値に簡略化。\n\n### Memory に保存するもの\n\nMemory はセッションを越えて有用な情報を保存する:ユーザーの好み、繰り返し出るフィードバック、プロジェクト背景、よく使う入口、調査の手がかりなど。「あとでまた使うもの」を対象にし、インデックス + オンデマンド読み込みで現在の会話に戻す。\n\nsession memory は 1 つのセッション内の連続性を扱う:compact 後も現在の会話に残すべき文脈を保持する。両者は役割が分かれている。Memory は長期知識を扱い、session memory は現在のセッションを compact 越しにつなぐ。\n\n---\n\n## s08 からの変更点\n\n| コンポーネント | 変更前 (s08) | 変更後 (s09) |\n|-----------|-------------|-------------|\n| 記憶能力 | なし(圧縮後、好みはサマリと共に劣化) | ストレージ + 読み込み + 抽出 + 整理 |\n| 新規関数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| ストレージ | — | .memory/MEMORY.md インデックス + .memory/*.md ファイル |\n| ツール | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| ループ | 毎ターン圧縮のみ | 記憶注入 + 圧縮 + ターン終了後の抽出 + 定期整理 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n以下のプロンプトを試してみてください(複数ターンに分けて入力し、記憶の蓄積と読み込みを観察):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(Agent がタブを使用したか観察)\n3. `What did I tell you about my preferences?`(Agent が覚えているか観察)\n4. 
`I also prefer single quotes over double quotes for strings.`\n\n観察のポイント:各ターン終了後に `[Memory: extracted N new memories]` が表示されるか?`.memory/` ディレクトリに `.md` ファイルが生成されたか?`MEMORY.md` インデックスが更新されたか?新しい会話で Agent が以前の記憶を自動的に読み込んだか?\n\n---\n\n## 次へ\n\n記憶、圧縮、ツールはすべて揃った。しかし system prompt はまだハードコードされた文字列。新しいツールを追加するには手動で説明を書き、プロジェクトを変えるにはプロンプト全体を書き直す。プロンプトは実行時に組み立てられるべき。\n\ns10 System Prompt → セグメント + 実行時組み立て。異なるプロジェクト、異なるツール、異なるプロンプト。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `src/` 下の `memdir/`、`services/`、`utils/`、`query/` の分析に基づく。行番号はソースコードと照合済み。\n\n### ソースコードパス\n\n| ファイル | 行数 | 職責 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定義(`34-38`)、記憶動作指示で memory/plan/tasks を区別(`199-266`)、`loadMemoryPrompt()` 3 パス(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query で記憶選択(`18-24` システムプロンプト、`97-122` 呼び出しロジック) |\n| `memdir/memoryTypes.ts` | 271 | 型定義、frontmatter フィールド |\n| `memdir/memoryScan.ts` | — | .md ファイルをスキャン、MEMORY.md を除外、frontmatter を読み取り、最大 200 ファイル、mtime 降順(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent で記憶を抽出、制限付き権限、`skipTranscript: true`、`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理、4 層ゲート(`63-66` デフォルト値、`130-190` ゲート、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | セッションレベルの記憶管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 軽量サマリ、閾値 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入予算:200 行 / 4096 バイト/ファイル、60KB/セッション(`269-288`);query で関連記憶を検索(`2196-2241`) |\n| `query.ts` | — | memory prefetch を毎ターン開始時に起動(`301-304`)、非ブロッキング収集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget で抽出と Dream をトリガー(`141-155`) |\n\n### 記憶選択:embedding ではなく LLM\n\nCC は **Sonnet 自身で選択**(`findRelevantMemories.ts`)、embedding ベクトル類似度ではない:\n\n1. `memoryScan.ts` が `.memory/` 下のすべての `.md` ファイルをスキャン(MEMORY.md を除外)、最大 200 ファイル、mtime 降順\n2. `name` + `description` をカタログとしてリスト化\n3. Sonnet side-query に送信:「名前と説明から本当に有用な記憶を選択(最大 5 件)。不明ならスキップ。」\n4. Sonnet が `{ selected_memories: [\"file1.md\", ...] }` を返却\n5. 
選択されたファイルの完全な内容を読み込み(≤ 200 行 / 4096 バイト/ファイル)、注入。セッション総予算:60KB\n\n毎ターンのユーザー turn 開始時、`query.ts:301-304` が memory prefetch を起動(非同期);ツール実行後、`1592-1614` が非ブロッキングで結果を収集。\n\n### 抽出タイミング:stop hook、autoCompact 後ではない\n\nトリガー位置(`stopHooks.ts:141-155`):`handleStopHooks()` 内で、fire-and-forget で抽出と Dream をトリガー。教学版は `stop_reason != \"tool_use\"` 分岐に抽出を配置、方向は一致。\n\nCC の抽出は forked agent で実行(`extractMemories.ts:371-427`):制限付き権限、`skipTranscript: true`、`maxTurns: 5`。重複保護もある:メイン Agent が既に記憶ファイルを書き込んだ場合、抽出をスキップ。\n\n### 記憶ファイル形式\n\nCC は Markdown + YAML frontmatter を使用、教学版と一致。4 種類:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` がインデックス制約を定義:`MEMORY.md` 最大 200 行 / 25KB。`memdir.ts:199-266` が記憶動作指示を構築、memory と plan と tasks を明確に区別。保存場所:`~/.claude/projects//memory/`。\n\n### Dream:4 層ゲート\n\n「アイドル時にトリガー」や「数が足りたら統合」ではなく、4 層のゲート(`autoDream.ts`、デフォルト値 `63-66`、ゲートロジック `130-190`):\n\n1. **時間ゲート**:前回の統合から ≥ 24 時間\n2. **スキャンスロットル**:頻繁なファイルシステムスキャンを回避\n3. **セッションゲート**:前回の統合以降 ≥ 5 セッションの transcript が変更された\n4. **ロックゲート**:他のプロセスが統合中でない(`.consolidate-lock` ファイル)\n\n統合自体は forked agent で実行(`224-233`):定位 → 直近のシグナル収集 → 統合してファイル書き込み → 剪定してインデックス更新。ロックファイルの mtime が lastConsolidatedAt。クラッシュリカバリ:1 時間後にロックが自動期限切れ。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 永続性 | セッション間 | 単一セッション |\n| ストレージ | `memory/` 下の複数 .md ファイル | `session-memory//memory.md` |\n| 注入先 | system prompt | compact サマリ |\n| 目的 | セッション間の知識蓄積 | compact を越えたコンテキストの連続性 |\n\nsessionMemoryCompact(s08 で触れた仕組み)は Session Memory を活用:autoCompact の前に session memory ファイルを読み込み、内容が十分であれば(≥ 10K token、≥ 5 テキストメッセージ、≤ 40K token、`sessionMemoryCompact.ts:56-61`)、LLM を呼び出さずにサマリとして使用。\n\n### 実際の実装が教学版より複雑な点\n\n- **Feature flags**:記憶関連機能には複数の feature gate 層がある\n- **Team memory**:チーム共有記憶、`loadMemoryPrompt()` に専用パスあり(教学版では未カバー)\n- **KAIROS**:タイミング認識型の記憶抽出戦略、`loadMemoryPrompt()` の daily-log モード\n- **Prompt cache**:記憶注入は prompt cache の TTL を考慮する必要があり、毎ターン system prompt の大部分を書き直すことを避ける\n- 
**ファイルロック**:マルチプロセス時の並行制御\n- **Memory prefetch**:非同期プレフェッチ、メインフローをブロックしない\n\n### 教学版の簡略化は意図的\n\n- LLM side-query → LLM side-query + キーワードフォールバック:教学版は LLM 選択を維持し、フォールバックパスを追加\n- 記憶 JSON → Markdown + frontmatter:教学版は CC と一致\n- stop hook トリガー → `stop_reason != \"tool_use\"` 分岐:方向は一致\n- 4 層ゲート → ファイル数閾値:教学版には transcript システムやマルチセッションの概念がない\n- forked agent + 制限付き権限 → 直接呼び出し:教学版にはサブプロセス分離がない\n\n \n\n\n"
},
{
"version": "s10",
diff --git a/web/src/data/generated/versions.json b/learn-claude-code/web/src/data/generated/versions.json
similarity index 88%
rename from web/src/data/generated/versions.json
rename to learn-claude-code/web/src/data/generated/versions.json
index 676a20a..91e7635 100644
--- a/web/src/data/generated/versions.json
+++ b/learn-claude-code/web/src/data/generated/versions.json
@@ -664,7 +664,7 @@
"filename": "s08_context_compact/code.py",
"title": "Context Compact",
"subtitle": "Context Will Fill Up",
- "loc": 382,
+ "loc": 414,
"tools": [
"bash",
"read_file",
@@ -763,74 +763,89 @@
"signature": "def estimate_size(msgs)",
"startLine": 269
},
+ {
+ "name": "_block_type",
+ "signature": "def _block_type(block)",
+ "startLine": 271
+ },
+ {
+ "name": "_message_has_tool_use",
+ "signature": "def _message_has_tool_use(msg)",
+ "startLine": 275
+ },
+ {
+ "name": "_is_tool_result_message",
+ "signature": "def _is_tool_result_message(msg)",
+ "startLine": 284
+ },
{
"name": "snip_compact",
"signature": "def snip_compact(messages, max_messages=50)",
- "startLine": 273
+ "startLine": 295
},
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(messages)",
- "startLine": 281
+ "startLine": 313
},
{
"name": "micro_compact",
"signature": "def micro_compact(messages)",
- "startLine": 290
+ "startLine": 322
},
{
"name": "persist_large_output",
"signature": "def persist_large_output(tool_use_id, output)",
- "startLine": 300
+ "startLine": 332
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(messages, max_bytes=200_000)",
- "startLine": 307
+ "startLine": 339
},
{
"name": "write_transcript",
"signature": "def write_transcript(messages)",
- "startLine": 325
+ "startLine": 357
},
{
"name": "summarize_history",
"signature": "def summarize_history(messages)",
- "startLine": 332
+ "startLine": 364
},
{
"name": "compact_history",
"signature": "def compact_history(messages)",
- "startLine": 343
+ "startLine": 375
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(messages)",
- "startLine": 351
+ "startLine": 383
},
{
"name": "trigger_hooks",
"signature": "def trigger_hooks(event, *args)",
- "startLine": 391
+ "startLine": 428
},
{
"name": "permission_hook",
"signature": "def permission_hook(block)",
- "startLine": 398
+ "startLine": 435
},
{
"name": "log_hook",
"signature": "def log_hook(block)",
- "startLine": 403
+ "startLine": 440
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 417
+ "startLine": 454
}
],
"layer": "memory",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns08_context_compact.py - Context Compact\n\nFour-layer compaction pipeline inserted before LLM calls:\n\n L1: snip_compact — trim middle messages when count > 50\n L2: micro_compact — replace old tool_results with placeholders\n L3: tool_result_budget — persist large results to disk\n L4: compact_history — LLM full summary (1 API call)\n\n Emergency: reactive_compact — when API still returns prompt_too_long\n\n ┌─────────────────────────────────────────────────────────────┐\n │ messages[] │\n │ ↓ │\n │ L3 budget ─→ L1 snip ─→ L2 micro ─→ [token > threshold?] │\n │ ├─ No → LLM │\n │ └─ Yes → L4 summary │\n │ ↓ │\n │ LLM call │\n │ [prompt_too_long?] │\n │ └─ Yes → reactive │\n └─────────────────────────────────────────────────────────────┘\n\nCore principle: cheap first, expensive last.\nExecution order matches CC source: budget → snip → micro → auto.\n\nBuilds on s07 (skill loading). Usage:\n\n python s08_context_compact/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport ast, json, os, subprocess, time\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (inherited from s07)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n# s08: SYSTEM includes skill catalog (inherited from s07 build_system)\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s08: subagent gets its own system prompt — no compact, no skill loading\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. 
\"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s07 (unchanged): Basic Tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = 
json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, t in enumerate(todos):\n if not isinstance(t, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in t or \"status\" not in t:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return None, f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s06-s07 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write 
content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n 
for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s08: Four-Layer Compaction Pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT = 3\nPERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\n\n# L1: snipCompact — trim middle messages\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages: return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return messages[:keep_head] + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}] + messages[-keep_tail:]\n\n\n# L2: microCompact — old result placeholders\ndef collect_tool_results(messages):\n blocks = []\n for mi, msg in enumerate(messages):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(messages):\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT: return messages\n for _, _, block in tool_results[:-KEEP_RECENT]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\n# L3: toolResultBudget — persist large results to disk\ndef persist_large_output(tool_use_id, output):\n if len(output) <= PERSIST_THRESHOLD: return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists(): path.write_text(output)\n return f\"\\nFull output: {path}\\nPreview:\\n{output[:2000]}\\n\"\n\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1] if messages else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return messages\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes: return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for _, block in ranked:\n if total <= max_bytes: break\n content = str(block.get(\"content\", \"\"))\n if len(content) <= PERSIST_THRESHOLD: continue\n tid = block.get(\"tool_use_id\", \"unknown\")\n block[\"content\"] = persist_large_output(tid, content)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\n# L4: autoCompact — LLM full summary\ndef write_transcript(messages):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages: f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\ndef summarize_history(messages):\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings/decisions, 3. files read/changed, \"\n \"4. remaining work, 5. 
user constraints.\\nBe compact but concrete.\\n\\n\" + conversation)\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=2000)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in response.content\n if getattr(block, \"type\", None) == \"text\").strip() or \"(empty summary)\"\n\ndef compact_history(messages):\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\n# Emergency: reactiveCompact — on API error\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s07: Tool Definitions\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", 
\"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n # s08 change: new compact tool — triggers compact_history, not a no-op\n {\"name\": \"compact\", \"description\": \"Summarize earlier conversation to free context space.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\"}}}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n# FROM s04 (unchanged): Hooks\nHOOKS = {\"PreToolUse\": [], \"PostToolUse\": []}\ndef trigger_hooks(event, *args):\n for cb in HOOKS[event]:\n r = cb(*args)\n if r is not None: return r\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\"]\ndef permission_hook(block):\n if block.name == 
\"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"): return \"Permission denied\"\n return None\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\nHOOKS[\"PreToolUse\"].append(permission_hook)\nHOOKS[\"PreToolUse\"].append(log_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s08 core: run compaction pipeline before LLM\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1 # retry limit for reactive compact\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s08 change: three preprocessors (0 API calls, cheap first)\n # Order matches CC source: budget → snip → micro\n messages[:] = tool_result_budget(messages) # L3: persist large results first\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # s08 change: tokens still over threshold → LLM summary (1 API call)\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000)\n reactive_retries = 0 # reset on successful API call\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s08: compact tool triggers compact_history, not a no-op string\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n 
results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"[Compacted. Conversation history has been summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(blocked)})\n continue\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n else:\n # normal path: no compact was called\n messages.append({\"role\": \"user\", \"content\": results})\n continue\n # compact was called: results already appended above\n continue\n\n\nif __name__ == \"__main__\":\n print(\"s08: Context Compact — four-layer compaction pipeline\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns08_context_compact.py - Context Compact\n\nFour-layer compaction pipeline inserted before LLM calls:\n\n L1: snip_compact — trim middle messages when count > 50\n L2: micro_compact — replace old tool_results with placeholders\n L3: tool_result_budget — persist large results to disk\n L4: compact_history — LLM full summary (1 API call)\n\n Emergency: reactive_compact — when API still returns prompt_too_long\n\n ┌─────────────────────────────────────────────────────────────┐\n │ messages[] │\n │ ↓ │\n │ L3 budget ─→ L1 snip ─→ L2 micro ─→ [token > threshold?] │\n │ ├─ No → LLM │\n │ └─ Yes → L4 summary │\n │ ↓ │\n │ LLM call │\n │ [prompt_too_long?] │\n │ └─ Yes → reactive │\n └─────────────────────────────────────────────────────────────┘\n\nCore principle: cheap first, expensive last.\nExecution order matches CC source: budget → snip → micro → auto.\n\nBuilds on s07 (skill loading). Usage:\n\n python s08_context_compact/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport ast, json, os, subprocess, time\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (inherited from s07)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n# s08: SYSTEM includes skill catalog (inherited from s07 build_system)\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s08: subagent gets its own system prompt — no compact, no skill loading\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. 
\"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s07 (unchanged): Basic Tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = 
json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, t in enumerate(todos):\n if not isinstance(t, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in t or \"status\" not in t:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return None, f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s06-s07 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write 
content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n 
for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s08: Four-Layer Compaction Pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT = 3\nPERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef _block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\n\ndef _message_has_tool_use(msg):\n if msg.get(\"role\") != \"assistant\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(_block_type(block) == \"tool_use\" for block in content)\n\n\ndef _is_tool_result_message(msg):\n if msg.get(\"role\") != \"user\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and block.get(\"type\") == \"tool_result\"\n for block in content)\n\n\n# L1: snipCompact — trim middle messages\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages: return messages\n keep_head, keep_tail = 3, max_messages - 3\n head_end, tail_start = keep_head, len(messages) - keep_tail\n if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return messages\n snipped = tail_start - head_end\n return messages[:head_end] + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}] + 
messages[tail_start:]\n\n\n# L2: microCompact — old result placeholders\ndef collect_tool_results(messages):\n blocks = []\n for mi, msg in enumerate(messages):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(messages):\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT: return messages\n for _, _, block in tool_results[:-KEEP_RECENT]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n\n\n# L3: toolResultBudget — persist large results to disk\ndef persist_large_output(tool_use_id, output):\n if len(output) <= PERSIST_THRESHOLD: return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists(): path.write_text(output)\n return f\"\\nFull output: {path}\\nPreview:\\n{output[:2000]}\\n\"\n\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1] if messages else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return messages\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes: return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for _, block in ranked:\n if total <= max_bytes: break\n content = str(block.get(\"content\", \"\"))\n if len(content) <= PERSIST_THRESHOLD: continue\n tid = block.get(\"tool_use_id\", \"unknown\")\n block[\"content\"] = persist_large_output(tid, content)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n 
return messages\n\n\n# L4: autoCompact — LLM full summary\ndef write_transcript(messages):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages: f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\ndef summarize_history(messages):\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings/decisions, 3. files read/changed, \"\n \"4. remaining work, 5. user constraints.\\nBe compact but concrete.\\n\\n\" + conversation)\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=2000)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in response.content\n if getattr(block, \"type\", None) == \"text\").strip() or \"(empty summary)\"\n\ndef compact_history(messages):\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\n# Emergency: reactiveCompact — on API error\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s07: Tool Definitions\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", 
\"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. 
Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n # s08 change: new compact tool — triggers compact_history, not a no-op\n {\"name\": \"compact\", \"description\": \"Summarize earlier conversation to free context space.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\"}}}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n# FROM s04 (unchanged): Hooks\nHOOKS = {\"PreToolUse\": [], \"PostToolUse\": []}\ndef trigger_hooks(event, *args):\n for cb in HOOKS[event]:\n r = cb(*args)\n if r is not None: return r\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\"]\ndef permission_hook(block):\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"): return \"Permission denied\"\n return None\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\nHOOKS[\"PreToolUse\"].append(permission_hook)\nHOOKS[\"PreToolUse\"].append(log_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s08 core: run compaction pipeline before LLM\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1 # retry limit for reactive compact\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s08 change: three preprocessors (0 API calls, cheap first)\n # Order matches CC source: budget → snip → micro\n messages[:] = tool_result_budget(messages) # L3: 
persist large results first\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # s08 change: tokens still over threshold → LLM summary (1 API call)\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000)\n reactive_retries = 0 # reset on successful API call\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s08: compact tool triggers compact_history, not a no-op string\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"[Compacted. 
Conversation history has been summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(blocked)})\n continue\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n else:\n # normal path: no compact was called\n messages.append({\"role\": \"user\", \"content\": results})\n continue\n # compact was called: results already appended above\n continue\n\n\nif __name__ == \"__main__\":\n print(\"s08: Context Compact — four-layer compaction pipeline\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
"images": [
{
"src": "/course-assets/s08_context_compact/auto-compact.svg",
@@ -859,7 +874,7 @@
"filename": "s09_memory/code.py",
"title": "Memory",
"subtitle": "Keep a Layer That Doesn't Lose Details",
- "loc": 498,
+ "loc": 528,
"tools": [
"bash",
"read_file",
@@ -931,101 +946,116 @@
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 360
+ "startLine": 358
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 365
+ "startLine": 363
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int | None = None)",
- "startLine": 372
+ "startLine": 370
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 379
+ "startLine": 377
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 385
+ "startLine": 383
},
{
"name": "run_glob",
"signature": "def run_glob(pattern: str)",
- "startLine": 394
+ "startLine": 392
},
{
"name": "extract_text",
"signature": "def extract_text(content)",
- "startLine": 404
+ "startLine": 402
},
{
"name": "spawn_subagent",
"signature": "def spawn_subagent(task: str)",
- "startLine": 419
+ "startLine": 417
},
{
"name": "estimate_size",
"signature": "def estimate_size(msgs)",
+ "startLine": 450
+ },
+ {
+ "name": "_block_type",
+ "signature": "def _block_type(block)",
"startLine": 452
},
+ {
+ "name": "_message_has_tool_use",
+ "signature": "def _message_has_tool_use(msg)",
+ "startLine": 455
+ },
+ {
+ "name": "_is_tool_result_message",
+ "signature": "def _is_tool_result_message(msg)",
+ "startLine": 463
+ },
{
"name": "snip_compact",
"signature": "def snip_compact(msgs, mx=50)",
- "startLine": 454
+ "startLine": 471
},
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(msgs)",
- "startLine": 458
+ "startLine": 485
},
{
"name": "micro_compact",
"signature": "def micro_compact(msgs)",
- "startLine": 466
+ "startLine": 493
},
{
"name": "persist_large",
"signature": "def persist_large(tid, out)",
- "startLine": 473
+ "startLine": 500
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(msgs, mx=200_000)",
- "startLine": 480
+ "startLine": 507
},
{
"name": "write_transcript",
"signature": "def write_transcript(msgs)",
- "startLine": 494
+ "startLine": 521
},
{
"name": "summarize_history",
"signature": "def summarize_history(msgs)",
- "startLine": 501
+ "startLine": 528
},
{
"name": "compact_history",
"signature": "def compact_history(msgs)",
- "startLine": 509
+ "startLine": 536
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(msgs)",
- "startLine": 514
+ "startLine": 541
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 551
+ "startLine": 583
}
],
"layer": "memory",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef 
select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = extract_text(response.content).strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSYSTEM = build_system()\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n return msgs[:3] + [{\"role\": \"user\", \"content\": f\"[snipped {len(msgs)-mx} msgs]\"}] + msgs[-(mx-3):]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": 
blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. 
user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return extract_text(r.content).strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": 
\"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n # s09: inject relevant memory content into the current user turn\n memories_content = load_memories(messages)\n memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get(\"content\"), str) else None\n while True:\n # s09: rebuild system with current memory index\n system = build_system()\n\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n request_messages = messages\n if memories_content and memory_turn is not None and memory_turn < len(messages):\n request_messages = messages.copy()\n request_messages[memory_turn] = {\n **messages[memory_turn],\n \"content\": memories_content + \"\\n\\n\" + messages[memory_turn][\"content\"],\n }\n response = client.messages.create(\n model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = 
reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef 
select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = extract_text(response.content).strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef _block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\ndef _message_has_tool_use(msg):\n if msg.get(\"role\") != \"assistant\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(_block_type(block) == \"tool_use\" for block in content)\n\ndef _is_tool_result_message(msg):\n if msg.get(\"role\") != \"user\":\n return False\n content = 
msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and block.get(\"type\") == \"tool_result\" for block in content)\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n head_end, tail_start = 3, len(msgs) - (mx - 3)\n if head_end > 0 and _message_has_tool_use(msgs[head_end - 1]):\n while head_end < len(msgs) and _is_tool_result_message(msgs[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(msgs)\n and _is_tool_result_message(msgs[tail_start])\n and _message_has_tool_use(msgs[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return msgs\n return msgs[:head_end] + [{\"role\": \"user\", \"content\": f\"[snipped {tail_start - head_end} msgs]\"}] + msgs[tail_start:]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", 
\"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return extract_text(r.content).strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n tail_start = max(0, len(msgs) - 5)\n if (tail_start > 0 and tail_start < len(msgs)\n and _is_tool_result_message(msgs[tail_start])\n and _message_has_tool_use(msgs[tail_start - 1])):\n tail_start -= 1\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[tail_start:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": 
{\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n # s09: inject relevant memory content into the current user turn\n memories_content = load_memories(messages)\n memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get(\"content\"), str) else None\n # s09: build system once per user turn; memory is updated after the 
loop returns\n system = build_system()\n\n while True:\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n request_messages = messages\n if memories_content and memory_turn is not None and memory_turn < len(messages):\n request_messages = messages.copy()\n request_messages[memory_turn] = {\n **messages[memory_turn],\n \"content\": memories_content + \"\\n\\n\" + messages[memory_turn][\"content\"],\n }\n response = client.messages.create(\n model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": 
results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
"images": [
{
"src": "/course-assets/s09_memory/memory-overview.svg",
@@ -3046,7 +3076,7 @@
"filename": "s20_comprehensive/code.py",
"title": "Comprehensive Agent",
"subtitle": "All Mechanisms, One Loop",
- "loc": 1677,
+ "loc": 1708,
"tools": [
"bash",
"read_file",
@@ -3112,18 +3142,18 @@
},
{
"name": "RecoveryState",
- "startLine": 1172,
- "endLine": 1180
+ "startLine": 1208,
+ "endLine": 1216
},
{
"name": "CronJob",
- "startLine": 1302,
- "endLine": 1309
+ "startLine": 1338,
+ "endLine": 1345
},
{
"name": "MCPClient",
- "startLine": 1499,
- "endLine": 1521
+ "startLine": 1535,
+ "endLine": 1557
}
],
"functions": [
@@ -3362,259 +3392,274 @@
"signature": "def estimate_size(messages: list)",
"startLine": 1060
},
+ {
+ "name": "block_type",
+ "signature": "def block_type(block)",
+ "startLine": 1063
+ },
+ {
+ "name": "message_has_tool_use",
+ "signature": "def message_has_tool_use(message: dict)",
+ "startLine": 1067
+ },
+ {
+ "name": "is_tool_result_message",
+ "signature": "def is_tool_result_message(message: dict)",
+ "startLine": 1076
+ },
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(messages: list)",
- "startLine": 1064
+ "startLine": 1086
},
{
"name": "persist_large_output",
"signature": "def persist_large_output(tool_use_id: str, output: str)",
- "startLine": 1076
+ "startLine": 1098
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(messages: list, max_bytes: int = 200_000)",
- "startLine": 1087
+ "startLine": 1109
},
{
"name": "snip_compact",
"signature": "def snip_compact(messages: list, max_messages: int = 50)",
- "startLine": 1111
+ "startLine": 1133
},
{
"name": "micro_compact",
"signature": "def micro_compact(messages: list)",
- "startLine": 1121
+ "startLine": 1152
},
{
"name": "write_transcript",
"signature": "def write_transcript(messages: list)",
- "startLine": 1131
+ "startLine": 1162
},
{
"name": "summarize_history",
"signature": "def summarize_history(messages: list)",
- "startLine": 1140
+ "startLine": 1171
},
{
"name": "compact_history",
"signature": "def compact_history(messages: list)",
- "startLine": 1152
+ "startLine": 1183
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(messages: list)",
- "startLine": 1159
+ "startLine": 1190
},
{
"name": "retry_delay",
"signature": "def retry_delay(attempt: int)",
- "startLine": 1181
+ "startLine": 1217
},
{
"name": "with_retry",
"signature": "def with_retry(fn, state: RecoveryState)",
- "startLine": 1186
+ "startLine": 1222
},
{
"name": "is_prompt_too_long_error",
"signature": "def is_prompt_too_long_error(e: Exception)",
- "startLine": 1216
+ "startLine": 1252
},
{
"name": "is_slow_operation",
"signature": "def is_slow_operation(tool_name: str, tool_input: dict)",
- "startLine": 1233
+ "startLine": 1269
},
{
"name": "should_run_background",
"signature": "def should_run_background(tool_name: str, tool_input: dict)",
- "startLine": 1243
+ "startLine": 1279
},
{
"name": "start_background_task",
"signature": "def start_background_task(block, handlers: dict)",
- "startLine": 1249
+ "startLine": 1285
},
{
"name": "collect_background_results",
"signature": "def collect_background_results()",
- "startLine": 1274
+ "startLine": 1310
},
{
"name": "_cron_field_matches",
"signature": "def _cron_field_matches(field: str, value: int)",
- "startLine": 1316
+ "startLine": 1352
},
{
"name": "cron_matches",
"signature": "def cron_matches(cron_expr: str, dt: datetime)",
- "startLine": 1331
+ "startLine": 1367
},
{
"name": "_validate_cron_field",
"signature": "def _validate_cron_field(field: str, lo: int, hi: int)",
- "startLine": 1353
+ "startLine": 1389
},
{
"name": "validate_cron",
"signature": "def validate_cron(cron_expr: str)",
- "startLine": 1385
+ "startLine": 1421
},
{
"name": "save_durable_jobs",
"signature": "def save_durable_jobs()",
- "startLine": 1398
+ "startLine": 1434
},
{
"name": "load_durable_jobs",
"signature": "def load_durable_jobs()",
- "startLine": 1403
+ "startLine": 1439
},
{
"name": "cancel_job",
"signature": "def cancel_job(job_id: str)",
- "startLine": 1431
+ "startLine": 1467
},
{
"name": "cron_scheduler_loop",
"signature": "def cron_scheduler_loop()",
- "startLine": 1441
+ "startLine": 1477
},
{
"name": "consume_cron_queue",
"signature": "def consume_cron_queue()",
- "startLine": 1460
+ "startLine": 1496
},
{
"name": "run_list_crons",
"signature": "def run_list_crons()",
- "startLine": 1475
+ "startLine": 1511
},
{
"name": "run_cancel_cron",
"signature": "def run_cancel_cron(job_id: str)",
- "startLine": 1487
+ "startLine": 1523
},
{
"name": "normalize_mcp_name",
"signature": "def normalize_mcp_name(name: str)",
- "startLine": 1527
+ "startLine": 1563
},
{
"name": "_mock_server_docs",
"signature": "def _mock_server_docs()",
- "startLine": 1532
+ "startLine": 1568
},
{
"name": "_mock_server_deploy",
"signature": "def _mock_server_deploy()",
- "startLine": 1551
+ "startLine": 1587
},
{
"name": "connect_mcp",
"signature": "def connect_mcp(name: str)",
- "startLine": 1578
+ "startLine": 1614
},
{
"name": "assemble_tool_pool",
"signature": "def assemble_tool_pool()",
- "startLine": 1593
+ "startLine": 1629
},
{
"name": "run_create_worktree",
"signature": "def run_create_worktree(name: str, task_id: str = \"\")",
- "startLine": 1614
+ "startLine": 1650
},
{
"name": "run_remove_worktree",
"signature": "def run_remove_worktree(name: str, discard_changes: bool = False)",
- "startLine": 1617
+ "startLine": 1653
},
{
"name": "run_keep_worktree",
"signature": "def run_keep_worktree(name: str)",
- "startLine": 1620
+ "startLine": 1656
},
{
"name": "run_list_tasks",
"signature": "def run_list_tasks()",
- "startLine": 1634
+ "startLine": 1670
},
{
"name": "run_get_task",
"signature": "def run_get_task(task_id: str)",
- "startLine": 1644
+ "startLine": 1680
},
{
"name": "run_claim_task",
"signature": "def run_claim_task(task_id: str)",
- "startLine": 1650
+ "startLine": 1686
},
{
"name": "run_complete_task",
"signature": "def run_complete_task(task_id: str)",
- "startLine": 1656
+ "startLine": 1692
},
{
"name": "run_spawn_teammate",
"signature": "def run_spawn_teammate(name: str, role: str, prompt: str)",
- "startLine": 1662
+ "startLine": 1698
},
{
"name": "run_send_message",
"signature": "def run_send_message(to: str, content: str)",
- "startLine": 1665
+ "startLine": 1701
},
{
"name": "run_check_inbox",
"signature": "def run_check_inbox()",
- "startLine": 1669
+ "startLine": 1705
},
{
"name": "run_connect_mcp",
"signature": "def run_connect_mcp(name: str)",
- "startLine": 1681
+ "startLine": 1717
},
{
"name": "update_context",
"signature": "def update_context(context: dict, messages: list)",
- "startLine": 1863
+ "startLine": 1899
},
{
"name": "prepare_context",
"signature": "def prepare_context(messages: list)",
- "startLine": 1880
+ "startLine": 1916
},
{
"name": "build_user_content",
"signature": "def build_user_content(results: list[dict])",
- "startLine": 1890
+ "startLine": 1926
},
{
"name": "inject_background_notifications",
"signature": "def inject_background_notifications(messages: list)",
- "startLine": 1899
+ "startLine": 1935
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list, context: dict)",
- "startLine": 1919
+ "startLine": 1955
},
{
"name": "print_turn_assistants",
"signature": "def print_turn_assistants(messages: list, turn_start: int)",
- "startLine": 2025
+ "startLine": 2061
},
{
"name": "cron_autorun_loop",
"signature": "def cron_autorun_loop(history: list, context: dict)",
- "startLine": 2034
+ "startLine": 2070
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport ast, json, os, subprocess, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. 
Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if 
load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree 
name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if 
l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = 
_parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. 
This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, todo in enumerate(todos):\n if not isinstance(todo, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in todo or \"status\" not in todo:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return 
None, f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = 
pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. 
This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if 
len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, 
name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan 
{'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n 
path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return (messages[:keep_head]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[-keep_tail:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. \"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[-5:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in 
range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. 
Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n 
notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}
\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n 
left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef 
cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) 
-> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = list(results)\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n global 
rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport ast, json, os, subprocess, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. 
Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if 
load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree 
name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if 
l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = 
_parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. 
This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, todo in enumerate(todos):\n if not isinstance(todo, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in todo or \"status\" not in todo:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return 
None, f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = 
pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. 
This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if 
len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, 
name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan 
{'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\ndef block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\n\ndef message_has_tool_use(message: dict) -> bool:\n if message.get(\"role\") != \"assistant\":\n return False\n content = message.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(block_type(block) == \"tool_use\" for block in content)\n\n\ndef is_tool_result_message(message: dict) -> bool:\n if message.get(\"role\") != \"user\":\n return False\n content = message.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and 
block.get(\"type\") == \"tool_result\"\n for block in content)\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if head_end > 0 and message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and is_tool_result_message(messages[head_end]):\n head_end += 
1\n if (tail_start > 0 and tail_start < len(messages)\n and is_tool_result_message(messages[tail_start])\n and message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return messages\n snipped = tail_start - head_end\n return (messages[:head_end]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[tail_start:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. 
\"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and is_tool_result_message(messages[tail_start])\n and message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[tail_start:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 
1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = 
handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}
\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n 
left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef 
cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) 
-> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = list(results)\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n global 
rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
"images": [
{
"src": "/course-assets/s20_comprehensive/system-architecture.svg",
@@ -3720,6 +3765,9 @@
"newClasses": [],
"newFunctions": [
"estimate_size",
+ "_block_type",
+ "_message_has_tool_use",
+ "_is_tool_result_message",
"snip_compact",
"collect_tool_results",
"micro_compact",
@@ -3733,7 +3781,7 @@
"newTools": [
"compact"
],
- "locDelta": 47
+ "locDelta": 79
},
{
"from": "s08",
@@ -3752,7 +3800,7 @@
"persist_large"
],
"newTools": [],
- "locDelta": 116
+ "locDelta": 114
},
{
"from": "s09",
@@ -3764,7 +3812,7 @@
"update_context"
],
"newTools": [],
- "locDelta": -332
+ "locDelta": -362
},
{
"from": "s10",
@@ -3982,6 +4030,9 @@
"has_tool_use",
"spawn_subagent",
"estimate_size",
+ "block_type",
+ "message_has_tool_use",
+ "is_tool_result_message",
"collect_tool_results",
"persist_large_output",
"tool_result_budget",
@@ -4026,7 +4077,7 @@
"list_crons",
"cancel_cron"
],
- "locDelta": 842
+ "locDelta": 873
}
]
-}
+}
\ No newline at end of file
diff --git a/web/src/data/scenarios/s01.json b/learn-claude-code/web/src/data/scenarios/s01.json
similarity index 100%
rename from web/src/data/scenarios/s01.json
rename to learn-claude-code/web/src/data/scenarios/s01.json
diff --git a/web/src/data/scenarios/s02.json b/learn-claude-code/web/src/data/scenarios/s02.json
similarity index 100%
rename from web/src/data/scenarios/s02.json
rename to learn-claude-code/web/src/data/scenarios/s02.json
diff --git a/web/src/data/scenarios/s03.json b/learn-claude-code/web/src/data/scenarios/s03.json
similarity index 100%
rename from web/src/data/scenarios/s03.json
rename to learn-claude-code/web/src/data/scenarios/s03.json
diff --git a/web/src/data/scenarios/s04.json b/learn-claude-code/web/src/data/scenarios/s04.json
similarity index 100%
rename from web/src/data/scenarios/s04.json
rename to learn-claude-code/web/src/data/scenarios/s04.json
diff --git a/web/src/data/scenarios/s05.json b/learn-claude-code/web/src/data/scenarios/s05.json
similarity index 100%
rename from web/src/data/scenarios/s05.json
rename to learn-claude-code/web/src/data/scenarios/s05.json
diff --git a/web/src/data/scenarios/s06.json b/learn-claude-code/web/src/data/scenarios/s06.json
similarity index 100%
rename from web/src/data/scenarios/s06.json
rename to learn-claude-code/web/src/data/scenarios/s06.json
diff --git a/web/src/data/scenarios/s07.json b/learn-claude-code/web/src/data/scenarios/s07.json
similarity index 100%
rename from web/src/data/scenarios/s07.json
rename to learn-claude-code/web/src/data/scenarios/s07.json
diff --git a/web/src/data/scenarios/s08.json b/learn-claude-code/web/src/data/scenarios/s08.json
similarity index 100%
rename from web/src/data/scenarios/s08.json
rename to learn-claude-code/web/src/data/scenarios/s08.json
diff --git a/web/src/data/scenarios/s09.json b/learn-claude-code/web/src/data/scenarios/s09.json
similarity index 100%
rename from web/src/data/scenarios/s09.json
rename to learn-claude-code/web/src/data/scenarios/s09.json
diff --git a/web/src/data/scenarios/s10.json b/learn-claude-code/web/src/data/scenarios/s10.json
similarity index 100%
rename from web/src/data/scenarios/s10.json
rename to learn-claude-code/web/src/data/scenarios/s10.json
diff --git a/web/src/data/scenarios/s11.json b/learn-claude-code/web/src/data/scenarios/s11.json
similarity index 100%
rename from web/src/data/scenarios/s11.json
rename to learn-claude-code/web/src/data/scenarios/s11.json
diff --git a/web/src/data/scenarios/s12.json b/learn-claude-code/web/src/data/scenarios/s12.json
similarity index 100%
rename from web/src/data/scenarios/s12.json
rename to learn-claude-code/web/src/data/scenarios/s12.json
diff --git a/web/src/data/scenarios/s13.json b/learn-claude-code/web/src/data/scenarios/s13.json
similarity index 100%
rename from web/src/data/scenarios/s13.json
rename to learn-claude-code/web/src/data/scenarios/s13.json
diff --git a/web/src/data/scenarios/s14.json b/learn-claude-code/web/src/data/scenarios/s14.json
similarity index 100%
rename from web/src/data/scenarios/s14.json
rename to learn-claude-code/web/src/data/scenarios/s14.json
diff --git a/web/src/data/scenarios/s15.json b/learn-claude-code/web/src/data/scenarios/s15.json
similarity index 100%
rename from web/src/data/scenarios/s15.json
rename to learn-claude-code/web/src/data/scenarios/s15.json
diff --git a/web/src/data/scenarios/s16.json b/learn-claude-code/web/src/data/scenarios/s16.json
similarity index 100%
rename from web/src/data/scenarios/s16.json
rename to learn-claude-code/web/src/data/scenarios/s16.json
diff --git a/web/src/data/scenarios/s17.json b/learn-claude-code/web/src/data/scenarios/s17.json
similarity index 100%
rename from web/src/data/scenarios/s17.json
rename to learn-claude-code/web/src/data/scenarios/s17.json
diff --git a/web/src/data/scenarios/s18.json b/learn-claude-code/web/src/data/scenarios/s18.json
similarity index 100%
rename from web/src/data/scenarios/s18.json
rename to learn-claude-code/web/src/data/scenarios/s18.json
diff --git a/web/src/data/scenarios/s19.json b/learn-claude-code/web/src/data/scenarios/s19.json
similarity index 100%
rename from web/src/data/scenarios/s19.json
rename to learn-claude-code/web/src/data/scenarios/s19.json
diff --git a/web/src/data/scenarios/s20.json b/learn-claude-code/web/src/data/scenarios/s20.json
similarity index 100%
rename from web/src/data/scenarios/s20.json
rename to learn-claude-code/web/src/data/scenarios/s20.json
diff --git a/web/src/hooks/useDarkMode.ts b/learn-claude-code/web/src/hooks/useDarkMode.ts
similarity index 100%
rename from web/src/hooks/useDarkMode.ts
rename to learn-claude-code/web/src/hooks/useDarkMode.ts
diff --git a/web/src/hooks/useSimulator.ts b/learn-claude-code/web/src/hooks/useSimulator.ts
similarity index 100%
rename from web/src/hooks/useSimulator.ts
rename to learn-claude-code/web/src/hooks/useSimulator.ts
diff --git a/web/src/hooks/useSteppedVisualization.ts b/learn-claude-code/web/src/hooks/useSteppedVisualization.ts
similarity index 100%
rename from web/src/hooks/useSteppedVisualization.ts
rename to learn-claude-code/web/src/hooks/useSteppedVisualization.ts
diff --git a/web/src/i18n/messages/en.json b/learn-claude-code/web/src/i18n/messages/en.json
similarity index 100%
rename from web/src/i18n/messages/en.json
rename to learn-claude-code/web/src/i18n/messages/en.json
diff --git a/web/src/i18n/messages/ja.json b/learn-claude-code/web/src/i18n/messages/ja.json
similarity index 100%
rename from web/src/i18n/messages/ja.json
rename to learn-claude-code/web/src/i18n/messages/ja.json
diff --git a/web/src/i18n/messages/zh.json b/learn-claude-code/web/src/i18n/messages/zh.json
similarity index 100%
rename from web/src/i18n/messages/zh.json
rename to learn-claude-code/web/src/i18n/messages/zh.json
diff --git a/web/src/lib/constants.ts b/learn-claude-code/web/src/lib/constants.ts
similarity index 100%
rename from web/src/lib/constants.ts
rename to learn-claude-code/web/src/lib/constants.ts
diff --git a/web/src/lib/i18n-server.ts b/learn-claude-code/web/src/lib/i18n-server.ts
similarity index 100%
rename from web/src/lib/i18n-server.ts
rename to learn-claude-code/web/src/lib/i18n-server.ts
diff --git a/web/src/lib/i18n.tsx b/learn-claude-code/web/src/lib/i18n.tsx
similarity index 100%
rename from web/src/lib/i18n.tsx
rename to learn-claude-code/web/src/lib/i18n.tsx
diff --git a/web/src/lib/utils.ts b/learn-claude-code/web/src/lib/utils.ts
similarity index 100%
rename from web/src/lib/utils.ts
rename to learn-claude-code/web/src/lib/utils.ts
diff --git a/web/src/types/agent-data.ts b/learn-claude-code/web/src/types/agent-data.ts
similarity index 100%
rename from web/src/types/agent-data.ts
rename to learn-claude-code/web/src/types/agent-data.ts
diff --git a/web/tsconfig.json b/learn-claude-code/web/tsconfig.json
similarity index 100%
rename from web/tsconfig.json
rename to learn-claude-code/web/tsconfig.json
diff --git a/web/vercel.json b/learn-claude-code/web/vercel.json
similarity index 100%
rename from web/vercel.json
rename to learn-claude-code/web/vercel.json
diff --git a/learn-pi-agent/.gitignore b/learn-pi-agent/.gitignore
new file mode 100644
index 0000000..153c9ed
--- /dev/null
+++ b/learn-pi-agent/.gitignore
@@ -0,0 +1,7 @@
+node_modules/
+dist/
+*.log
+.env
+.author-checks/
+*.test.ts
+*.spec.ts
diff --git a/learn-pi-agent/EVOLUTION.md b/learn-pi-agent/EVOLUTION.md
new file mode 100644
index 0000000..89bb785
--- /dev/null
+++ b/learn-pi-agent/EVOLUTION.md
@@ -0,0 +1,292 @@
+# 累积演进宪法
+
+> 本文件是 `learn-pi-agent` 的工程对照基准。每一节的 `code.ts` 和 `README.md` 都必须对照它。
+> 目标:学习者从 s01 学到 s12,**累积**实现出一个机制健全、心智与 Pi 一致的 mini Pi。
+
+---
+
+## 0. 核心立场
+
+现有课程确立了「每节独立、最小化」(P2),但丢了「累积实现」(P0)。
+本宪法把 P0 立起来,同时不毁掉「每节聚焦一个机制」的教学性。
+
+**一句话原则:每一节 = 给正在生长的 mini Pi 装一个零件。**
+- 代码是单轨累积的:`sXX/code.ts` 是 mini Pi 的第 N 个版本,是第 N-1 节的**超集**。
+- 文档负责聚焦:每节 README 末尾的「接入主线」段用 diff 风格展示本节焊上了什么零件。
+
+不维护「聚焦 demo」和「主线」两份代码。两份代码必然漂移,漂移就是现在所有类型退化问题的根源。
+
+---
+
+## 1. 元规则(约束所有类型演化)
+
+| 规则 | 内容 | 修复的现有问题 |
+|---|---|---|
+| **R1** | 字段**只增不删**:`ProviderInput`、`ProviderEvent`、`StopReason` 取值集、`AgentMessage` union 成员、`ToolSpec`、`TurnSnapshot` 字段,一旦引入永久保留 | s03 删 tools、s04 删 message_start、s06/s08 删 stopReason、s04/s06 删 ToolSpec.input |
+| **R2** | 方法**只增**:`ToolRegistry` 等类的成员只增 | registry 在 s02/s04/s06 间反复变方法集 |
+| **R3** | 只允许两类**受控升级**,且必须在当节 README 显式声明「这是升级,不是新增」:<br>**U1 接口语义升级**(不可避免的 breaking change)<br>**U2 同名类型全局唯一** | 现有「同名不同义」「5 节 5 种 Tool 形状」 |
+| **R4** | 错误**不崩溃**,转结构化消息(见 §4) | Gap3 错误传播缺失 |
+| **R5** | 循环有**终止保证**(`maxTurns` 上限) | Gap3 工具循环无保护 |
+| **R6** | **加载**和**执行**分开管(对齐 Pi 真实设计):trust 控制资源加载;执行边界不内置、靠部署层 containerization;hook 是唯一的执行细化拦截点 | Gap3 trust/policy/hook 三个孤立 demo |
+| **R7** | core 通过 **output 抽象**输出,不直接 `console.log`。从 s01 起就有最小间接层 | Gap3 输出/执行分离,s10 才分离却已焊死 |
+| **R8** | 每节 `code.ts` 是前一节的超集;README「接入主线」段以 diff 展示增量 | 不累积 |
+
+### U1 受控升级清单(宪法允许的全部 breaking change)
+
+| 节 | 升级 | 理由 |
+|---|---|---|
+| s03 | `Provider.complete()` → `Provider.stream()` | provider 输出形态本质改变(一次性→流式),无法并存 |
+| s07 | `AgentState.messages: AgentMessage[]` → `SessionTree` | 历史从线性升级为可分支树;`currentPath()` 仍产出 `AgentMessage[]`,对外构造方式不变 |
+| s11 | `ResourceLoader.load()` → `load(trust)` | 加信任参数过滤资源(参数升级) |
+
+除以上三处外,**任何**对已有类型/接口的删改都违反宪法。
+
+### U2 同名类型全局唯一清单
+
+`ResourceLoader`、`RuntimeEvent`、`Tool`、`Output` 在主线中各自**只有一个定义**,所有章节复用它。
+
+---
+
+## 2. 核心类型字典
+
+> 终态定义 + 引入节 + 演变。每节 `code.ts` 必须与字典一致。
+
+```ts
+// —— 消息(s01 起,union 只增 R1)——
+type StopReason = "stop" | "error"; // s01
+// = "stop" | "toolUse" | "error"; // s04 起加 toolUse,之后稳定
+
+type UserMessage = { role: "user"; content: string }; // s01,稳定
+type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };// s01,stopReason 永驻
+type ToolResultMessage= { role: "toolResult"; toolCallId: string; content: string }; // s04,稳定
+type AgentMessage = UserMessage | AssistantMessage // s01
+ | ToolResultMessage; // s04 起并入(只增)
+
+// —— core 状态(对齐 Pi AgentState)——
+type AgentState = {
+ messages: AgentMessage[]; // s01 起;s07 升级为 SessionTree(U1)
+ model: string; // s06 起加:跨轮配置,对齐 Pi(不在 ProviderInput/snapshot)
+};
+
+// —— Provider 对外形态(对齐 Pi Context)——
+type ProviderMessage =
+ | { role: "user" | "assistant"; content: string } // s01
+ | { role: "toolResult"; toolCallId: string; content: string };// s04 起并入(只增)
+
+// Pi 的 Context = { systemPrompt?, messages, tools }。教学版对齐:systemPrompt(s08 起) + messages + tools。model 不在这里,在 AgentState。
+type ProviderInput = {
+ systemPrompt: string; // s08 起加(项目资料组装进去)
+ messages: ProviderMessage[]; // s01
+ tools: ToolSpec[]; // s02 起加(s03 不许删)
+};
+
+// —— 工具契约(s02 起,全局唯一形状 U2)——
+type ToolSpec = { name: string; description: string; input: Record<string, unknown> };// input 永驻
+type ToolHandler = (input: Record<string, unknown>) => string; // 同步;抛错由 R4 捕获
+type Tool = { spec: ToolSpec; handler: ToolHandler }; // 全局唯一形状
+type ToolCall = { id: string; name: string; input: Record<string, unknown> }; // s04
+
+class ToolRegistry { // 方法只增 R2
+ register(tool: Tool): void {} // s02
+ getSpecs(): ToolSpec[] {} // s02
+ run(call: ToolCall): string {} // s04
+}
+
+// —— Provider 事件流 ——
+type ProviderEvent =
+ | { type: "message_start" } // s03(s04 不许删)
+ | { type: "text_delta"; text: string } // s03
+ | { type: "message_end"; stopReason: StopReason } // s03
+ | { type: "tool_call"; call: ToolCall }; // s04 起加
+
+interface Provider { // U1:s03 由 complete 升级为 stream
+ stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+
+// —— Turn 快照(对齐 Pi AgentContext:固定 systemPrompt/messages/tools;model 在 state 不进快照)——
+type TurnSnapshot = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+
+// —— 会话树(U1:s07 取代扁平 messages 数组)——
+type SessionEntry = { id: string; parentId: string | null;
+ role: "user" | "assistant" | "toolResult"; content: string };
+class SessionTree {
+ append(msg): SessionEntry {}
+ moveTo(id: string): void {}
+ currentPath(): AgentMessage[] {} // 产出线性消息供 ProviderInput 使用
+ allEntries(): SessionEntry[] {}
+}
+
+// —— 上下文资源(s08,U2 全局唯一 ResourceLoader)——
+type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string };
+class ResourceLoader {
+ constructor(private resources: ContextResource[]) {}
+ load(trust?: ProjectTrust): ContextResource[] {} // U1:s11 加 trust 参数
+}
+// s08:资源组装进 systemPrompt(对齐 Pi buildSystemPrompt),不是独立 context 字段
+function buildSystemPrompt(resources: ContextResource[]): string {}
+
+// —— Hook(s05,外层装饰 registry.run,不进 registry)——
+type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+function executeToolCall(registry, hooks, call): ToolResultMessage {} // s05 起,稳定(不加 policy)
+
+// —— 扩展运行时(s09,复用前面的 Tool/ToolRegistry)——
+type Command = { name: string; run: () => string };
+type RuntimeEvent = { type: "message"; content: string } | { type: "done" }; // U2 全局唯一
+type ExtensionAPI = {
+ on(type: RuntimeEvent["type"], handler: (e: RuntimeEvent) => void): void;
+ registerTool(tool: Tool): void; // 复用 s02 的 Tool,注入现有 ToolRegistry
+ registerCommand(cmd: Command): void;
+};
+type Extension = (api: ExtensionAPI) => void;
+class ExtensionRuntime { use(ext: Extension): void; /* 内部持有 ToolRegistry */ }
+
+// —— 输出抽象(R7:s01 最小形态 → s10 正式化)——
+type Output = { log(line: string): void }; // s01 起最小间接层
+// s10 升级为:
+type RuntimeMode = { render(events: RuntimeEvent[]): void }; // PrintMode / JsonMode 是两个实现
+
+// —— 信任与执行边界(s11,对齐 Pi:trust 控加载,执行靠 containerization)——
+type ProjectTrust = "trusted" | "untrusted";
+// 注:Pi 不内置执行权限系统。ExecutionPolicy/Executor 已移除——执行边界靠部署层
+// containerization 三方案(OpenShell / Gondolin / Plain Docker),core 内只有 trust 控制资源加载。
+
+// —— 能力分发(s12,注入既有 registry/commands/loader)——
+type PackageManifest = { name: string; tools: string[]; commands: string[]; resources: string[] };
+type Package = { manifest: PackageManifest; contents: Record };
+type LoadedPackage = { name: string; tools: Record;
+ commands: Record; resources: Record };
+function loadPackage(pkg: Package): LoadedPackage {}
+```
+
+---
+
+## 3. 十二节累积演进主表
+
+> 每节三栏:**累积骨架(不变)** | **本节新增零件** | **接入点(焊在哪)**
+
+| 节 | 累积骨架(不变) | 本节新增零件 | 接入点 |
+|---|---|---|---|
+| **s01** | — | AgentState、消息三类型、StopReason(stop/error)、ProviderInput{messages}、Provider.complete、runOneTurn、`Output.log`(R7) | 地基 |
+| **s02** | messages、provider、runOneTurn | ToolSpec{name;description;**input**}、ToolHandler、Tool、ToolRegistry(register/getSpecs) | buildProviderInput 接收 registry;ProviderInput 加 `tools=registry.getSpecs()` |
+| **s03** | 全部(**含 tools,R1 不删**) | **[U1]** Provider complete→stream、ProviderEvent(message_start/text_delta/message_end)、collectAssistantMessage | Provider 接口升级;runOneTurn 内 complete→stream+collect |
+| **s04** | 事件流、tools | ToolCall、ToolResultMessage、tool_call 事件、StopReason+toolUse、ToolRegistry.run、runEventedToolLoop(**maxTurns** R5)、错误捕获(R4) | 循环内 `registry.run(call)`,结果入 messages;tools 仍取 `registry.getSpecs()`(**不硬编码**) |
+| **s05** | 工具循环 | ToolHooks、beforeToolCall/afterToolCall、BeforeToolCallResult(allow/block)、executeToolCall(registry,hooks,call) | 循环内 `registry.run(call)` → `executeToolCall(registry,hooks,call)`;registry 不变(R2) |
+| **s06** | 循环+hook | TurnSnapshot{messages,tools}、createTurnSnapshot、**AgentState+model**(跨轮配置,对齐 Pi) | runEventedToolLoop 开头先 createTurnSnapshot;model 在 AgentState 不进 snapshot(对齐 Pi AgentContext,snapshot 只固定 messages/tools) |
+| **s07** | snapshot、循环、hook | **[U1]** messages 数组→SessionTree、SessionEntry{parentId}、append/moveTo/currentPath | buildProviderInput 用 `session.currentPath()` 取线性消息;对外构造不变 |
+| **s08** | tree、snapshot、tools | ContextResource、ResourceLoader.load()、buildSystemPrompt、ProviderInput+**systemPrompt**(资料组装进去,对齐 Pi;tools 保留 R1) | buildProviderInput 调 buildSystemPrompt(loader.load()) 拼 systemPrompt;snapshot 跟随加 systemPrompt |
+| **s09** | 全部主线 | Extension、ExtensionAPI、ExtensionRuntime、Command、RuntimeEvent、on/registerTool/registerCommand | ExtensionRuntime 内部持有现有 ToolRegistry;registerTool 注入的 Tool 走同一执行链(s05 hook) |
+| **s10** | 全部主线 | **[R7 收获]** Output.log → RuntimeMode、PrintMode/JsonMode、render(RuntimeEvent[]) | core 的 `output.log` 升级为 `mode.render(events)`;s01 起就没直连 console,此处只是命名+多态化 |
+| **s11** | 全部主线 | ProjectTrust、**[U1]** load(trust)、containerization 三方案(执行边界靠部署层,对齐 Pi) | trust→`loader.load(trust)` 控制资源加载;执行边界不内置,靠 containerization(README 讲 OpenShell/Gondolin/Docker 三方案) |
+| **s12** | 全部主线 | PackageManifest、Package、LoadedPackage、loadPackage、pick | loadPackage 产出注入 registry(s02)/commands(s09)/loader(s08)。能力分发闭环 |
+
+---
+
+## 4. Gap3 健全心智补全(宪法硬规定)
+
+| 缺口 | 宪法规定 |
+|---|---|
+| **循环终止** (R5) | `runEventedToolLoop` 加 `maxTurns`(默认 **8**)。终止条件 = provider 不再发 tool_call **或** stopReason≠toolUse **或** 达上限。达上限时返回 stopReason=`"stop"` 并附注 "max turns reached" |
+| **错误传播** (R4) | ① provider stream 抛错 → 捕获,本轮 AssistantMessage.stopReason=`"error"`、content=错误说明,写回 state。<br>② tool handler 抛错 → `executeToolCall` 捕获,ToolResultMessage.content=`"error: "`,**循环继续**(让 provider 看到错误自行决定)。<br>StopReason 维持三值 `stop\|toolUse\|error`,不为错误新增类型 |
+| **加载/执行分离** (R6) | 两件事分开(对齐 Pi 真实设计):<br>• **加载**靠 trust:`loader.load(trust)`,untrusted 返回空(防恶意资源)<br>• **执行**不内置权限:Pi 不在 core 里限制文件/进程/网络,执行边界靠部署层 containerization 三方案(OpenShell / Gondolin / Plain Docker)<br>• **细化拦截**靠 hook:beforeToolCall allow/block 具体工具(core 内唯一的执行拦截点)<br>教学版不再发明 ExecutionPolicy——它在 Pi 里没有对应物 |
+| **systemPrompt 层级** | `ProviderInput.systemPrompt` 是 **system 级**装配(项目资料组装进去),与 `messages`(**对话级**历史)是两个独立维度。对齐 Pi 的 `Context.systemPrompt`(资料进 systemPrompt,不是独立 context 字段) |
+| **输出/执行分离** (R7) | 从 s01 起 core 通过 `Output.log` 输出,不直连 console;s10 升级为 `RuntimeMode.render`。前 9 节就不存在"core 与 IO 焊死"的债 |
+
+---
+
+## 5. 完整 Turn 执行链(总装蓝图)
+
+> 这是根 `README.md`(总装章)的灵魂。一条链接上全部 12 节 + Gap3。
+
+```text
+newTurn(userInput, trust):
+ 1. systemPrompt = buildSystemPrompt(loader.load(trust)) # s08 资源组装 + s11 trust 过滤 (R6)
+ 2. session.append({role:"user", content:userInput}) # s07 历史树 (U1)
+ 3. snapshot = createTurnSnapshot( # s06 快照
+ session.currentPath(), registry.getSpecs(), systemPrompt)
+ # model 在 AgentState,不进 snapshot(对齐 Pi AgentContext)
+ 4. for turn in 0..maxTurns: # s04 循环 (R5)
+ input = buildProviderInput(snapshot, state) # s01/s02/s08;systemPrompt 从 snapshot,model 从 state
+ events = provider.stream(input) # s03 事件流 (U1)(Pi 真实 stream(model,context),教学 fake provider 简化)
+ for event in events:
+ message_start / text_delta → 累加 content
+ message_end → stopReason
+ tool_call →
+ result = executeToolCall( # s05 hook(R4 错误捕获)
+ registry, hooks, call) # 无 policy(执行边界靠 containerization,R6)
+ session.append(toolResultMessage) # s07
+ if 无 tool_call 或 stopReason≠toolUse: break
+ 5. assistant = {role:"assistant", content, stopReason} # stopReason 永驻 (R1)
+ 6. session.append(assistant) # s07
+ 7. mode.render(allEvents) # s10 输出分离 (R7)
+
+ 旁路:extension(s09) 可注册额外 tool/command;
+ package(s12) 可分发 tool/resource 注入 registry/loader。
+```
+
+每一步都能追溯到某一节 + 某条规则。这就是「心智健全」的可验证证据。
+
+---
+
+## 6. 文件结构与每节 README 规范
+
+### 6.1 文件结构
+
+```text
+learn-pi-agent/
+ README.md 总装章:执行链全景图(§5)+ 累积演进表索引(§3)+ 如何从 s01 读到 s12
+ EVOLUTION.md 本文件(宪法)
+ sXX_*/
+ code.ts 累积主线第 N 版(前一节的超集,R8)
+ README.md 按下方规范
+```
+
+### 6.2 每节 README 模板
+
+```text
+# sXX: <主题> — <一句话点题>
+> *<motto:一句话口号>*
+> Pi 边界:<本节对应的设计边界>
+
+## 问题 ← 真痛点 / 上一节方案的缺陷(不提前命名下节概念)
+## 解决方案 ← 核心洞察 + 设计取舍表(不重复工作原理步骤)
+## 工作原理 ← 递进叙事:每步「为什么需要 + 代码」,末尾点睛
+## 试一下 ← 运行命令 + 输出 + 观察重点
+## 接入主线 ← 相对上节的变更表(前后对比)+ 焊接点
+## 接下来 ← 引出下一节痛点(只描述,不命名)
+## Pi 源码溯源 ← 折叠,只对照本章词汇
+```
+
+### 6.3 写作规则(硬约束)
+
+- **R-写①「问题」必须有痛或张力**:场景痛点 或 上一节方案的缺陷。不准写任务描述("core 要保存对话"这种不算问题)。
+- **R-写②「解决方案」是洞察不是目录**:一句话说清本质 + 设计取舍表。不准预告实现步骤(那是「工作原理」的事)。
+- **R-写③「工作原理」是叙事不是清单**:每个类型/函数出场时回答"为什么现在需要它",递进展开,末尾必点睛(这一节真正交付的东西是什么)。U1 升级必须显式标注「这是升级」。
+- **R-写④ 用自然中文,不直译英文意象**:motto 若源自英文,中文必须重写(`catch one turn` ✗ → 不译成"接住一轮");不为对仗生造动词搭配("穿过 core""接住一轮");用程序员实际会说的词("存下来""转成")。每节写完通读一遍,读起来"像翻译"的句子都要改。
+- **R-写⑤ 词汇纪律(叙事段)**:「问题」「解决方案」「工作原理」「接下来」只用本章已解释词汇;不提前命名下节概念;"本章不讲 X"也把 X 引进来了,不要写。源码溯源同样控词。
+ - **例外**:「接入主线」段是工程对照表,允许路标式提及未来章节的术语,但必须标注出现章节(如 `ToolResultMessage(s04 加)`),作为前瞻路标,不是教学展开。
+
+### 6.4「接入主线」段的写法
+
+用**变更表**(组件维度,前节 vs 本节),再加一句**焊接点**(本节零件焊在主线的哪个位置)。s01 作为地基节无前节可比,改列「本节确立的永驻基础 + 后续怎么演化」。后续节严格用「变更表 + 焊接点」。
+
+---
+
+## 7. 已落实的修正
+
+- **R1**:s03 保留 tools;s04 保留 message_start、ToolSpec.input,并从 registry.getSpecs() 取 tools;s06/s08 保留 stopReason;s08 的 systemPrompt 与 tools 并列进入 ProviderInput。
+- **R2**:s04 之后的 ToolRegistry 统一为 `register + getSpecs + run`。
+- **U2**:ResourceLoader、RuntimeEvent、Tool 全链路保持单一形状;后续章节复用同名类型,不另起含义。
+- **U1**:s03、s07、s11 的三处受控升级在 README 的「接入主线」中显式标注。
+- **R4/R5/R6/R7**:工具错误转 ToolResultMessage、工具循环有 maxTurns、trust 只控加载、输出从 s01 起经过 Output 并在 s10 长成 RuntimeMode。
+- **R8**:每节 code.ts 是前一节的超集;每节 README 都有「接入主线」段。
+- **幽灵名词**:`toProviderMessage`、`ContextBlock` 这类文档里出现但代码里没有的名词已移除或落到实际实现上。
+
+## 8. 待继续检查的点
+
+- **s10 事件源**:本节按教学 A 方案使用 `createDemoRuntimeEvents()` 作为最小事件源,避免重讲完整 tool loop。后续如果读者仍误解为另起 core,再把「演示事件源」说明前移到「问题」段。
+- **s12 安装闭环**:本节按教学 A 方案保留 `tools / commands / resources` 三类教学对象,并通过 `installLoadedPackage()` 接回主线。真实 Pi 的 `extensions / skills / prompts / themes` 只在源码溯源里讲,不进入教学主线。
+- **s06/s07 心智负担**:snapshot 与 SessionTree 是课程里的两次陡坡,后续润色时优先看这两节的「问题」段是否足够具体。
diff --git a/learn-pi-agent/README.md b/learn-pi-agent/README.md
new file mode 100644
index 0000000..3e0e14b
--- /dev/null
+++ b/learn-pi-agent/README.md
@@ -0,0 +1,114 @@
+# learn-pi-agent
+
+一套 Agent Harness 工程课程。用 TypeScript 从零**累积**实现一个机制健全、心智与 Pi 一致的 mini Pi。
+
+12 节,每节只加一个机制。每节的 `code.ts` 是前一节的超集——学完 s12,你手里有一个完整可运行的 mini Pi,而不是 12 个互不相连的玩具。
+
+---
+
+## mini Pi 的一轮,长什么样
+
+12 个机制不是 12 个零件,是一台机器。下面是完整的一轮(turn)执行链——每一步都标注了它来自哪一节:
+
+```text
+newTurn(userInput, trust):
+
+ 1. 按信任加载项目资料,组装进 systemPrompt
+ systemPrompt = buildSystemPrompt(loader.load(trust)) ← s08 资源 + s11 trust
+
+ 2. 用户消息进入历史树
+ session.append(user message) ← s07 历史树
+
+ 3. 一轮开始,拍快照(systemPrompt / tools 固定;model 在 state)
+ snapshot = createTurnSnapshot(session.currentPath(),
+ registry.getSpecs(), systemPrompt) ← s06 快照
+
+ 4. 工具循环(上限 maxTurns)
+ while 未结束: ← s04 循环 + R5 终止
+ input = buildProviderInput(snapshot, state) ← s01/s02/s08;model 从 state
+ events = provider.stream(input) ← s03 事件流
+ for event in events:
+ tool_call → executeToolCall(registry, hooks, call) ← s05 hook(出错不崩 R4)
+ before → handler → after
+ text_delta → 累加文本
+ 若本轮无 tool_call → 跳出循环
+
+ 5. assistant 消息进入历史树
+ session.append(assistant) ← s07
+
+ 6. 输出(core 产事件,mode 决定怎么展示)
+ mode.render(events) ← s10 运行方式
+
+ 旁路:
+ extension 通过 API 注册 tool / command ← s09
+ package 把一组能力打包、按清单分发 ← s12
+```
+
+这一条链就是 mini Pi 的"心智"。每个机制都接在前一个上——加载、历史、快照、循环、执行、边界、输出,首尾相接。
+
+注意:trust 只决定项目资料是否加载;执行权限不在 core 内解决,系统级边界交给容器或沙箱。
+
+---
+
+## 12 节累积演进
+
+每节只加一个机制,`code.ts` 是前一节的超集(R8)。精确契约见 [EVOLUTION.md](./EVOLUTION.md)。
+
+| 节 | 主题 | 给 mini Pi 加的零件 | Pi 边界 |
+| --- | --- | --- | --- |
+| [s01](./s01_minimal_agent_core/) | Agent Core | core + provider,存一轮消息 | provider 输入边界 |
+| [s02](./s02_tool_contract/) | Tool Contract | 工具拆成 spec(给 provider)+ handler(留本地) | 工具契约边界 |
+| [s03](./s03_provider_event_stream/) | Provider Event Stream | provider 从一次性返回升级为事件流 | provider 输出边界 |
+| [s04](./s04_evented_tool_loop/) | Evented Tool Loop | tool_call → 执行 → 结果回写,带循环和终止保护 | 工具执行边界 |
+| [s05](./s05_tool_hook_boundary/) | Tool Hook Boundary | 执行前后插口(before / after) | 工具插口边界 |
+| [s06](./s06_turn_snapshot/) | Turn Snapshot | 一轮开始拍快照(systemPrompt/tools 固定;model 在 state) | 一轮状态边界 |
+| [s07](./s07_session_tree/) | Session Tree | 历史从数组升级为可分叉的树 | 会话历史边界 |
+| [s08](./s08_context_resources/) | Context Resources | 项目资料作为独立维度进入输入 | 上下文资源边界 |
+| [s09](./s09_extension_runtime/) | Extension Runtime | 外部代码通过 API 注册 tool / command | 扩展 API 边界 |
+| [s10](./s10_runtime_modes/) | Runtime Modes | core 产事件,外层 mode 决定展示 | 运行方式边界 |
+| [s11](./s11_trust_and_execution_boundary/) | Trust and Execution | 加载看 trust、执行靠 containerization、细化拦截靠 hook | 执行权限边界 |
+| [s12](./s12_package_distribution/) | Package Distribution | 能力整理成带清单的包分发 | 能力分发边界 |
+
+---
+
+## 如何阅读
+
+按顺序 s01 → s12。每节:
+
+1. 读「问题」和「解决方案」——理解**为什么**需要这个机制
+2. 读「工作原理」——看机制怎么实现,末尾的点睛说清本质
+3. 运行 `code.ts`,对照「试一下」的输出
+4. 看「接入主线」——这节相对上节加了什么、焊在哪
+
+每节只引入当前机制需要的术语;后续章节的术语会在对应章节第一次出现时再解释。
+
+---
+
+## 运行
+
+```sh
+npm install
+npm run s01 # 从这里开始
+npm run s02
+# ...
+npm run s12
+```
+
+所有章节用固定输入、fake provider 和内存数据,运行不依赖真实模型 API。
+
+---
+
+## 课程宪法
+
+[EVOLUTION.md](./EVOLUTION.md) 是工程对照基准,每节的 `code.ts` 和 README 都对照它:
+
+- **8 条元规则**:R1 字段只增、R2 方法只增、U1/U2 受控升级、R4 错误不崩、R5 循环终止、R6 加载/执行分离、R7 输出抽象、R8 累积
+- **核心类型字典**:每个类型的稳定定义 + 引入节 + 演变
+- **12 节累积主表**:每节加什么、保持什么不变
+- **README 写作规范**:问题驱动 + 五条写作规则(含词汇纪律、去 AI 味)
+
+---
+
+## 和 Pi 的关系
+
+`learn-pi-agent` 不逐行解释 Pi 源码。每节先写一个最小机制,再在折叠的「Pi 源码溯源」里说明它对应 Pi 的哪个设计位置。真实 Pi 更复杂,教学版只保留每个机制的最小主干——但机制之间的连接是齐全的,这就是上面那条 turn 执行链。
diff --git a/learn-pi-agent/package-lock.json b/learn-pi-agent/package-lock.json
new file mode 100644
index 0000000..a8f3cb2
--- /dev/null
+++ b/learn-pi-agent/package-lock.json
@@ -0,0 +1,566 @@
+{
+ "name": "learn-pi-agent",
+ "version": "0.1.0",
+ "lockfileVersion": 3,
+ "requires": true,
+ "packages": {
+ "": {
+ "name": "learn-pi-agent",
+ "version": "0.1.0",
+ "devDependencies": {
+ "@types/node": "latest",
+ "tsx": "latest",
+ "typescript": "latest"
+ }
+ },
+ "node_modules/@esbuild/aix-ppc64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/aix-ppc64/-/aix-ppc64-0.28.1.tgz",
+ "integrity": "sha512-Svl7tq8k/08+p6CXPpRjQ1fKX+1odH/BQbb48fV6fj3CWHhsoIOoY87w1oHXm0qEpkIK3ZfVgp0hed3XBXzXMQ==",
+ "cpu": [
+ "ppc64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "aix"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/android-arm": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/android-arm/-/android-arm-0.28.1.tgz",
+ "integrity": "sha512-0k2F129Xdio1TdJfzJ8sy1Q47vUD2NnwdhiAf7drUN1EBTfPf4hsFCtmMgu/6m8JSzsBrlmVjudMBQqOfG8usQ==",
+ "cpu": [
+ "arm"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "android"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/android-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/android-arm64/-/android-arm64-0.28.1.tgz",
+ "integrity": "sha512-34EGEbCIAgosYz6goLcopX6Mo7NyGv9tfwEM2/7Ce2VcVRk568iSvniGWcUXIy7wEDR1wzolcxcriFVrWYcwBg==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "android"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/android-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/android-x64/-/android-x64-0.28.1.tgz",
+ "integrity": "sha512-dbwY7ltSMDWsRatcRpCnES4F+im88OCUgGZjy52shC7GqHRE/cYlxNbB4Z4UpJswpcc4Qxd2oE/ufM0p61IKng==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "android"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/darwin-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/darwin-arm64/-/darwin-arm64-0.28.1.tgz",
+ "integrity": "sha512-TZbWkQY7kvTAXbXUT7uVACR5cMHsDiSz9z7ZKAX/RTq/WJEk3QyRr0wZpNhBDX+/0CtdqUIJlOiodQcta6tY3Q==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/darwin-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/darwin-x64/-/darwin-x64-0.28.1.tgz",
+ "integrity": "sha512-zfdzgK9ACBNZLI/CyHTOx81SyNbM6YXn7rxSgX97VjyiPl9W1i4Ka4fgKECEoFCKGpvBj5qArWIGgQjOwkgskQ==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/freebsd-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.28.1.tgz",
+ "integrity": "sha512-wG2EA8ENdEI0qhkSZMjfqrdY+ziCYCPMmtZjjIwOmXFjmyzEHn+UUxk5of+SYsjtfs3VpnlC7QLzSI5hY/rOAw==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "freebsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/freebsd-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/freebsd-x64/-/freebsd-x64-0.28.1.tgz",
+ "integrity": "sha512-i7dZ9vQgnvSCzi/rYCXNgtF/U+eKZNJBzu3eTQbRgHnM7tNSizLOkRFAl3qzVc/Op/u5YkHHa4pf/3DOYHthLQ==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "freebsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-arm": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-arm/-/linux-arm-0.28.1.tgz",
+ "integrity": "sha512-qVXBOHQS+d5Y722GwJzJUtOLlX7km3CraOaGormF1pDtPd2C/l1SHRPgjLunLGe51Sh5YYWKMFDyV4SxgMQYTQ==",
+ "cpu": [
+ "arm"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-arm64/-/linux-arm64-0.28.1.tgz",
+ "integrity": "sha512-yHs+0uc8+nvEAfAfxrWQKK5peSNzBc4PegcMO0EJ2hT71uA7vB8Ihg2e77R2P7SG5uYjPbHlLLmve4LLLRCf0g==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-ia32": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-ia32/-/linux-ia32-0.28.1.tgz",
+ "integrity": "sha512-d1z4ZuP0ajrfz/FhGT4vv278rX8KnPPJx8i5+AtK7TYbx9Le9F1hyzurZpkEyjkGa9dUGhQow4C1NmeGvqxN2w==",
+ "cpu": [
+ "ia32"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-loong64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-loong64/-/linux-loong64-0.28.1.tgz",
+ "integrity": "sha512-M5sRjUVZrkm1OAPR3dlOYzNmN+loZKGVi1VUQGrwuqLcbR6qeAz+famMhjASeH3YVKvZz+zT1jlh/keC3Rj/lg==",
+ "cpu": [
+ "loong64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-mips64el": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-mips64el/-/linux-mips64el-0.28.1.tgz",
+ "integrity": "sha512-mRObBZeHh2OxcBFPWE/FjylkRgZdYuiTR3vaTozquCGOH14iP9oN4x4Ge81CoIDYQrXmIxpFumJBu5MtZpnQJQ==",
+ "cpu": [
+ "mips64el"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-ppc64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-ppc64/-/linux-ppc64-0.28.1.tgz",
+ "integrity": "sha512-slScBsMAb3GFDcdrCgLwZtPYRoH2H/youv10QiZyRjmsP48fznoveWytSgCI/R0ZcUgpc0ZhIUEx6LHts8yrfQ==",
+ "cpu": [
+ "ppc64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-riscv64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-riscv64/-/linux-riscv64-0.28.1.tgz",
+ "integrity": "sha512-kw0owk1o0GFETUJyW0jc0G4Yzs0BHZn0JDZ8JRT088vjJYX777BAs1fDGxAC+q831qOs2DTC96mNsG2opdfyyQ==",
+ "cpu": [
+ "riscv64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-s390x": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-s390x/-/linux-s390x-0.28.1.tgz",
+ "integrity": "sha512-/lAIjX8aYFRByhh6L5rYtPEDRqa9de/4V/juOXcta5frjvzXO4/sqEtyytse0g3zZFuWu5cDN0MkLz2qRDD2Ag==",
+ "cpu": [
+ "s390x"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/linux-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/linux-x64/-/linux-x64-0.28.1.tgz",
+ "integrity": "sha512-u/anNYF2mmVOEDwLtnQ1wOr3EZ9sTNGLWrsYGYwHWzGA3Si84IOkHXlbWTD1NB+9/1lcnweYKO54uhxZydNzfA==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/netbsd-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/netbsd-arm64/-/netbsd-arm64-0.28.1.tgz",
+ "integrity": "sha512-oks0DYbLwWMmaakTsCb+zL4E+aHRVLom9IJZOAthMQEPiQmydXHkziYEsGYRx0uNV/IjEKGAV941JzH02pflqw==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "netbsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/netbsd-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/netbsd-x64/-/netbsd-x64-0.28.1.tgz",
+ "integrity": "sha512-aeL6lAnN89Hz43Mlh1G8ARasbuoYvSITDEx0tHh5b7jJnHcssqgjy9Yx430GDpmCa6OyrKoS0aNRjKundRizGg==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "netbsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/openbsd-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/openbsd-arm64/-/openbsd-arm64-0.28.1.tgz",
+ "integrity": "sha512-MEFJe5C3R8pwXdZ5Y21oo6m7ePiS0d9pWucn99O/wvyJZChoIQKrQDxKrGeW8F5+T0okTHesAmDeiHDTIq0V/Q==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "openbsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/openbsd-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/openbsd-x64/-/openbsd-x64-0.28.1.tgz",
+ "integrity": "sha512-i/ZLIOafE0Z8cI/XANJAixoJL/uRAoS2xOA3rb0xN+KK0K177cMAsQYkzHtBrtMXAKuAc7HGgcWiZ/sRC1Nxgw==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "openbsd"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/openharmony-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/openharmony-arm64/-/openharmony-arm64-0.28.1.tgz",
+ "integrity": "sha512-ge+Z7EXFNt2BO1oAMsVpiQ8EwndV9i1xXerAeTIK7AtPs3bKFXQM7nlRxDSIUIMeueR1CNXxqztLzdNeReKBJg==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "openharmony"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/sunos-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/sunos-x64/-/sunos-x64-0.28.1.tgz",
+ "integrity": "sha512-BEjgtECkL3vY+SaSQ6nzVfiALUeFxpawyp8Jmf5PtYhf1Ug40N1h/hxlhts+f1FvSvarEigdxS3BlSMI2PJLcQ==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "sunos"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/win32-arm64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/win32-arm64/-/win32-arm64-0.28.1.tgz",
+ "integrity": "sha512-lCv9eK/H6ZJWbE7bh2nw54CZ9M2nupBxJcTsdk/QQnWkdSjKGuxmmH8/GWrlT1eMmZfn4dGcCjRte397WqfQXA==",
+ "cpu": [
+ "arm64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/win32-ia32": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/win32-ia32/-/win32-ia32-0.28.1.tgz",
+ "integrity": "sha512-zvb/mB2bSCoJOpoCBgYKKpX6YM6mJBlBUVUtVj41DlZJVEB6/0CKlRYxP5wWl1C1ILiCoAU5wZZ4q1P3qeS6Eg==",
+ "cpu": [
+ "ia32"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@esbuild/win32-x64": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/@esbuild/win32-x64/-/win32-x64-0.28.1.tgz",
+ "integrity": "sha512-bm4Mowrv+GXMlpWX++EcXw/iLyd1o3+bJkC2DkWXYVvgZCqD/bSj9ctZeAMC3cIxgjRVR2Dufaiu4YPxr5gW1A==",
+ "cpu": [
+ "x64"
+ ],
+ "dev": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/@types/node": {
+ "version": "25.9.3",
+ "resolved": "https://registry.npmmirror.com/@types/node/-/node-25.9.3.tgz",
+ "integrity": "sha512-603BddQMv3pUcr4U2dhujk83N2tTDVr/34wII2B6bJy6g+8WD6yUb11jszNs0gdi4PesVWl7ABt8nYMVpnLUcg==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "undici-types": ">=7.24.0 <7.24.7"
+ }
+ },
+ "node_modules/esbuild": {
+ "version": "0.28.1",
+ "resolved": "https://registry.npmmirror.com/esbuild/-/esbuild-0.28.1.tgz",
+ "integrity": "sha512-HrJrvZv5ayxBzPfwphOoNzkzOIIlifzk0KJrGK2c8R4+LKpMtpYLQeUdjnwjWv/LZlkH2laZk+4w78pi99D4Vw==",
+ "dev": true,
+ "hasInstallScript": true,
+ "license": "MIT",
+ "bin": {
+ "esbuild": "bin/esbuild"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "optionalDependencies": {
+ "@esbuild/aix-ppc64": "0.28.1",
+ "@esbuild/android-arm": "0.28.1",
+ "@esbuild/android-arm64": "0.28.1",
+ "@esbuild/android-x64": "0.28.1",
+ "@esbuild/darwin-arm64": "0.28.1",
+ "@esbuild/darwin-x64": "0.28.1",
+ "@esbuild/freebsd-arm64": "0.28.1",
+ "@esbuild/freebsd-x64": "0.28.1",
+ "@esbuild/linux-arm": "0.28.1",
+ "@esbuild/linux-arm64": "0.28.1",
+ "@esbuild/linux-ia32": "0.28.1",
+ "@esbuild/linux-loong64": "0.28.1",
+ "@esbuild/linux-mips64el": "0.28.1",
+ "@esbuild/linux-ppc64": "0.28.1",
+ "@esbuild/linux-riscv64": "0.28.1",
+ "@esbuild/linux-s390x": "0.28.1",
+ "@esbuild/linux-x64": "0.28.1",
+ "@esbuild/netbsd-arm64": "0.28.1",
+ "@esbuild/netbsd-x64": "0.28.1",
+ "@esbuild/openbsd-arm64": "0.28.1",
+ "@esbuild/openbsd-x64": "0.28.1",
+ "@esbuild/openharmony-arm64": "0.28.1",
+ "@esbuild/sunos-x64": "0.28.1",
+ "@esbuild/win32-arm64": "0.28.1",
+ "@esbuild/win32-ia32": "0.28.1",
+ "@esbuild/win32-x64": "0.28.1"
+ }
+ },
+ "node_modules/fsevents": {
+ "version": "2.3.3",
+ "resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.3.tgz",
+ "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+ "dev": true,
+ "hasInstallScript": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+ }
+ },
+ "node_modules/tsx": {
+ "version": "4.22.4",
+ "resolved": "https://registry.npmmirror.com/tsx/-/tsx-4.22.4.tgz",
+ "integrity": "sha512-X8EX+XV4QR5xCsrgxaED954zTDfY8KqlDtskKEL0cHhyS/P8b4IFOvGDQpsC9Q1XnLq915wEfwwY/zzskCtmhg==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "esbuild": "~0.28.0"
+ },
+ "bin": {
+ "tsx": "dist/cli.mjs"
+ },
+ "engines": {
+ "node": ">=18.0.0"
+ },
+ "optionalDependencies": {
+ "fsevents": "~2.3.3"
+ }
+ },
+ "node_modules/typescript": {
+ "version": "6.0.3",
+ "resolved": "https://registry.npmmirror.com/typescript/-/typescript-6.0.3.tgz",
+ "integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "bin": {
+ "tsc": "bin/tsc",
+ "tsserver": "bin/tsserver"
+ },
+ "engines": {
+ "node": ">=14.17"
+ }
+ },
+ "node_modules/undici-types": {
+ "version": "7.24.6",
+ "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.24.6.tgz",
+ "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==",
+ "dev": true,
+ "license": "MIT"
+ }
+ }
+}
diff --git a/learn-pi-agent/package.json b/learn-pi-agent/package.json
new file mode 100644
index 0000000..5f3aaa3
--- /dev/null
+++ b/learn-pi-agent/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "learn-pi-agent",
+ "version": "0.1.0",
+ "private": true,
+ "type": "module",
+ "scripts": {
+ "s01": "tsx s01_minimal_agent_core/code.ts",
+ "s02": "tsx s02_tool_contract/code.ts",
+ "s03": "tsx s03_provider_event_stream/code.ts",
+ "s04": "tsx s04_evented_tool_loop/code.ts",
+ "s05": "tsx s05_tool_hook_boundary/code.ts",
+ "s06": "tsx s06_turn_snapshot/code.ts",
+ "s07": "tsx s07_session_tree/code.ts",
+ "s08": "tsx s08_context_resources/code.ts",
+ "s09": "tsx s09_extension_runtime/code.ts",
+ "s10": "tsx s10_runtime_modes/code.ts",
+ "s11": "tsx s11_trust_and_execution_boundary/code.ts",
+ "s12": "tsx s12_package_distribution/code.ts",
+ "check": "tsc --noEmit"
+ },
+ "devDependencies": {
+ "@types/node": "latest",
+ "tsx": "latest",
+ "typescript": "latest"
+ }
+}
diff --git a/learn-pi-agent/s01_minimal_agent_core/README.md b/learn-pi-agent/s01_minimal_agent_core/README.md
new file mode 100644
index 0000000..b2e14fa
--- /dev/null
+++ b/learn-pi-agent/s01_minimal_agent_core/README.md
@@ -0,0 +1,251 @@
+# s01: Agent Core — 先存住一轮对话
+
+> *一轮,先存下来。*
+> **Pi 边界**:provider 输入边界 —— core 的内部状态和 provider 调用之间的第一道分隔。
+
+`s01` → [下一节:s02](../s02_tool_contract/)
+
+---
+
+## 问题
+
+你让模型回答一句话,模型给了回复,然后就停了。
+
+如果只是一次问答,这没问题。但你想让它"接着刚才的话继续",就会遇到第一个麻烦:**每一次调用都是独立的,模型自己并不记得上一句说了什么。**
+
+所以 core 要做的第一件事,不是让模型变得更聪明,而是**先把这一轮对话记下来**——用户说了什么、模型回了什么,按顺序存在 core 里。有了这份记录,模型才接得上"刚才",下一轮才有继续的基础。
+
+s01 只做这一件事:存下一轮对话。
+
+---
+
+## 解决方案
+
+一轮对话进入 core,中间要经过一条边界:
+
+```text
+AgentState(core 内部) ──ProviderInput──> Provider
+```
+
+core 内部怎么存,是 core 自己的事;provider 能看到什么,由 ProviderInput 决定。这两边**故意不同**:provider 拿不到 core 的内部状态,只拿到一份为它准备好的输入。
+
+用 `runOneTurn` 一个函数串起这一轮:
+
+| 步骤 | 动作 |
+| --- | --- |
+| 1 | 用户消息进 AgentState |
+| 2 | 从 AgentState 构造 ProviderInput |
+| 3 | 调 Provider,拿到 AssistantMessage |
+| 4 | assistant 消息存回 AgentState |
+
+另外有两处设计先提一下,后面会用到:assistant 消息带一个**停止原因**(这一轮是正常结束,还是出了错);core 的输出统一走一层 **Output**,不直接打印。
+
+---
+
+## 工作原理
+
+从 core 内部往外,一步步把这条边界搭出来。
+
+**core 先存什么。** 一条消息要么来自用户,要么来自 provider。provider 的回复还要带停止原因,这样 core 才知道这一轮是正常结束,还是出了错。
+
+```ts
+export type StopReason = "stop" | "error";
+
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type AgentMessage = UserMessage | AssistantMessage;
+```
+
+core 用一个数组按顺序把它们存起来。现在 state 只有一个字段,但后面所有的对话历史都从这里长出来。
+
+```ts
+export type AgentState = { messages: AgentMessage[] };
+
+export function createInitialState(): AgentState { return { messages: [] }; }
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; }
+```
+
+**然后是边界。** provider 不直接拿 AgentState,而是把每条消息转成它需要的 role 和 content,组成 ProviderInput。这一步只是做了一次转换,但它就是那道墙——core 的内部结构不会漏给 provider。
+
+```ts
+export type ProviderMessage = { role: "user" | "assistant"; content: string };
+export type ProviderInput = { messages: ProviderMessage[] };
+
+export function buildProviderInput(state: AgentState): ProviderInput {
+ return {
+ messages: state.messages.map((m) => ({ role: m.role, content: m.content })),
+ };
+}
+```
+
+provider 这边的约定就一句话:给我 ProviderInput,我还你 AssistantMessage。
+
+```ts
+export interface Provider {
+ complete(input: ProviderInput): Promise<AssistantMessage>;
+}
+```
+
+**最后收口。** core 不直接决定结果怎么展示,先留一层 Output——现在它只是包了一下 console,但把这层单独拎出来,后面有用。
+
+```ts
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+```
+
+一轮的推进就是把上面几步连起来:存入用户消息 → 构造输入 → 调 provider → 存回 assistant 消息。
+
+```ts
+export async function runOneTurn(
+ state: AgentState, provider: Provider, userInput: string,
+): Promise<AssistantMessage> {
+ state.messages.push(createUserMessage(userInput));
+ const providerInput = buildProviderInput(state);
+ const assistantMessage = await provider.complete(providerInput);
+ state.messages.push(assistantMessage);
+ return assistantMessage;
+}
+```
+
+> 这一节真正交付的东西,不是 `runOneTurn` 这个函数,而是那条边界——AgentState 和 ProviderInput 之间的转换。后面每一节都会往 ProviderInput 里加东西,但"core 的内部状态不直接暴露给 provider"这条规矩,从 s01 定下来就不会再变。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s01
+```
+
+输出类似:
+
+```text
+s01: Agent Core
+
+[user]
+你好,mini Pi
+
+[provider input]
+messages: 1
+last.role: user
+last.content: 你好,mini Pi
+
+[assistant]
+content: 收到:你好,mini Pi
+stopReason: stop
+
+[state]
+messages: 2
+last.role: assistant
+last.stopReason: stop
+```
+
+观察重点:`[provider input]` 里 provider 拿到的是 ProviderInput(只有 role / content),拿不到 core 的 AgentState;`[state]` 里一轮结束后有两条消息。
+
+### 错误情况
+
+```sh
+npm run s01 -- --case error
+```
+
+即使 provider 完不成这一轮,core 也照样把结果存成一条 AssistantMessage(stopReason 是 error)。state 的结构不会因为出错而变形——永远是一对 user / assistant 消息。
+
+---
+
+## 接入主线
+
+s01 是 mini Pi 的第 1 版,是后面 11 节的地基。本节确立的、后续**只扩展不改写**的永驻基础:
+
+| 基础 | 后续怎么演化 |
+| --- | --- |
+| `UserMessage` / `AssistantMessage` / `AgentMessage` | 消息三类型,union 只增(s04 加 ToolResultMessage) |
+| `AssistantMessage.stopReason` | 永驻,取值只增(s04 加 toolUse) |
+| `AgentState.messages` | 先是数组,s07 升级为 SessionTree(U1) |
+| `ProviderInput` | 字段只增(s02 加 tools、s08 加 systemPrompt);对齐 Pi Context,model 在 AgentState 不进 input |
+| `Provider` | s03 从 complete 升级为 stream(U1) |
+| `Output` | s10 升级为 RuntimeMode(R7 收获) |
+
+---
+
+## 接下来
+
+现在 ProviderInput 里只有 messages。下一节会往里加东西,让 provider 看到的不只是对话,还有 core 能提供的本地能力。进入下一节:[s02](../s02_tool_contract/)。
+
+---
+
+<details>
+<summary>Pi 源码溯源:Agent Core 一轮的完整设计</summary>
+
+教学版的 `runOneTurn` 只"存两条消息"。Pi 的 `packages/agent` 里,一轮远不止于此。
+
+### 源码在哪
+
+- `packages/agent/src/types.ts:317` — `AgentState` 类型
+- `packages/agent/src/agent.ts:166` — `Agent` 类(状态持有 + 生命周期)
+- `packages/agent/src/agent-loop.ts:155` — `runAgentLoop`(核心循环)
+
+### AgentState 的真实形状
+
+教学版只有一个 `messages` 数组。Pi 的 `AgentState`(`types.ts:317`)要多得多:
+
+```ts
+interface AgentState {
+ systemPrompt: string; // 系统提示(s08 方向)
+ model: Model; // 当前模型(跨轮配置,在 AgentState;教学版 s06 引入)
+ thinkingLevel: ThinkingLevel; // 推理强度
+ tools: AgentTool[]; // 工具(s02)
+ messages: AgentMessage[]; // 消息历史
+ isStreaming: boolean; // 正在流式输出?
+ streamingMessage?: AgentMessage; // 当前正在生成的那条
+ pendingToolCalls: Set<string>; // 待执行的工具调用
+ errorMessage?: string; // 出错信息
+}
+```
+
+一个"状态"承载的不只是消息,还有模型、工具、流式进度、错误——一轮里要用到的东西全在这里。教学版只留了 `messages` 一个字段。
+
+### 一轮的真实推进:runWithLifecycle
+
+教学版 `runOneTurn` 是一个 async 函数跑完就结束。Pi 用 `runWithLifecycle`(`agent.ts:451`)包了一层生命周期:
+
+```ts
+private async runWithLifecycle(executor) {
+ const abortController = new AbortController();
+ this.activeRun = { promise, resolve, abortController };
+ this._state.isStreaming = true;
+ try {
+ await executor(abortController.signal); // 真正的循环
+ } catch (error) {
+ await this.handleRunFailure(error, signal.aborted);
+ } finally {
+ this.finishRun();
+ }
+}
+```
+
+三个教学版没有的东西:
+
+- **AbortController**:用户随时能中断一轮(教学版一轮跑完才停)。
+- **activeRun**:防止重入——上一轮没跑完,下一轮进不来(`waitForIdle` 配合)。教学版没这个保护。
+- **handleRunFailure**:出错不崩,转成错误消息写回状态,对应教学版的 `stopReason = error`,但 Pi 有完整的失败恢复路径。
+
+### 双队列:steering 和 follow-up
+
+`runAgentLoop`(`agent-loop.ts:155`)其实是**两层循环**:
+
+- **outer loop**:消费 follow-up 消息队列(用户后续追加的话)。
+- **inner loop**:处理工具调用,以及 steering 消息(执行中途插入、用来"引导"方向的)。
+
+教学版的循环只有"工具来回"一条线;Pi 把"用户中途插话"和"工具来回"分成两个队列,各有优先级——这是真实交互场景必须的(用户不会老老实实等工具跑完)。
+
+### 消息带时间戳,内容是数组
+
+教学版 `content` 是字符串。Pi 的 `AgentMessage` 是 `content: Array` 加 `timestamp`——一条消息能同时含文本、图片、工具调用,且按时间排序。教学版先把 content 简化成 string,s04 加 tool_call 时才会碰到"一条消息多种内容"的真实形态。
+
+### 一句话
+
+`buildProviderInput` + `Provider.complete` 看似平淡,立的是 core 最重要的一堵墙。但 Pi 在墙两侧都加了教学版没有的工程层:墙这边是带生命周期的可变状态(model / tools / streaming / abort),墙那边是多 provider 的统一事件流(s03)。s01 先立最小骨架,这些层在后面陆续长出来。
+
+</details>
diff --git a/learn-pi-agent/s01_minimal_agent_core/code.ts b/learn-pi-agent/s01_minimal_agent_core/code.ts
new file mode 100644
index 0000000..6bfcd64
--- /dev/null
+++ b/learn-pi-agent/s01_minimal_agent_core/code.ts
@@ -0,0 +1,219 @@
+// s01: Agent Core — mini Pi 的第 1 版(地基)
+//
+// 本节只做一件事:core 保存一轮对话,provider 接收 ProviderInput 返回 AssistantMessage。
+// 后续 11 节都在这份代码上累积(宪法 R8:每节 code.ts 是前一节的超集)。
+// 词汇边界:本章只用 AgentState / AgentMessage / UserMessage / AssistantMessage /
+// Provider / ProviderInput / ProviderMessage / StopReason / runOneTurn / complete / Output。
+
+declare const process: {
+ argv: string[];
+ exitCode?: number;
+};
+
+// —— 停止原因(宪法 R1:取值只增。s04 会加入 "toolUse")——
+export type StopReason = "stop" | "error";
+
+// —— 消息三类型(AgentMessage union 只增,R1)——
+export type UserMessage = {
+ role: "user";
+ content: string;
+};
+
+export type AssistantMessage = {
+ role: "assistant";
+ content: string;
+ stopReason: StopReason; // stopReason 永驻:后续章节不会把它删掉
+};
+
+export type AgentMessage = UserMessage | AssistantMessage;
+
+// —— core 内部状态 ——
+export type AgentState = {
+ messages: AgentMessage[]; // s07 会把这里升级为 SessionTree(U1 受控升级)
+};
+
+// —— provider 对外看到的消息形态 ——
+export type ProviderMessage = {
+ role: "user" | "assistant";
+ content: string;
+};
+
+// —— provider 本轮输入(字段只增,R1:s02 加 tools、s06 加 modelName、s08 加 context)——
+export type ProviderInput = {
+ messages: ProviderMessage[];
+};
+
+// Provider call boundary: one ProviderInput in, one complete AssistantMessage out. s03 replaces complete with stream (U1 controlled upgrade).
+export interface Provider {
+  complete(input: ProviderInput): Promise<AssistantMessage>;
+}
+
+// —— 输出抽象(宪法 R7:core 不直接决定输出形式)——
+// s01 只用最简单的一层间接。s10 会把它升级为 RuntimeMode(PrintMode / JsonMode)。
+export type Output = {
+ log(line: string): void;
+};
+
+export function createConsoleOutput(): Output {
+ return { log: (line) => console.log(line) };
+}
+
+// ============ 构造函数 ============
+
+export function createInitialState(): AgentState {
+ return { messages: [] };
+}
+
+export function createUserMessage(content: string): UserMessage {
+ return { role: "user", content };
+}
+
+// 这一步看起来很薄,但它划出第一条边界:core 内部状态与 provider 输入分开。
+export function buildProviderInput(state: AgentState): ProviderInput {
+ return {
+ messages: state.messages.map((message) => ({
+ role: message.role,
+ content: message.content,
+ })),
+ };
+}
+
+// ============ 一轮推进 ============
+
+// Advances one turn: records the user input, asks the provider, appends its reply. Mutates `state` in place and returns the reply.
+export async function runOneTurn(
+  state: AgentState,
+  provider: Provider,
+  userInput: string,
+): Promise<AssistantMessage> {
+  state.messages.push(createUserMessage(userInput));
+  // The provider never receives AgentState itself, only the ProviderInput built from it.
+  const providerInput = buildProviderInput(state);
+  const assistantMessage = await provider.complete(providerInput);
+  // The reply is stored as-is, including its stopReason.
+  state.messages.push(assistantMessage);
+  return assistantMessage;
+}
+
+// ============ Demo Provider(fake,不依赖真实模型 API)============
+
+// Fake provider for the demo: echoes the last user message back, or reports an error; calls no real model API.
+export class DemoProvider implements Provider {
+  public lastInput: ProviderInput | undefined; // most recent input, kept so the demo can print it
+
+  async complete(input: ProviderInput): Promise<AssistantMessage> {
+    this.lastInput = input;
+    const lastMessage = input.messages[input.messages.length - 1];
+    // An empty history, or one that does not end on a user message, cannot be answered.
+    if (!lastMessage || lastMessage.role !== "user") {
+      return {
+        role: "assistant",
+        content: "Provider could not complete this turn.",
+        stopReason: "error",
+      };
+    }
+    // Demo hook: this marker text in the user input forces the error path.
+    if (lastMessage.content.includes("触发错误")) {
+      return {
+        role: "assistant",
+        content: "Provider could not complete this turn.",
+        stopReason: "error",
+      };
+    }
+
+    return {
+      role: "assistant",
+      content: `收到:${lastMessage.content}`,
+      stopReason: "stop",
+    };
+  }
+}
+
+// ============ 演示脚手架(观察用,不属于 core)============
+
+type DemoCase = "normal" | "error";
+
+function getDemoCase(): DemoCase {
+ const caseIndex = process.argv.indexOf("--case");
+ const value = caseIndex >= 0 ? process.argv[caseIndex + 1] : undefined;
+ return value === "error" ? "error" : "normal";
+}
+
+function getUserInput(demoCase: DemoCase): string {
+ return demoCase === "error" ? "触发错误" : "你好,mini Pi";
+}
+
+function printProviderInput(
+ output: Output,
+ input: ProviderInput | undefined,
+): void {
+ output.log("[provider input]");
+
+ if (!input) {
+ output.log("messages: 0");
+ output.log("");
+ return;
+ }
+
+ const lastMessage = input.messages[input.messages.length - 1];
+
+ output.log(`messages: ${input.messages.length}`);
+
+ if (lastMessage) {
+ output.log(`last.role: ${lastMessage.role}`);
+ output.log(`last.content: ${lastMessage.content}`);
+ }
+
+ output.log("");
+}
+
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+ output.log("[assistant]");
+ output.log(`content: ${message.content}`);
+ output.log(`stopReason: ${message.stopReason}`);
+ output.log("");
+}
+
+function printState(output: Output, state: AgentState): void {
+ const lastMessage = state.messages[state.messages.length - 1];
+
+ output.log("[state]");
+ output.log(`messages: ${state.messages.length}`);
+
+ if (lastMessage) {
+ output.log(`last.role: ${lastMessage.role}`);
+
+ if (lastMessage.role === "assistant") {
+ output.log(`last.stopReason: ${lastMessage.stopReason}`);
+ }
+ }
+
+ output.log("");
+}
+
+// Demo entry point: runs one turn against DemoProvider and prints the provider input, the reply and the final state.
+async function main(): Promise<void> {
+  const output = createConsoleOutput();
+  const demoCase = getDemoCase();
+  const userInput = getUserInput(demoCase);
+  const state = createInitialState();
+  const provider = new DemoProvider();
+
+  output.log("s01: Agent Core");
+  output.log("");
+
+  output.log("[user]");
+  output.log(userInput);
+  output.log("");
+
+  const assistantMessage = await runOneTurn(state, provider, userInput);
+  // provider.lastInput was captured during the turn, so it can be printed after the fact.
+  printProviderInput(output, provider.lastInput);
+  printAssistantMessage(output, assistantMessage);
+  printState(output, state);
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s01_minimal_agent_core/images/.gitkeep b/learn-pi-agent/s01_minimal_agent_core/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s02_tool_contract/README.md b/learn-pi-agent/s02_tool_contract/README.md
new file mode 100644
index 0000000..718f9e1
--- /dev/null
+++ b/learn-pi-agent/s02_tool_contract/README.md
@@ -0,0 +1,239 @@
+# s02: Tool Contract — 工具先变成说明
+
+> *把能力写成说明,再决定给谁看。*
+> **Pi 边界**:工具契约边界 —— 给 provider 的工具说明,和留在 core 的执行函数,分开。
+
+[上一节:s01](../s01_minimal_agent_core/) → `s02` → [下一节:s03](../s03_provider_event_stream/)
+
+---
+
+## 问题
+
+s01 里,provider 只看到了对话(messages)。但 core 手里其实还有本地能力:读一条笔记、看一眼当前时间。
+
+怎么让 provider 知道这些能力?直觉是直接把函数塞给它。但这走不通——provider 只是一个收文本、回文本的端点,它看不懂一段可执行代码,更不可能在它那边把代码跑起来。
+
+所以得先把能力翻译成 provider 能读的东西:一份**说明**。
+
+s02 只做这一件事:把本地能力变成说明,交给 provider。本节还不执行任何工具。
+
+---
+
+## 解决方案
+
+一个工具拆成两层:
+
+```text
+Tool
+ spec → 进 ProviderInput,给 provider 看
+ handler → 留在本地 ToolRegistry
+```
+
+provider 收到的永远是 `spec`(说明),`handler`(执行函数)从不出 core。
+
+这里有个故意的分隔:**provider 看得见的工具集合,和 core 实际跑得了的工具集合,不一样。** provider 只看到说明,看不到、也碰不到执行函数。这条分隔从 s02 立起来,后面所有和工具相关的机制都建立在它之上。
+
+---
+
+## 工作原理
+
+**先定义说明。** 一份工具说明要回答三件事:叫什么名字、干什么用、要什么参数。
+
+```ts
+export type ToolSpec = {
+  name: string;
+  description: string;
+  input: Record<string, string>;
+};
+```
+
+**再定义本地执行。** handler 是一段普通函数,待在 core 这边,provider 看不见它。
+
+```ts
+export type ToolHandler = (input: Record<string, string>) => string;
+```
+
+**把两层合起来是一个完整工具。** spec 和 handler 在 Tool 里配对,但只有 spec 会离开 core。
+
+```ts
+export type Tool = {
+ spec: ToolSpec;
+ handler: ToolHandler;
+};
+```
+
+**用一个登记表把它们收起来。** ToolRegistry 持有完整工具,但它对外只交出说明——`getSpecs()` 返回 spec,不带 handler。
+
+```ts
+export class ToolRegistry {
+  private tools = new Map<string, Tool>();
+
+  register(tool: Tool): void {
+    this.tools.set(tool.spec.name, tool);
+  }
+
+  getSpecs(): ToolSpec[] {
+    return [...this.tools.values()].map((tool) => tool.spec);
+  }
+}
+```
+
+**最后把说明塞进 provider 输入。** s01 的 ProviderInput 只有 messages,现在多一个 tools。`buildProviderInput` 接收 registry,把 `registry.getSpecs()` 放进去。
+
+```ts
+export type ProviderInput = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+export function buildProviderInput(
+ state: AgentState,
+ registry: ToolRegistry,
+): ProviderInput {
+ return {
+ messages: state.messages.map((message) => ({
+ role: message.role,
+ content: message.content,
+ })),
+ tools: registry.getSpecs(),
+ };
+}
+```
+
+> 这一节真正建立的不是某个函数,而是**说明和执行分开**:provider 拿到的永远是说明,handler 永远不出 core。后面 s04 会让 provider 真的"调用"工具,但即便到那时,provider 发出的也只是一个调用请求,handler 仍然在 core 这边跑。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s02
+```
+
+输出类似:
+
+```text
+s02: Tool Contract
+
+[tools registered]
+read_note: 读取一条笔记
+current_time: 返回一个固定的演示时间
+
+[provider input]
+messages: 1
+tools: 2
+- read_note: 读取一条笔记
+- current_time: 返回一个固定的演示时间
+
+[assistant]
+content: 我看到 2 个工具:read_note, current_time
+stopReason: stop
+```
+
+观察重点:`[provider input]` 的 tools 里只有说明(name / description),没有任何执行函数;`[tools registered]` 和 provider 看到的是同一份说明。
+
+---
+
+## 接入主线
+
+s02 在 s01 上累积。相对 s01 的变更:
+
+| 组件 | s01 | s02 |
+| --- | --- | --- |
+| `ProviderInput` | `{ messages }` | `{ messages, tools }`(R1 只增) |
+| 新增类型 | — | `ToolSpec` / `ToolHandler` / `Tool` |
+| 新增类 | — | `ToolRegistry`(`register` / `getSpecs`) |
+| `buildProviderInput` | `(state)` | `(state, registry)` |
+| `runOneTurn` | `(state, provider, userInput)` | `(state, provider, registry, userInput)` |
+
+**焊接点**:`buildProviderInput` 把 `registry.getSpecs()` 塞进 `ProviderInput.tools`;handler 留在 registry,绝不进 ProviderInput。
+
+---
+
+## 接下来
+
+现在 provider 能看到工具说明了,但它还是一次性吐出整段回复,core 得等到最后才知道它说了什么。
+
+下一节会改变 provider 返回结果的方式——不再一次性返回,而是一段一段地往外送。
+
+进入下一节:[s03](../s03_provider_event_stream/)。
+
+---
+
+
+Pi 源码溯源:工具的双层定义
+
+教学版用 `Tool = { spec, handler }` 一层搞定。Pi 把工具拆成**两层类型**,分属两个 package。
+
+### 源码在哪
+
+- `packages/ai/src/types.ts:338` — `Tool`(给 provider 看的那层)
+- `packages/agent/src/types.ts:361` — `AgentTool`(本地执行的那层)
+- `packages/agent/src/agent-loop.ts:548` — `prepareToolCallArguments`(参数预处理)
+
+### 两层工具
+
+**AI 层的 `Tool`**(`ai` 包)只描述能力,不含任何可执行代码——它会被序列化发给 provider:
+
+```ts
+interface Tool<TParameters extends TSchema = TSchema> {
+  name: string;
+  description: string;
+  parameters: TParameters; // TypeBox schema,给 LLM 看的参数结构
+}
+```
+
+**Agent 层的 `AgentTool`**(`agent` 包)继承 `Tool`,再加执行相关的东西:
+
+```ts
+interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any> extends Tool<TParameters> {
+  label: string; // UI 显示标签
+  prepareArguments?: (args: unknown) => Static<TParameters>; // 参数预处理
+  execute: (toolCallId, params, signal?, onUpdate?) => Promise<AgentToolResult<TDetails>>;
+  executionMode?: "sequential" | "parallel"; // 单工具覆盖执行模式
+}
+```
+
+教学版的 `Tool = { spec, handler }` 把这两层压成一层。Pi 之所以分两个 package,是因为 `ai` 层只关心"怎么跟 LLM 说话"(schema、序列化),`agent` 层才关心"怎么在本地执行"。
+
+### 参数用 TypeBox schema,不是简单对象
+
+教学版 `ToolSpec.input` 是 `Record<string, string>`(字符串字典)。Pi 用 **TypeBox**(`parameters: TSchema`)——一种运行时可校验的 JSON Schema 类型系统:
+
+- 能表达嵌套、枚举、可选、范围(字符串字典做不到)。
+- provider 收到的是标准 JSON Schema,跨厂商通用。
+- `prepareArguments` 拿到的参数能被 schema 校验和转换。
+
+教学版不引入 schema 库,代价是参数描述很弱(s04 的 ToolCall 也只能带字符串)。
+
+### prepareArguments:参数预处理钩子
+
+教学版 handler 直接吃原始 input。Pi 的 `AgentTool` 多了一个 `prepareArguments`(`agent-loop.ts:548`):
+
+```ts
+function prepareToolCallArguments(tool, toolCall) {
+ if (!tool.prepareArguments) return toolCall;
+ const prepared = tool.prepareArguments(toolCall.arguments);
+ if (prepared === toolCall.arguments) return toolCall;
+ return { ...toolCall, arguments: prepared };
+}
+```
+
+provider 给的参数可能粗糙或带默认值,`prepareArguments` 在执行前统一加工——教学版没有的一层"参数防腐"。
+
+### execute 带 AbortSignal 和 onUpdate
+
+教学版的 `ToolHandler` 是同步的 `(input) => string`。Pi 的 `execute` 多两个参数:
+
+- `signal: AbortSignal`:用户中断时能响应(呼应 s01 的 AbortController)。
+- `onUpdate`:执行中往外推流式进度(partialResult),UI 能实时显示"工具跑到哪了"。
+
+教学版的工具是"调一下、拿个字符串";Pi 的工具是"一个能被中断、能报进度的小任务"。
+
+### 一句话
+
+`Tool = { spec, handler }` 立的是"说明和执行分开"。Pi 把这条边界坐实成两个 package:`ai` 层的 `Tool`(schema、给 LLM)和 `agent` 层的 `AgentTool`(execute、本地),中间隔着参数预处理、中断、进度上报。教学版压成一层,把这条边界先立起来。
+
+
diff --git a/learn-pi-agent/s02_tool_contract/code.ts b/learn-pi-agent/s02_tool_contract/code.ts
new file mode 100644
index 0000000..882edcd
--- /dev/null
+++ b/learn-pi-agent/s02_tool_contract/code.ts
@@ -0,0 +1,236 @@
+// s02: Tool Contract — mini Pi 的第 2 版
+//
+// 在 s01 上累积:core 手里的本地能力,先变成 provider 能读的说明。
+// 工具拆成两层——spec 给 provider 看,handler 留在本地。本节不执行工具(执行是 s04)。
+// 词汇边界:本章新增 Tool / ToolSpec / ToolHandler / ToolRegistry / register / getSpecs / tools。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(R1 只增。s01 起,s04 加 toolUse)——
+export type StopReason = "stop" | "error";
+
+// —— 消息三类型(s01 起,union 只增)——
+export type UserMessage = {
+ role: "user";
+ content: string;
+};
+
+export type AssistantMessage = {
+ role: "assistant";
+ content: string;
+ stopReason: StopReason;
+};
+
+export type AgentMessage = UserMessage | AssistantMessage;
+
+// —— core 内部状态(s01 起。s07 升级为 SessionTree)——
+export type AgentState = {
+ messages: AgentMessage[];
+};
+
+// ============ s02 新增:工具契约 ============
+
+// Tool description: the layer shown to the provider. Describes a capability only; carries no executable code.
+export type ToolSpec = {
+  name: string;
+  description: string;
+  input: Record<string, string>; // parameter name -> human-readable description; once introduced it is never removed (R1)
+};
+
+// Local execution function: stays on the core side; the provider never sees it. Takes string arguments by name, returns text.
+export type ToolHandler = (input: Record<string, string>) => string;
+
+// 一个完整工具 = 说明 + 执行。两层在 Tool 里合起来,但只有 spec 会离开 core。
+export type Tool = {
+ spec: ToolSpec;
+ handler: ToolHandler;
+};
+
+// Tool registry: core holds the complete tools (spec + handler) and exposes only their specs.
+export class ToolRegistry {
+  private tools = new Map<string, Tool>();
+  // Keyed by spec.name, so registering the same name again replaces the earlier tool.
+  register(tool: Tool): void {
+    this.tools.set(tool.spec.name, tool);
+  }
+
+  // Hands out specs only, never handlers.
+  getSpecs(): ToolSpec[] {
+    return [...this.tools.values()].map((tool) => tool.spec);
+  }
+
+  // s04 will add run(call) here: the method that actually executes a handler.
+}
+
+// ============ provider 对外形态(s01 起)============
+
+export type ProviderMessage = {
+ role: "user" | "assistant";
+ content: string;
+};
+
+// provider 输入(R1 字段只增):s01 的 messages + s02 新增的 tools。
+export type ProviderInput = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[]; // s02 新增;s06 加 modelName、s08 加 context
+};
+
+// Provider call boundary (since s01): one ProviderInput in, one complete AssistantMessage out. s03 upgrades this to stream.
+export interface Provider {
+  complete(input: ProviderInput): Promise<AssistantMessage>;
+}
+
+// 输出抽象(R7。s01 起,s10 升级为 RuntimeMode)
+export type Output = {
+ log(line: string): void;
+};
+
+export function createConsoleOutput(): Output {
+ return { log: (line) => console.log(line) };
+}
+
+// ============ 构造函数 ============
+
+export function createInitialState(): AgentState {
+ return { messages: [] };
+}
+
+export function createUserMessage(content: string): UserMessage {
+ return { role: "user", content };
+}
+
+// s02 起:buildProviderInput 多接收 registry,把工具说明一起交给 provider。
+export function buildProviderInput(
+ state: AgentState,
+ registry: ToolRegistry,
+): ProviderInput {
+ return {
+ messages: state.messages.map((message) => ({
+ role: message.role,
+ content: message.content,
+ })),
+ tools: registry.getSpecs(),
+ };
+}
+
+// ============ 一轮推进 ============
+
+// Since s02: runOneTurn also takes the registry so tool specs travel with the messages. Mutates `state` in place and returns the reply.
+export async function runOneTurn(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  userInput: string,
+): Promise<AssistantMessage> {
+  state.messages.push(createUserMessage(userInput));
+
+  const providerInput = buildProviderInput(state, registry);
+  const assistantMessage = await provider.complete(providerInput);
+
+  state.messages.push(assistantMessage);
+  return assistantMessage;
+}
+
+// ============ Demo Provider(fake)============
+
+// Fake provider for the demo: replies with the count and names of the tool specs it was given; calls no real model API.
+export class DemoProvider implements Provider {
+  public lastInput: ProviderInput | undefined; // most recent input, kept so the demo can print it
+
+  async complete(input: ProviderInput): Promise<AssistantMessage> {
+    this.lastInput = input;
+    const names = input.tools.map((tool) => tool.name).join(", ");
+
+    return {
+      role: "assistant",
+      content: `我看到 ${input.tools.length} 个工具:${names}`,
+      stopReason: "stop",
+    };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry {
+ const registry = new ToolRegistry();
+
+ registry.register({
+ spec: {
+ name: "read_note",
+ description: "读取一条笔记",
+ input: { name: "笔记名" },
+ },
+ handler: (input) => `note:${input.name ?? "unknown"}`,
+ });
+
+ registry.register({
+ spec: {
+ name: "current_time",
+ description: "返回一个固定的演示时间",
+ input: {},
+ },
+ handler: () => "2026-01-01T00:00:00Z",
+ });
+
+ return registry;
+}
+
+function printProviderInput(output: Output, input: ProviderInput | undefined): void {
+ output.log("[provider input]");
+
+ if (!input) {
+ output.log("messages: 0");
+ output.log("tools: 0");
+ output.log("");
+ return;
+ }
+
+ output.log(`messages: ${input.messages.length}`);
+ output.log(`tools: ${input.tools.length}`);
+
+ for (const tool of input.tools) {
+ output.log(`- ${tool.name}: ${tool.description}`);
+ }
+
+ output.log("");
+}
+
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+ output.log("[assistant]");
+ output.log(`content: ${message.content}`);
+ output.log(`stopReason: ${message.stopReason}`);
+ output.log("");
+}
+
+// Demo entry point: lists the registered tool specs, runs one turn, then prints what the provider received and replied.
+async function main(): Promise<void> {
+  const output = createConsoleOutput();
+  const state = createInitialState();
+  const registry = createRegistry();
+  const provider = new DemoProvider();
+
+  output.log("s02: Tool Contract");
+  output.log("");
+
+  output.log("[tools registered]");
+  for (const spec of registry.getSpecs()) {
+    output.log(`${spec.name}: ${spec.description}`);
+  }
+  output.log("");
+
+  const assistant = await runOneTurn(
+    state,
+    provider,
+    registry,
+    "我有哪些本地能力?",
+  );
+  printProviderInput(output, provider.lastInput);
+  printAssistantMessage(output, assistant);
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s02_tool_contract/images/.gitkeep b/learn-pi-agent/s02_tool_contract/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s03_provider_event_stream/README.md b/learn-pi-agent/s03_provider_event_stream/README.md
new file mode 100644
index 0000000..a959d25
--- /dev/null
+++ b/learn-pi-agent/s03_provider_event_stream/README.md
@@ -0,0 +1,226 @@
+# s03: Provider Event Stream — 结果一段一段回来
+
+> *core 不等整段,而是一段段接。*
+> **Pi 边界**:provider 输出边界 —— provider 的输出先变成统一事件,core 再攒回消息。
+
+[上一节:s02](../s02_tool_contract/) → `s03` → [下一节:s04](../s04_evented_tool_loop/)
+
+---
+
+## 问题
+
+前两节里,provider 一次性吐出整段回复,core 只能干等 `complete` 那个 Promise 结束。
+
+这有两个麻烦。一是 core 看不到中间过程——回复很长时,core 没法边收边用。二是不同 provider 返回的东西五花八门,core 要是直接对接每一种,就会被各家差异绑死。
+
+所以 provider 的输出要先变成一种统一的东西:**事件**。core 只认事件,不再关心是哪家 provider。
+
+s03 只做这一件事:把 provider 的返回方式,从"一次性给整段"改成"一段段给事件"。
+
+---
+
+## 解决方案
+
+provider 不再返回完整的 AssistantMessage,而是返回一串事件。这一节用三种:
+
+| 事件 | 含义 |
+| --- | --- |
+| `message_start` | 一条回复开始了 |
+| `text_delta` | 一小段文本 |
+| `message_end` | 一条回复结束了,带上停止原因 |
+
+core 这边用一个 `collectAssistantMessage`,把事件重新攒回一条 AssistantMessage。
+
+> **[U1 升级]** Provider 的方法从 `complete` 改成 `stream`。这是宪法允许的受控升级:输出的形态从"一次性"变成"流式",没法同时存在,所以是替换、不是新增。后面 `stream` 就稳定下来,不再变。
+
+注意一件事:这次升级只动 provider 的**输出**,没动它的**输入**。ProviderInput 里的 messages 和 tools 都还在。
+
+---
+
+## 工作原理
+
+**先定义事件。** 一段回复被拆成三种事件,按顺序到来。
+
+```ts
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "message_end"; stopReason: StopReason };
+```
+
+**provider 改成产出事件。** `stream` 不再返回一条消息,而是一个挨个 yield 事件的异步生成器。
+
+```ts
+export interface Provider {
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+```
+
+**core 把事件攒回消息。** `collectAssistantMessage` 一边收事件、一边累加文本,等 `message_end` 到了,停止原因也就拿到了。如果事件流里一个 `message_end` 都没有,停止原因默认是 `stop`——core 不会因为 provider 少发了一个事件就崩掉。
+
+```ts
+export async function collectAssistantMessage(
+  events: AsyncGenerator<ProviderEvent>,
+  output: Output,
+): Promise<AssistantMessage> {
+ let content = "";
+ let stopReason: StopReason = "stop";
+
+ for await (const event of events) {
+ if (event.type === "message_start") {
+ output.log("message_start");
+ } else if (event.type === "text_delta") {
+ output.log(`text_delta: ${event.text}`);
+ content += event.text;
+ } else if (event.type === "message_end") {
+ stopReason = event.stopReason;
+ output.log(`message_end: ${stopReason}`);
+ }
+ }
+
+ return { role: "assistant", content, stopReason };
+}
+```
+
+**一轮推进换一种接法。** `runOneTurn` 内部从 `provider.complete(...)` 改成 `provider.stream(...)` + `collectAssistantMessage(...)`。对外只是 provider 的返回方式变了,state 还是照样存一条 AssistantMessage。
+
+> 这一节真正建立的是**provider 输出边界**:core 只和事件打交道,provider 内部怎么产生这些事件,是它自己的事。不同 provider 的差异,被事件流这一层吸收掉了。后面 s04 会让事件里多出一种新的类型,但"core 只认事件"这条规矩,从这里立起来。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s03
+```
+
+输出类似:
+
+```text
+s03: Provider Event Stream
+
+[user]
+你好,mini Pi
+
+[events]
+message_start
+text_delta: 收到:
+text_delta: 你好,mini Pi
+message_end: stop
+
+[assistant]
+content: 收到:你好,mini Pi
+stopReason: stop
+
+[provider input]
+messages: 1
+tools: 2
+```
+
+观察重点:`[events]` 里一段回复被拆成了四个事件;`[assistant]` 是这些事件攒回来的结果;最后一行 `tools: 2` 说明 tools 字段还在,没丢。
+
+---
+
+## 接入主线
+
+s03 在 s02 上累积。相对 s02 的变更:
+
+| 组件 | s02 | s03 |
+| --- | --- | --- |
+| `Provider` 方法 | `complete`(一次性) | **`stream`**(U1 升级,流式) |
+| 新增类型 | — | `ProviderEvent`(`message_start` / `text_delta` / `message_end`) |
+| 新增函数 | — | `collectAssistantMessage` |
+| `runOneTurn` | `(state, provider, registry, userInput)` | `(state, provider, registry, userInput, output)` |
+| `ProviderInput` | `{ messages, tools }` | **不变**(R1:tools 保留) |
+
+**焊接点**:`runOneTurn` 内部 `complete` → `stream` + `collectAssistantMessage`;输入侧(ProviderInput)一字未动。
+
+---
+
+## 接下来
+
+现在事件流里只有文本。
+
+下一节会让事件里多出一种东西——provider 不只是说文本,还会请求 core 去执行一个本地能力。
+
+进入下一节:[s04](../s04_evented_tool_loop/)。
+
+---
+
+
+Pi 源码溯源:多 provider 的统一事件流
+
+教学版的三种事件(message_start / text_delta / message_end)只覆盖文本。Pi 的 `packages/ai` 把各家 provider 的原始流统一成 **12 种事件**。
+
+### 源码在哪
+
+- `packages/ai/src/types.ts:358` — `AssistantMessageEvent`(事件联合类型)
+- `packages/ai/src/types.ts:280` — `StopReason`
+- `packages/ai/src/stream.ts:40` — 统一流式入口
+- `packages/ai/src/providers/{openai-completions,anthropic,google}.ts` — 各家适配
+
+### 12 种事件
+
+教学版 3 种,Pi 12 种(`types.ts:358`):
+
+```ts
+type AssistantMessageEvent =
+ | { type: "start" }
+ | { type: "text_start" | "text_delta" | "text_end" } // 文本
+ | { type: "thinking_start" | "thinking_delta" | "thinking_end" } // 推理过程
+ | { type: "toolcall_start" | "toolcall_delta" | "toolcall_end" } // 工具调用
+ | { type: "done"; reason: "stop" | "length" | "toolUse" }
+ | { type: "error"; reason: "aborted" | "error" };
+```
+
+教学版没有的几类:
+
+- **thinking_delta**:模型的推理过程(chain-of-thought)也是流式的,单独一类。教学版不区分推理和正文。
+- **toolcall_start/delta/end**:工具调用本身是分片到达的(参数 JSON 一段段来),不是一次性给齐。教学版 s04 用一个 `tool_call` 表示完整调用,Pi 要先攒碎片。
+- **每个事件都带 `partial: AssistantMessage`**:流式过程中每个事件都附上"当前累计的完整消息",消费方不用自己累加。
+
+### StopReason:5 种,不是 2 种
+
+教学版 `stop | error`。Pi(`types.ts:280`):
+
+```ts
+type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted";
+```
+
+- `length`:撞了 max_tokens(教学版没这个概念)。
+- `aborted`:用户主动中断(呼应 s01 的 AbortController)。
+
+注意 `done` 和 `error` 是两个顶层终止事件:正常结束发 `done`,出问题发 `error`。教学版把它们都塞进 `message_end` 的 stopReason,Pi 分成两个顶层事件。
+
+### 多 provider 怎么统一
+
+每家 provider 的原始流格式完全不同,Pi 给每家写一个适配器,都产出同一套 `AssistantMessageEvent`:
+
+| provider | 原始流 | 适配文件 | 关键转换 |
+| --- | --- | --- | --- |
+| OpenAI | `ChatCompletionChunk[]` | `openai-completions.ts:111` | `delta.content → text_delta`,`delta.tool_calls → toolcall_delta` |
+| Anthropic | `RawMessageStreamEvent[]` | `anthropic.ts:448` | `content_block_delta.text_delta → text_delta` |
+| Google | `GenerateContentResponse[]` | `google.ts:47` | `candidate.content.parts.text → text_delta` |
+
+三家的 `finish_reason` / `stop_reason` 各不相同,各自有 `mapStopReason` 映射到 Pi 的 5 种。这就是教学版 ProviderInput 边界在 provider 侧的对应——core 只认统一事件,provider 差异被适配器吃掉。
+
+### 边界:流中断和空流
+
+OpenAI 适配器(`openai-completions.ts:392`)的收尾逻辑:
+
+```ts
+if (options?.signal?.aborted) throw new Error("Request was aborted");
+if (output.stopReason === "error") throw new Error(output.errorMessage);
+if (!hasFinishReason) throw new Error("Stream ended without finish_reason");
+stream.push({ type: "done", reason: output.stopReason, message: output });
+```
+
+流中断、provider 报错、没给 finish_reason——三种异常都转成 `error` 事件或异常,消费方(s04 的循环)接住就行。教学版没这层(fake provider 不会失败)。
+
+### 一句话
+
+教学版 3 种事件立的是"provider 输出先变成统一事件"。Pi 把它扩成 12 种事件 + 5 种 stop reason + 三家适配器,把"多 provider 差异"全压在事件流这一层下面——core 永远只和 `AssistantMessageEvent` 打交道。
+
+
diff --git a/learn-pi-agent/s03_provider_event_stream/code.ts b/learn-pi-agent/s03_provider_event_stream/code.ts
new file mode 100644
index 0000000..49c1d07
--- /dev/null
+++ b/learn-pi-agent/s03_provider_event_stream/code.ts
@@ -0,0 +1,269 @@
+// s03: Provider Event Stream — mini Pi 的第 3 版
+//
+// [U1 受控升级] Provider 从 complete(一次性返回)升级为 stream(分段返回事件)。
+// 词汇边界:本章新增 ProviderEvent / stream / message_start / text_delta / message_end / collectAssistantMessage。
+// 关键:ProviderInput 的 tools 字段保留(R1),不因聚焦事件流而退化。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— s01 起:停止原因 ——
+export type StopReason = "stop" | "error";
+
+// —— s01 起:消息三类型 ——
+export type UserMessage = {
+ role: "user";
+ content: string;
+};
+
+export type AssistantMessage = {
+ role: "assistant";
+ content: string;
+ stopReason: StopReason;
+};
+
+export type AgentMessage = UserMessage | AssistantMessage;
+
+// —— s01 起:core 内部状态 ——
+export type AgentState = {
+ messages: AgentMessage[];
+};
+
+// —— Since s02: tool contract. ToolSpec is the description layer that is handed to the provider ——
+export type ToolSpec = {
+  name: string;
+  description: string;
+  input: Record<string, string>; // parameter name -> human-readable description
+};
+
+export type ToolHandler = (input: Record<string, string>) => string; // local execution function; takes string arguments by name
+
+export type Tool = {
+ spec: ToolSpec;
+ handler: ToolHandler;
+};
+
+export class ToolRegistry {
+  private tools = new Map<string, Tool>(); // complete tools (spec + handler), keyed by spec.name
+
+  register(tool: Tool): void {
+    this.tools.set(tool.spec.name, tool);
+  }
+  // Hands out specs only, never handlers.
+  getSpecs(): ToolSpec[] {
+    return [...this.tools.values()].map((tool) => tool.spec);
+  }
+}
+
+// —— s01 起:provider 对外消息 ——
+export type ProviderMessage = {
+ role: "user" | "assistant";
+ content: string;
+};
+
+// provider 输入(R1 只增):messages + tools 都在。
+export type ProviderInput = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+// ============ s03 新增:provider 输出从"一条消息"变成"一串事件" ============
+
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "message_end"; stopReason: StopReason };
+// s04 会在这里加 tool_call(R1:message_start 不会被删)
+
+// [U1 upgrade] Provider moves from complete to stream: it now yields ProviderEvents one at a time.
+// One-shot and streamed output cannot coexist, so this is a replacement rather than an addition.
+export interface Provider {
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+
+// —— s01 起:输出抽象(R7)——
+export type Output = {
+ log(line: string): void;
+};
+
+export function createConsoleOutput(): Output {
+ return { log: (line) => console.log(line) };
+}
+
+// ============ 构造函数 ============
+
+export function createInitialState(): AgentState {
+ return { messages: [] };
+}
+
+export function createUserMessage(content: string): UserMessage {
+ return { role: "user", content };
+}
+
+// s02 起
+export function buildProviderInput(
+ state: AgentState,
+ registry: ToolRegistry,
+): ProviderInput {
+ return {
+ messages: state.messages.map((message) => ({
+ role: message.role,
+ content: message.content,
+ })),
+ tools: registry.getSpecs(),
+ };
+}
+
+// ============ s03 新增:把一串事件攒回一条 assistant 消息 ============
+
+// Folds a provider event stream back into one AssistantMessage, logging each event to `output` as it arrives.
+export async function collectAssistantMessage(
+  events: AsyncGenerator<ProviderEvent>,
+  output: Output,
+): Promise<AssistantMessage> {
+  let content = "";
+  let stopReason: StopReason = "stop"; // stays "stop" if the stream ends without a message_end event
+  for await (const event of events) {
+    if (event.type === "message_start") {
+      output.log("message_start");
+    } else if (event.type === "text_delta") {
+      output.log(`text_delta: ${event.text}`);
+      content += event.text;
+    } else if (event.type === "message_end") {
+      stopReason = event.stopReason;
+      output.log(`message_end: ${stopReason}`);
+    }
+  }
+
+  return { role: "assistant", content, stopReason };
+}
+
+// ============ 一轮推进 ============
+
+// Since s03: runOneTurn also takes `output`; internally complete is replaced by stream + collectAssistantMessage. Mutates `state` in place.
+export async function runOneTurn(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  userInput: string,
+  output: Output,
+): Promise<AssistantMessage> {
+  state.messages.push(createUserMessage(userInput));
+
+  const providerInput = buildProviderInput(state, registry);
+  const assistantMessage = await collectAssistantMessage(
+    provider.stream(providerInput),
+    output,
+  );
+
+  state.messages.push(assistantMessage);
+  return assistantMessage;
+}
+
+// ============ Demo Provider(fake)============
+
+// Fake provider for the demo: streams an echo of the last user message as events; calls no real model API.
+export class DemoProvider implements Provider {
+  public lastInput: ProviderInput | undefined; // most recent input, kept so the demo can print it
+
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+
+    yield { type: "message_start" };
+    // An empty history, or one that does not end on a user message, ends the stream with stopReason "error".
+    if (!last || last.role !== "user") {
+      yield { type: "text_delta", text: "Provider could not complete this turn." };
+      yield { type: "message_end", stopReason: "error" };
+      return;
+    }
+
+    yield { type: "text_delta", text: "收到:" };
+    yield { type: "text_delta", text: last.content };
+    yield { type: "message_end", stopReason: "stop" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry {
+ const registry = new ToolRegistry();
+
+ registry.register({
+ spec: {
+ name: "read_note",
+ description: "读取一条笔记",
+ input: { name: "笔记名" },
+ },
+ handler: (input) => `note:${input.name ?? "unknown"}`,
+ });
+
+ registry.register({
+ spec: {
+ name: "current_time",
+ description: "返回一个固定的演示时间",
+ input: {},
+ },
+ handler: () => "2026-01-01T00:00:00Z",
+ });
+
+ return registry;
+}
+
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+ output.log("[assistant]");
+ output.log(`content: ${message.content}`);
+ output.log(`stopReason: ${message.stopReason}`);
+ output.log("");
+}
+
+function printProviderInputSummary(
+ output: Output,
+ input: ProviderInput | undefined,
+): void {
+ output.log("[provider input]");
+ if (!input) {
+ output.log("messages: 0");
+ output.log("tools: 0");
+ output.log("");
+ return;
+ }
+ output.log(`messages: ${input.messages.length}`);
+ output.log(`tools: ${input.tools.length}`);
+ output.log("");
+}
+
+// Demo entry point: runs one streamed turn, letting runOneTurn log each event, then prints the collected reply.
+async function main(): Promise<void> {
+  const output = createConsoleOutput();
+  const state = createInitialState();
+  const registry = createRegistry();
+  const provider = new DemoProvider();
+
+  output.log("s03: Provider Event Stream");
+  output.log("");
+
+  output.log("[user]");
+  output.log("你好,mini Pi");
+  output.log("");
+
+  output.log("[events]");
+  const assistant = await runOneTurn(
+    state,
+    provider,
+    registry,
+    "你好,mini Pi",
+    output,
+  );
+  output.log("");
+
+  printAssistantMessage(output, assistant);
+  // This line shows the tools field is still present (R1): switching to an event stream did not drop it.
+  printProviderInputSummary(output, provider.lastInput);
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s03_provider_event_stream/images/.gitkeep b/learn-pi-agent/s03_provider_event_stream/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s04_evented_tool_loop/README.md b/learn-pi-agent/s04_evented_tool_loop/README.md
new file mode 100644
index 0000000..49c1fbc
--- /dev/null
+++ b/learn-pi-agent/s04_evented_tool_loop/README.md
@@ -0,0 +1,227 @@
+# s04: Evented Tool Loop — 请求、执行、送回去
+
+> *provider 点菜,core 后厨,结果回桌。*
+> **Pi 边界**:工具执行边界 —— provider 只发请求,执行永远在 core;结果回到消息流,成为历史。
+
+[上一节:s03](../s03_provider_event_stream/) → `s04` → [下一节:s05](../s05_tool_hook_boundary/)
+
+---
+
+## 问题
+
+s02 给了 provider 工具说明,s03 给了事件流。但到现在为止,provider 还不能**真的用上**这些工具——它只会说文本,没法告诉 core"去把那个工具跑一下"。
+
+而且就算能请求,往往也不是一趟就够:provider 用完一个工具,看了结果,可能还要再用一个。这需要一个来回多次的循环。
+
+s04 要补上这一环:让 provider 能**请求**执行工具,core 执行后把结果**送回去**,循环到 provider 不再请求为止。
+
+---
+
+## 解决方案
+
+事件里多出一种 `tool_call`:provider 用它说"我要调用某个工具"。core 收到后做三步:
+
+```text
+tool_call → registry.run() → ToolResultMessage → 进 messages
+```
+
+工具结果和 user / assistant 消息**平级**,也存进 messages。这样 provider 下一轮就能看到"刚才那个工具返回了什么",决定是接着用工具,还是收尾。
+
+> **[U1 升级]** 一轮推进的函数从 `runOneTurn` 变成 `runEventedToolLoop`:原来跑一趟就结束,现在套了一个循环。这是受控升级——单轮没法表达"来回多趟",所以是替换。
+
+这一节还顺手立两条保护:循环有**轮次上限**,provider 一直请求也不会死循环(R5);工具执行**抛错也不崩**,错误会被包成一条结果消息送回去(R4)。
+
+---
+
+## 工作原理
+
+**先定义请求和结果。** provider 的请求叫 ToolCall,结果叫 ToolResultMessage。
+
+```ts
+export type ToolCall = {
+ id: string;
+ name: string;
+  input: Record<string, string>;
+};
+
+export type ToolResultMessage = {
+ role: "toolResult";
+ toolCallId: string;
+ content: string;
+};
+```
+
+工具结果也是一种消息,并入 AgentMessage;停止原因也多一个 `toolUse`,表示"provider 还想用工具,先别停"。
+
+```ts
+export type StopReason = "stop" | "toolUse" | "error";
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+```
+
+**事件里加 tool_call。** s03 的三种事件都在,多出来的是 `tool_call`。
+
+```ts
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+```
+
+**registry 学会执行。** `run(call)` 拿到工具就跑 handler;碰到没注册过的名字,不抛错,返回一句说明。
+
+```ts
+run(call: ToolCall): string {
+ const tool = this.tools.get(call.name);
+ if (!tool) {
+ return `unknown tool: ${call.name}`;
+ }
+ return tool.handler(call.input);
+}
+```
+
+**核心是循环。** `runEventedToolLoop` 每一轮:构造输入 → 收事件 → 遇到 tool_call 就执行、把结果存成消息 → 一轮事件收完后,看还有没有 tool_call。有就再来一轮,没有就收尾。轮次超过上限会主动停下。
+
+```ts
+while (true) {
+ turns += 1;
+ if (turns > MAX_TURNS) { /* 主动停止 */ }
+
+ // ... 收事件,遇 tool_call 就 registry.run + 存 ToolResultMessage ...
+
+ if (!sawToolCall || stopReason !== "toolUse") {
+ // 存下 assistant 消息,结束
+ return assistant;
+ }
+}
+```
+
+工具执行包在 try/catch 里:handler 抛错,错误变成 `error: ...` 这条结果消息,循环继续——provider 会看到这个错误,自己决定怎么办。
+
+> 这一节真正建立的是**工具执行边界**:provider 只发请求,执行永远发生在 core 这边;而且工具结果不丢,它回到消息流里,成为对话历史的一部分。后面 s05 会在"执行"这个动作的前后加插口,但"请求在 provider、执行在 core"这条分隔,从这里立起来。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s04
+```
+
+输出类似:
+
+```text
+s04: Evented Tool Loop
+
+[user]
+现在几点?
+
+message_start
+tool_call: current_time
+tool_result: 2026-01-01T00:00:00Z
+message_end: toolUse
+message_start
+text_delta: 工具结果是:2026-01-01T00:00:00Z
+message_end: stop
+
+[assistant]
+content: 工具结果是:2026-01-01T00:00:00Z
+stopReason: stop
+
+[state]
+user -> toolResult -> assistant
+```
+
+观察重点:第一轮 provider 请求 `current_time`,core 执行后结果进了 messages(`tool_result`);第二轮 provider 看到结果,输出文本并结束;`[state]` 里能看到 `user -> toolResult -> assistant` 这条链。
+
+---
+
+## 接入主线
+
+s04 在 s03 上累积。相对 s03 的变更:
+
+| 组件 | s03 | s04 |
+| --- | --- | --- |
+| `StopReason` | `stop \| error` | `stop \| toolUse \| error`(R1 加 toolUse) |
+| `AgentMessage` | `User \| Assistant` | `User \| Assistant \| ToolResultMessage`(R1) |
+| 新增类型 | — | `ToolCall` / `ToolResultMessage` |
+| `ProviderEvent` | 三种 | 加 `tool_call`(R1:message_start 保留) |
+| `ToolRegistry` | `register / getSpecs` | 加 `run(call)`(R2) |
+| 一轮推进 | `runOneTurn`(单趟) | **`runEventedToolLoop`**(U1 升级,带循环) |
+| 事件收集 | `collectAssistantMessage`(单函数) | 被循环内联收集取代(要处理 tool_call) |
+| 保护 | — | `MAX_TURNS` 终止(R5)、工具错误捕获(R4) |
+
+**焊接点**:循环内 `tool_call` → `registry.run(call)` → 结果存成 ToolResultMessage 进 messages → 下一轮 provider 输入;tools 始终取 `registry.getSpecs()`,和能执行的工具是同一份来源。
+
+---
+
+## 接下来
+
+现在 provider 一请求,工具就直接执行了——没有给 core 留任何介入的余地。
+
+下一节会在执行这个动作的前后留两个插口:执行前可以拦下来,执行后可以改写结果。
+
+进入下一节:[s05](../s05_tool_hook_boundary/)。
+
+---
+
+
+Pi 源码溯源:工具循环的并发与终止
+
+教学版的 `runEventedToolLoop` 是串行循环、`MAX_TURNS` 兜底。Pi 的工具循环(`packages/agent`)要复杂得多——最关键的是**并发执行**。
+
+### 源码在哪
+
+- `packages/agent/src/agent-loop.ts:373` — `executeToolCalls`(工具执行总入口)
+- `packages/agent/src/agent-loop.ts:451` — `executeToolCallsParallel`(并发执行)
+- `packages/agent/src/types.ts:29` — `ToolExecutionMode`
+
+### 一个 stop 可以带多个工具调用
+
+教学版的 DemoProvider 一轮只发一个 `tool_call`(循环本身会按事件顺序逐个串行处理)。Pi 里模型一次回复可以带**多个** tool call(`agent-loop.ts:207`):
+
+```ts
+if (toolCalls.length > 0) {
+ const batch = await executeToolCalls(currentContext, message, config, signal, emit);
+ toolResults.push(...batch.messages);
+ hasMoreToolCalls = !batch.terminate;
+}
+```
+
+一次 batch 里所有工具一起处理,结果一起回写。
+
+### sequential vs parallel
+
+教学版串行。Pi 有两种执行模式(`types.ts:29`):
+
+```ts
+type ToolExecutionMode = "sequential" | "parallel";
+```
+
+并发版 `executeToolCallsParallel`(`agent-loop.ts:451`)用 `Promise.all` 同时跑多个工具,但**事件顺序保持**——prepare 阶段(参数校验 + beforeHook,s05)是顺序的,execute 阶段才并发,结果按完成时间发事件。哪个工具该并发、哪个该独占,由工具自己的 `executionMode` 决定(s02 提过 AgentTool 有这个字段)。
+
+### 工具能主动终止整个循环
+
+教学版的终止条件是"provider 不再发 tool_call"或"撞 MAX_TURNS"。Pi 多一个:工具结果能带 `terminate`(`types.ts`):
+
+```ts
+interface AgentToolResult<T> {
+ content: (TextContent | ImageContent)[];
+ details: T;
+ terminate?: boolean; // 这个工具要求停止整个 agent
+}
+```
+
+只有 batch 里**所有**工具都 `terminate` 才真停——防止单个工具误杀整个会话。教学版的 MAX_TURNS 是被动兜底,Pi 的 terminate 是工具主动喊停。
+
+### 执行前后有插口(s05 的预告)
+
+教学版直接 `registry.run`。Pi 的每个工具执行经过 `prepareToolCall`(参数校验 + beforeToolCall hook)→ `execute` → `finalizeExecutedToolCall`(afterToolCall hook)。这就是 s05 的 hook 真实位置——它长在并发执行的 prepare/finalize 两侧。
+
+### 一句话
+
+教学版的循环立的是"请求 → 执行 → 回写 → 再来"。Pi 把它扩成 batch 并发 + 工具主动 terminate + 执行前后插口,但循环主干(收 tool_call、执行、结果回 messages、看是否继续)和教学版一致。
+
+
diff --git a/learn-pi-agent/s04_evented_tool_loop/code.ts b/learn-pi-agent/s04_evented_tool_loop/code.ts
new file mode 100644
index 0000000..6a4d822
--- /dev/null
+++ b/learn-pi-agent/s04_evented_tool_loop/code.ts
@@ -0,0 +1,306 @@
+// s04: Evented Tool Loop — mini Pi 的第 4 版
+//
+// [U1 升级] runOneTurn → runEventedToolLoop:provider 请求工具,core 执行后把结果送回,循环到 provider 不再请求为止。
+// 词汇边界:本章新增 ToolCall / ToolResultMessage / tool_call / toolUse / runEventedToolLoop / run。
+// 关键:tools 取 registry.getSpecs()(单一数据源,不硬编码);循环有上限(R5);工具出错不崩(R4)。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(R1:s04 加 toolUse)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— s01 起:消息 ——
+export type UserMessage = {
+ role: "user";
+ content: string;
+};
+
+export type AssistantMessage = {
+ role: "assistant";
+ content: string;
+ stopReason: StopReason;
+};
+
+// s04 新增:工具执行结果也是一种消息,和 user / assistant 平级。
+export type ToolResultMessage = {
+ role: "toolResult";
+ toolCallId: string;
+ content: string;
+};
+
+// s04 起:AgentMessage 并入 ToolResultMessage(R1 只增)
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— s01 起:core 内部状态 ——
+export type AgentState = {
+ messages: AgentMessage[];
+};
+
+// —— Since s02: tool contract. `input` maps each parameter name to a description string. ——
+export type ToolSpec = {
+  name: string;
+  description: string;
+  input: Record<string, string>;
+};
+
+export type ToolHandler = (input: Record<string, string>) => string;
+
+// Added in s04: a provider's request to call one tool (string-valued arguments keyed by name).
+export type ToolCall = {
+  id: string;
+  name: string;
+  input: Record<string, string>;
+};
+
+export type Tool = {
+ spec: ToolSpec;
+ handler: ToolHandler;
+};
+
+// Registry of tools keyed by spec name; the same map backs both getSpecs() and run().
+export class ToolRegistry {
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void {
+    this.tools.set(tool.spec.name, tool);
+  }
+
+  getSpecs(): ToolSpec[] {
+    return [...this.tools.values()].map((tool) => tool.spec);
+  }
+
+  // Added in s04: runs a tool. An unknown name does not throw; it returns a notice string.
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) {
+      return `unknown tool: ${call.name}`;
+    }
+    return tool.handler(call.input);
+  }
+}
+
+// —— provider 对外消息(s04:加 toolResult 形态)——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+
+// provider 输入(R1 只增)
+export type ProviderInput = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+// —— s03 起:事件流(s04 加 tool_call,保留 message_start,R1)——
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+
+export interface Provider {
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; // yields the events of one turn
+}
+
+// —— s01 起:输出抽象(R7)——
+export type Output = {
+ log(line: string): void;
+};
+
+export function createConsoleOutput(): Output {
+ return { log: (line) => console.log(line) };
+}
+
+// ============ 构造函数 ============
+
+export function createInitialState(): AgentState {
+ return { messages: [] };
+}
+
+export function createUserMessage(content: string): UserMessage {
+ return { role: "user", content };
+}
+
+// s04: projects core state into provider input; toolResult messages keep their toolCallId.
+export function buildProviderInput(
+  state: AgentState,
+  registry: ToolRegistry,
+): ProviderInput {
+  const messages: ProviderMessage[] = [];
+
+  for (const entry of state.messages) {
+    if (entry.role === "toolResult") {
+      const { toolCallId, content } = entry;
+      messages.push({ role: "toolResult", toolCallId, content });
+    } else {
+      // user / assistant: only role and content cross the boundary.
+      messages.push({ role: entry.role, content: entry.content });
+    }
+  }
+
+  return { messages, tools: registry.getSpecs() };
+}
+
+// ============ s04 [U1]:工具循环(取代 runOneTurn)============
+// s03 的 collectAssistantMessage(只攒文本事件)被这里的循环内联收集取代——
+// 循环要处理 tool_call,所以收集逻辑直接长在循环里。
+
+// R5:循环必须有上限。否则一个一直请求工具的 provider 会让 core 死循环。
+const MAX_TURNS = 8;
+
+// Appends the user input, then runs provider turns until a turn ends without requesting tools.
+// Every tool_call is run through the registry and stored as a toolResult message (at most MAX_TURNS turns).
+export async function runEventedToolLoop(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  userInput: string,
+  output: Output,
+): Promise<AssistantMessage> {
+  state.messages.push(createUserMessage(userInput));
+  let turns = 0;
+
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) {
+      output.log(`(达到最大轮次 ${MAX_TURNS},停止)`);
+      const stopped: AssistantMessage = {
+        role: "assistant",
+        content: "(达到最大轮次,停止)",
+        stopReason: "stop",
+      };
+      state.messages.push(stopped);
+      return stopped;
+    }
+
+    const providerInput = buildProviderInput(state, registry);
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") {
+        output.log("message_start");
+      } else if (event.type === "text_delta") {
+        output.log(`text_delta: ${event.text}`);
+        content += event.text;
+      } else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        // R4: a throwing tool must not crash the loop; the error becomes the result text.
+        let result: string;
+        try {
+          result = registry.run(event.call);
+        } catch (error) {
+          result = `error: ${error instanceof Error ? error.message : String(error)}`;
+        }
+
+        const resultMessage: ToolResultMessage = {
+          role: "toolResult",
+          toolCallId: event.call.id,
+          content: result,
+        };
+        state.messages.push(resultMessage);
+        output.log(`tool_result: ${result}`);
+      } else if (event.type === "message_end") {
+        stopReason = event.stopReason;
+        output.log(`message_end: ${stopReason}`);
+      }
+    }
+
+    // Finish when no tool was requested, or the provider did not end the turn with toolUse.
+    if (!sawToolCall || stopReason !== "toolUse") {
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.messages.push(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ Demo Provider(fake)============
+// Fake provider showing one full tool round trip: request a tool first, answer in text once its result is in.
+export class DemoProvider implements Provider {
+  public lastInput: ProviderInput | undefined;
+
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+
+    yield { type: "message_start" };
+
+    if (last?.role === "toolResult") {
+      // The tool result is back: emit text and finish normally.
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+
+    // Otherwise request a tool call.
+    yield {
+      type: "tool_call",
+      call: { id: "call_1", name: "current_time", input: {} },
+    };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+// Builds the demo registry holding a single tool, current_time, with a fixed answer.
+function createRegistry(): ToolRegistry {
+  const currentTime: Tool = {
+    spec: {
+      name: "current_time",
+      description: "返回一个固定的演示时间",
+      input: {},
+    },
+    handler: () => "2026-01-01T00:00:00Z",
+  };
+  const demoRegistry = new ToolRegistry();
+  demoRegistry.register(currentTime);
+  return demoRegistry;
+}
+
+// Prints the final assistant message (content and stop reason), then a blank line.
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+  const { content, stopReason } = message;
+  const lines = ["[assistant]", `content: ${content}`, `stopReason: ${stopReason}`, ""];
+  for (const line of lines) output.log(line);
+}
+
+// Prints the role of every stored message, in order, as one arrow-joined line.
+function printState(output: Output, state: AgentState): void {
+  const chain = state.messages.map(({ role }) => role).join(" -> ");
+  ["[state]", chain, ""].forEach((line) => output.log(line));
+}
+
+// Demo entry point: runs the tool loop once and prints the final message and the state chain.
+async function main(): Promise<void> {
+  const output = createConsoleOutput();
+  const state = createInitialState();
+  const registry = createRegistry();
+  const provider = new DemoProvider();
+
+  output.log("s04: Evented Tool Loop");
+  output.log("");
+
+  output.log("[user]");
+  output.log("现在几点?");
+  output.log("");
+
+  const assistant = await runEventedToolLoop(
+    state,
+    provider,
+    registry,
+    "现在几点?",
+    output,
+  );
+  output.log("");
+  printAssistantMessage(output, assistant);
+  printState(output, state);
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s04_evented_tool_loop/images/.gitkeep b/learn-pi-agent/s04_evented_tool_loop/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s05_tool_hook_boundary/README.md b/learn-pi-agent/s05_tool_hook_boundary/README.md
new file mode 100644
index 0000000..2111bf2
--- /dev/null
+++ b/learn-pi-agent/s05_tool_hook_boundary/README.md
@@ -0,0 +1,229 @@
+# s05: Tool Hook Boundary — 执行前后各留一个口子
+
+> *执行是直线,插口在两头。*
+> **Pi 边界**:工具插口边界 —— 执行这个动作被掰成 before / run / after,中间不变,两头可插。
+
+[上一节:s04](../s04_evented_tool_loop/) → `s05` → [下一节:s06](../s06_turn_snapshot/)
+
+---
+
+## 问题
+
+s04 里,provider 一请求工具,core 立刻就执行了——中间没有任何介入的余地。
+
+但真实使用中,执行前后往往要做事:执行前想检查"这个工具现在能不能用""参数合不合规",执行后想"给结果脱个敏""记一条日志"。如果这些都写死在执行逻辑里,每改一次规则就得动 core,没法按情况调整。
+
+s05 要在执行这个动作的**前后**各留一个插口。
+
+---
+
+## 解决方案
+
+两个插口:
+
+| 插口 | 时机 | 能做什么 |
+| --- | --- | --- |
+| `beforeToolCall` | 执行前 | 放行(allow)或拦下(block,带原因) |
+| `afterToolCall` | 执行后 | 保留结果,或改写后再交出去 |
+
+一个 `executeToolCall` 把执行流程串成三段:
+
+```text
+beforeToolCall → registry.run() → afterToolCall
+```
+
+中间那段还是 s04 的 `registry.run`,**ToolRegistry 本身不变**——hook 只是套在外面的一层,不改 core 的执行逻辑。
+
+拦下(block)时 handler 根本不会跑;handler 抛错还是按 s04 的规矩被包成一条结果消息,循环继续。
+
+---
+
+## 工作原理
+
+**先定义插口的返回。** 执行前要么放行、要么拦下并给个原因。
+
+```ts
+export type BeforeToolCallResult =
+ | { type: "allow" }
+ | { type: "block"; reason: string };
+
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+```
+
+两个插口都可选——不配就相当于全放行、不改写。
+
+**把三段串起来。** `executeToolCall` 就是 before → run → after。block 时直接返回,handler 不执行;handler 抛错在这里收口,不向上传。
+
+```ts
+export function executeToolCall(
+ registry: ToolRegistry,
+ hooks: ToolHooks,
+ call: ToolCall,
+): ToolResultMessage {
+ const before = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+
+ if (before.type === "block") {
+ return { role: "toolResult", toolCallId: call.id, content: `blocked: ${before.reason}` };
+ }
+
+ let result: string;
+ try {
+ result = registry.run(call);
+ } catch (error) {
+ result = `error: ${error instanceof Error ? error.message : String(error)}`;
+ }
+
+ const finalResult = hooks.afterToolCall?.(call, result) ?? result;
+ return { role: "toolResult", toolCallId: call.id, content: finalResult };
+}
+```
+
+**接到循环里。** s04 的 `runEventedToolLoop` 现在 `tool_call` 这一步不再直接调 `registry.run`,而是调 `executeToolCall`。对循环来说,拿到的还是一个 ToolResultMessage,流程没变——只是中间多过了一层插口。
+
+> 这一节真正建立的是**工具插口边界**:执行被掰成 before / run / after 三段,中间那段是 core 的固定逻辑,两头是可以从外面配置的钩子。后面 s11 的权限检查会直接接到 beforeToolCall 上,但"执行本身不动、规则插在两头"这条规矩,从这里立起来。
+
+---
+
+## 试一下
+
+运行(放行路径):
+
+```sh
+npm run s05
+```
+
+输出类似:
+
+```text
+s05: Tool Hook Boundary
+
+[user]
+复读一下 hi
+
+message_start
+tool_call: echo
+[beforeToolCall]
+allow: echo
+[afterToolCall]
+echo -> hi
+tool_result: checked: hi
+message_end: toolUse
+message_start
+text_delta: 工具结果是:checked: hi
+message_end: stop
+
+[assistant]
+content: 工具结果是:checked: hi
+stopReason: stop
+```
+
+再运行(拦截路径):
+
+```sh
+npm run s05 -- --case blocked
+```
+
+输出类似:
+
+```text
+message_start
+tool_call: dangerous
+[beforeToolCall]
+block: dangerous
+tool_result: blocked: 这个工具在演示里被禁用
+message_end: toolUse
+message_start
+text_delta: 工具结果是:blocked: 这个工具在演示里被禁用
+message_end: stop
+```
+
+观察重点:放行时 handler 跑了、afterToolCall 把结果改成了 `checked: ...`;拦截时 handler 根本没跑("不该执行到这里"从未出现),结果直接是 `blocked: ...`。
+
+---
+
+## 接入主线
+
+s05 在 s04 上累积。相对 s04 的变更:
+
+| 组件 | s04 | s05 |
+| --- | --- | --- |
+| 新增类型 | — | `BeforeToolCallResult` / `ToolHooks` |
+| 新增函数 | — | `executeToolCall`(把 s04 内联的 run + 错误捕获收口到这里) |
+| `runEventedToolLoop` | `(state, provider, registry, userInput, output)` | 多一个 `hooks` 参数;`tool_call` 走 `executeToolCall` |
+| `ToolRegistry` | `register / getSpecs / run` | **不变**(R2:hook 是外层装饰) |
+
+**焊接点**:循环内 `tool_call` → `executeToolCall(registry, hooks, call)` → ToolResultMessage 进 messages。registry 未改动,hook 套在执行外面。
+
+---
+
+## 接下来
+
+现在每一轮执行,信息都是现场从各个对象里读的。如果一轮开始后外部又改了工具列表或模型名,这一轮就会变得说不清。
+
+下一节会把一轮开始时用到的信息,先集中拍成一份快照。
+
+进入下一节:[s06](../s06_turn_snapshot/)。
+
+---
+
+
+Pi 源码溯源:beforeToolCall / afterToolCall 的真实位置
+
+教学版的 hook 是 `executeToolCall` 里的两个可选函数。Pi 的 hook 长在并发工具执行的 prepare/finalize 两侧,且能拿到比教学版丰富得多的上下文。
+
+### 源码在哪
+
+- `packages/agent/src/types.ts:83` — `BeforeToolCallContext` / `AfterToolCallContext`
+- `packages/agent/src/types.ts:262` — `beforeToolCall` / `afterToolCall` 签名
+- `packages/agent/src/agent-loop.ts:581` — beforeTool 触发点
+- `packages/agent/src/agent-loop.ts:676` — afterTool 触发点
+
+### hook 能拿到什么
+
+教学版的 `beforeToolCall(call)` 只拿到 ToolCall。Pi 的 context(`types.ts:83`)丰富得多:
+
+```ts
+interface BeforeToolCallContext {
+ assistantMessage: AgentMessage; // 触发这次工具调用的那条 assistant 消息
+ toolCall: AgentToolCall; // 工具调用本身
+ args: validatedArgs; // 已经校验过的参数
+ context: AgentContext; // 本轮的完整快照(s06)
+}
+```
+
+hook 能看到"是哪条 assistant 消息要调这个工具""本轮的完整上下文是什么"——不只是孤立的调用。
+
+### beforeTool 能 block
+
+教学版的 block 返回 `{ type: "block", reason }`。Pi 返回的是 `{ block, reason }`,字段形状不同,语义一致(`agent-loop.ts:581`):
+
+```ts
+if (config.beforeToolCall) {
+ const beforeResult = await config.beforeToolCall(
+ { assistantMessage, toolCall, args, context }, signal);
+ if (beforeResult?.block) {
+ return { kind: "immediate",
+ result: createErrorToolResult(beforeResult.reason || "blocked"),
+ isError: true };
+ }
+}
+```
+
+block 后工具跳过执行,直接生成一条错误结果发回去——和教学版语义一致,但 Pi 把它包成 `kind: "immediate"`(立即返回),无缝接入 s04 的并发执行框架。
+
+### afterTool 能改写结果
+
+教学版的 `afterToolCall(call, result) => string` 只能改 content。Pi 的 `afterToolCall` 能改更多字段(错误标记、terminate 标志等),是字段级覆盖、非深度合并。
+
+### hook 是异步的,且能被中断
+
+教学版的 hook 是同步函数。Pi 的 hook 是 `async`,且都带 `signal: AbortSignal`——用户中断时 hook 也能及时收手。这呼应 s01 的 AbortController 贯穿到每一层。
+
+### 一句话
+
+教学版的 `executeToolCall = before → run → after` 立的是"执行前后留插口"。Pi 把这两个插口坐实在并发执行的 prepare/finalize 两侧,context 更丰富(assistant 消息 + 本轮快照)、异步且可中断。后面 s11 的权限检查会直接接到 beforeToolCall 上。
+
+
diff --git a/learn-pi-agent/s05_tool_hook_boundary/code.ts b/learn-pi-agent/s05_tool_hook_boundary/code.ts
new file mode 100644
index 0000000..50f5c42
--- /dev/null
+++ b/learn-pi-agent/s05_tool_hook_boundary/code.ts
@@ -0,0 +1,282 @@
+// s05: Tool Hook Boundary — mini Pi 的第 5 版
+//
+// 在工具执行的前后各留一个插口:执行前可以拦,执行后可以改结果。
+// 词汇边界:本章新增 beforeToolCall / afterToolCall / ToolHooks / BeforeToolCallResult / executeToolCall / allow / block。
+// 关键:hook 是外层装饰,ToolRegistry 本身不变(R2);执行+错误捕获收口到 executeToolCall。
+
+declare const process: {
+ argv: string[];
+ exitCode?: number;
+};
+
+// —— 停止原因(s04 起)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— 消息(s01 起 + s04 的 ToolResultMessage)——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+export type AgentState = { messages: AgentMessage[] };
+
+// —— Tool contract (since s02): inputs are string values keyed by parameter name ——
+export type ToolSpec = { name: string; description: string; input: Record<string, string> };
+export type ToolHandler = (input: Record<string, string>) => string;
+export type ToolCall = { id: string; name: string; input: Record<string, string> };
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+
+export class ToolRegistry {
+  private tools = new Map<string, Tool>(); // keyed by spec name
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`; // unknown name: report it, do not throw
+    return tool.handler(call.input);
+  }
+}
+
+// —— provider 对外(s04 起)——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { messages: ProviderMessage[]; tools: ToolSpec[] };
+
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+
+export interface Provider {
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; // yields the events of one turn
+}
+
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+
+// ============ s05 新增:执行前后的两个插口 ============
+
+export type BeforeToolCallResult =
+ | { type: "allow" }
+ | { type: "block"; reason: string };
+
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+
+// Runs one tool call through three stages: before hook -> registry.run -> after hook.
+// The before hook may block the call; the after hook may rewrite the result.
+// Errors thrown by registry.run are caught here and become the result text (R4).
+export function executeToolCall(
+  registry: ToolRegistry,
+  hooks: ToolHooks,
+  call: ToolCall,
+): ToolResultMessage {
+  const toMessage = (content: string): ToolResultMessage => ({
+    role: "toolResult",
+    toolCallId: call.id,
+    content,
+  });
+
+  const verdict = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+  if (verdict.type === "block") {
+    return toMessage(`blocked: ${verdict.reason}`);
+  }
+
+  let raw: string;
+  try {
+    raw = registry.run(call);
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    raw = `error: ${detail}`;
+  }
+
+  return toMessage(hooks.afterToolCall?.(call, raw) ?? raw);
+}
+
+// ============ 构造函数 ============
+export function createInitialState(): AgentState { return { messages: [] }; }
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; }
+
+// Projects core state into provider input; toolResult messages keep their toolCallId.
+export function buildProviderInput(state: AgentState, registry: ToolRegistry): ProviderInput {
+  const toProviderMessage = (entry: AgentMessage): ProviderMessage =>
+    entry.role === "toolResult"
+      ? { role: "toolResult", toolCallId: entry.toolCallId, content: entry.content }
+      : { role: entry.role, content: entry.content };
+
+  const messages = state.messages.map((entry) => toProviderMessage(entry));
+  const tools = registry.getSpecs();
+  return { messages, tools };
+}
+
+// ============ 工具循环(s04 起。s05:加 hooks,tool_call 走 executeToolCall)============
+const MAX_TURNS = 8;
+
+// Same loop as s04, except each tool_call goes through executeToolCall so the hooks can run.
+export async function runEventedToolLoop(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  hooks: ToolHooks,
+  userInput: string,
+  output: Output,
+): Promise<AssistantMessage> {
+  state.messages.push(createUserMessage(userInput));
+  let turns = 0;
+
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) {
+      const stopped: AssistantMessage = {
+        role: "assistant",
+        content: "(达到最大轮次,停止)",
+        stopReason: "stop",
+      };
+      state.messages.push(stopped);
+      return stopped;
+    }
+
+    const providerInput = buildProviderInput(state, registry);
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") {
+        output.log("message_start");
+      } else if (event.type === "text_delta") {
+        output.log(`text_delta: ${event.text}`);
+        content += event.text;
+      } else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        // s05: execution is delegated to executeToolCall; the hooks run inside it.
+        const resultMessage = executeToolCall(registry, hooks, event.call);
+        state.messages.push(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") {
+        stopReason = event.stopReason;
+        output.log(`message_end: ${stopReason}`);
+      }
+    }
+
+    if (!sawToolCall || stopReason !== "toolUse") {
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.messages.push(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ Demo Provider(fake)============
+// Fake provider that requests the tool named at construction, so both allow and block paths can be shown.
+export class DemoProvider implements Provider {
+  public lastInput: ProviderInput | undefined;
+  constructor(private requestedTool: string) {}
+
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+
+    yield { type: "message_start" };
+
+    if (last?.role === "toolResult") {
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+
+    yield {
+      type: "tool_call",
+      call: { id: "call_1", name: this.requestedTool, input: { text: "hi" } },
+    };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry {
+  const demoTools: Tool[] = [{ // "echo" hands back the text it was given
+    spec: { name: "echo", description: "原样返回输入", input: { text: "要复读的文本" } },
+    handler: (input) => input.text ?? "(空)",
+  }, { // "dangerous" is the tool the demo's before hook blocks
+    spec: { name: "dangerous", description: "一个被禁用的演示工具", input: {} },
+    handler: () => "不该执行到这里",
+  }];
+  const demoRegistry = new ToolRegistry();
+  for (const tool of demoTools) demoRegistry.register(tool);
+  return demoRegistry;
+}
+
+// Demo hooks: log each stage, block the "dangerous" tool, prefix results with "checked: ".
+function createHooks(output: Output): ToolHooks {
+  const beforeToolCall = ({ name }: ToolCall): BeforeToolCallResult => {
+    output.log("[beforeToolCall]");
+    const blocked = name === "dangerous";
+    output.log(`${blocked ? "block" : "allow"}: ${name}`);
+    return blocked
+      ? { type: "block", reason: "这个工具在演示里被禁用" }
+      : { type: "allow" };
+  };
+  const afterToolCall = ({ name }: ToolCall, result: string): string => {
+    output.log("[afterToolCall]");
+    output.log(`${name} -> ${result}`);
+    return `checked: ${result}`;
+  };
+
+  return { beforeToolCall, afterToolCall };
+}
+
+// Reads the demo case from argv: "--case blocked" selects "blocked"; anything else is "normal".
+function getCase(): "normal" | "blocked" {
+  const flagAt = process.argv.indexOf("--case");
+  return flagAt !== -1 && process.argv[flagAt + 1] === "blocked" ? "blocked" : "normal";
+}
+
+// Prints the final assistant message (content and stop reason), then a blank line.
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+  const { content, stopReason } = message;
+  const lines = ["[assistant]", `content: ${content}`, `stopReason: ${stopReason}`, ""];
+  for (const line of lines) output.log(line);
+}
+
+// Demo entry point: picks the allow or block scenario from argv and runs the hooked tool loop once.
+async function main(): Promise<void> {
+  const output = createConsoleOutput();
+  const state = createInitialState();
+  const registry = createRegistry();
+  const hooks = createHooks(output);
+
+  const caseName = getCase();
+  const requestedTool = caseName === "blocked" ? "dangerous" : "echo";
+  const userInput = caseName === "blocked" ? "调用危险工具" : "复读一下 hi";
+  const provider = new DemoProvider(requestedTool);
+
+  output.log("s05: Tool Hook Boundary");
+  output.log("");
+
+  output.log("[user]");
+  output.log(userInput);
+  output.log("");
+
+  const assistant = await runEventedToolLoop(
+    state,
+    provider,
+    registry,
+    hooks,
+    userInput,
+    output,
+  );
+  output.log("");
+  printAssistantMessage(output, assistant);
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s05_tool_hook_boundary/images/.gitkeep b/learn-pi-agent/s05_tool_hook_boundary/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s06_turn_snapshot/README.md b/learn-pi-agent/s06_turn_snapshot/README.md
new file mode 100644
index 0000000..71c179d
--- /dev/null
+++ b/learn-pi-agent/s06_turn_snapshot/README.md
@@ -0,0 +1,190 @@
+# s06: Turn Snapshot — 一轮开始,先拍一张
+
+> *开始即冻结:本轮用的东西,后面改了也不算。*
+> **Pi 边界**:一轮状态边界 —— 一轮一旦开始,它依赖的配置就定死了,外部怎么变都不影响这一轮。
+
+[上一节:s05](../s05_tool_hook_boundary/) → `s06` → [下一节:s07](../s07_session_tree/)
+
+---
+
+## 问题
+
+前面几节里,每一轮的信息都是**现场读**的:provider 输入每次都从当前的 state 和 registry 临时拼。
+
+问题在于:如果一轮进行到一半,外部又改了 registry(加了个工具、删了个工具),这一轮就前后对不上了——provider 这一轮第一次看到的工具列表,和后来看到的不一样。一轮执行到一半被外部改动干扰,结果就说不清。
+
+可以先看一个小事故:provider 第一轮看到 1 个工具,工具执行过程中外部又注册了第 2 个工具。下一轮如果重新读 registry,同一个 turn 里的工具集合就变了。模型看到的世界前后不一致,调试时很难判断到底是哪一轮出了问题。
+
+s06 要在一轮开始时,把本轮依赖的东西**先固定下来**。
+
+---
+
+## 解决方案
+
+一轮开始先拍一份快照 `TurnSnapshot`,记录当时的 **messages** 和 **tools**。其中真正被固定下来供本轮使用的是 tools:之后本轮的 tools 都用快照里的,不管外部怎么改 registry;messages 仍取实时值(原因见下)。
+
+```text
+AgentState + tools → TurnSnapshot → 本轮 ProviderInput
+```
+
+先记住这条:snapshot 不是把整个世界冻住,只是把本轮需要稳定的输入固定下来。
+
+这里有个关键区分(也是和 Pi 对齐的地方):**快照固定的是"外部可变的配置"(tools),不是所有东西。**
+
+- **tools**:外部能改(registry 随时变),所以要固定。
+- **messages**:core 内部的,循环里 toolResult 会不断追加,取实时值。
+- **model**:是 **agent 级的跨轮配置**,放 `AgentState`,**不进单轮快照**——对齐 Pi 的 `AgentContext`(它也不含 model)。
+
+---
+
+## 工作原理
+
+**先定义快照。** 两个字段:本轮的消息、本轮的工具说明。
+
+```ts
+export type TurnSnapshot = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+```
+
+**model 不在快照里,在 AgentState。** 这是和"把 model 当输入参数"的区别——model 是 agent 的跨轮配置,一轮内不变、跨轮可换,所以它属于状态,不属于单轮快照。
+
+```ts
+export type AgentState = {
+ messages: AgentMessage[]; // s07 会升级为 SessionTree
+ model: string; // s06 起加:跨轮配置
+};
+```
+
+**在循环开始前拍。** `runEventedToolLoop` 进循环前,由调用方先 `createTurnSnapshot`。之后整个循环都用这份快照的 tools。
+
+```ts
+const snapshot = createTurnSnapshot(state, registry);
+```
+
+**本轮输入从快照取。** `buildProviderInputFromSnapshot`:messages 用实时的(循环内会增长),tools 用快照的(固定)。
+
+```ts
+export function buildProviderInputFromSnapshot(
+ snapshot: TurnSnapshot,
+ state: AgentState,
+): ProviderInput {
+ return {
+ messages: toProviderMessages(state.messages), // 实时
+ tools: snapshot.tools, // 固定
+ };
+}
+```
+
+> 这一节真正建立的是**一轮状态边界**:一轮一旦开始,它依赖的配置(tools)就冻结了。messages 该增长还增长,model 在 state 里跨轮——这正好对齐 Pi 的 `AgentContext`(固定 systemPrompt/messages/tools,model 在 `AgentState`)。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s06
+```
+
+输出类似:
+
+```text
+s06: Turn Snapshot
+
+[snapshot 固定性]
+snapshot.tools: 1
+registry 现在: 2
+state.model: demo-small(跨轮配置,不在 snapshot)
+
+[user]
+现在几点?
+
+message_start
+tool_call: current_time
+...
+message_end: stop
+
+[provider 看到的 tools]
+tools: 1
+```
+
+观察重点:`[snapshot 固定性]` 里 snapshot 拍下时只有 1 个工具,之后 registry 加到 2 个,但快照没变;`[provider 看到的 tools]` 也是 1——本轮 provider 自始至终只看到快照里的那一个。model 在 `state.model`,不进快照。
+
+---
+
+## 接入主线
+
+s06 在 s05 上累积。相对 s05 的变更:
+
+| 组件 | s05 | s06 |
+| --- | --- | --- |
+| `AgentState` | `{ messages }` | `{ messages, model }`(加 model 跨轮配置,对齐 Pi) |
+| 新增类型 | — | `TurnSnapshot { messages, tools }` |
+| 新增函数 | — | `createTurnSnapshot` / `buildProviderInputFromSnapshot` |
+| `runEventedToolLoop` | `(..., userInput, output)` | 接收外部拍好的 `snapshot`(替换 userInput) |
+
+**焊接点**:调用方先 `createTurnSnapshot(state, registry)`;循环内 `buildProviderInputFromSnapshot(snapshot, state)`——messages 实时、tools 固定。model 在 AgentState,不进 ProviderInput/snapshot(对齐 Pi 的 `Context` 不含 model)。
+
+---
+
+## 接下来
+
+到现在为止,历史还只是一根直线——messages 是个数组,只能一条接一条往后排。
+
+下一节会让历史能分叉:从中间某条岔出去,再走一条不同的路。
+
+进入下一节:[s07](../s07_session_tree/)。
+
+---
+
+
+Pi 源码溯源:AgentContext —— 每轮一份不可变快照
+
+教学版的 `TurnSnapshot` 固定 messages/tools。Pi 的等价物叫 `AgentContext`,每轮新建、不可变。
+
+### 源码在哪
+
+- `packages/agent/src/types.ts:387` — `AgentContext`
+- `packages/agent/src/agent-loop.ts:103` — 每轮拷贝构造
+
+### AgentContext 的真实形状
+
+```ts
+interface AgentContext {
+ systemPrompt: string; // 本轮系统提示(固定)
+ messages: AgentMessage[]; // 本轮对话历史(固定)
+ tools?: AgentTool[]; // 本轮工具(固定)
+}
+```
+
+教学版的 TurnSnapshot 字段(messages/tools)是 AgentContext 的子集——Pi 还固定了 systemPrompt(s08 会引入)。**注意 Pi 把 model 放在 `AgentState`(不在 AgentContext)**,因为 model 是跨轮的配置,不是单轮快照内容。教学版 s06 正是对齐了这点:model 在 AgentState,TurnSnapshot 不含 model。
+
+### 每轮新建,浅拷贝
+
+`agent-loop.ts:103`:
+
+```ts
+const currentContext: AgentContext = {
+ ...context,
+ messages: [...context.messages, ...prompts], // 浅拷贝新数组
+};
+```
+
+每轮创建新的 context 对象,messages 用新数组——本轮往里 push toolResult 不会污染原始 context。这正是教学版 snapshot 的"固定"语义。
+
+### turn_start / turn_end 事件
+
+Pi 在每轮边界发事件(`types.ts:408`):`turn_start` 和 `turn_end`(带 message 和 toolResults)。UI 和 extension(s09)靠它们观察一轮起止——教学版没有"轮"事件。
+
+### convertToLlm:发之前再过滤
+
+`AgentContext.messages` 是 core 内部的完整历史。真正发给 provider 前,Pi 还有一道 `convertToLlm` 过滤——把不该发给 LLM 的消息剔掉。snapshot 固定 core 侧,convertToLlm 管 provider 侧,两道关一起保证一轮输入既稳定又干净。
+
+### 一句话
+
+教学版的 TurnSnapshot 立的是"一轮开始把输入固定下来"。Pi 用 `AgentContext` 坐实它:每轮新建不可变副本 + turn 事件 + 发送前的 convertToLlm 过滤。关键对齐点:**model 在 AgentState 不进快照**,两边一致。
+
+
diff --git a/learn-pi-agent/s06_turn_snapshot/code.ts b/learn-pi-agent/s06_turn_snapshot/code.ts
new file mode 100644
index 0000000..64a5e2c
--- /dev/null
+++ b/learn-pi-agent/s06_turn_snapshot/code.ts
@@ -0,0 +1,282 @@
+// s06: Turn Snapshot — mini Pi 的第 6 版
+//
+// 一轮开始时先拍一份快照:messages/tools 固定下来,本轮不再受外部改动影响。
+// 词汇边界:本章新增 TurnSnapshot / createTurnSnapshot / buildProviderInputFromSnapshot。
+// 关键(对齐 Pi):model 是跨轮配置,放 AgentState,不进单轮快照(Pi 的 AgentContext 也不含 model)。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(s04 起)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— 消息 ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— core 内部状态(s06:加 model 跨轮配置,对齐 Pi AgentState)——
+export type AgentState = {
+ messages: AgentMessage[]; // s07 会升级为 SessionTree(U1)
+ model: string; // s06 起加:跨轮配置,不在单轮快照里
+};
+
+// —— 工具契约 ——
+export type ToolSpec = { name: string; description: string; input: Record<string, string> }; // what ProviderInput.tools carries for each tool
+export type ToolHandler = (input: Record<string, string>) => string; // synchronous tool body: string-valued input in, result text out
+export type ToolCall = { id: string; name: string; input: Record<string, string> }; // one invocation; id is echoed back as toolCallId
+export type Tool = { spec: ToolSpec; handler: ToolHandler }; // what ToolRegistry stores under spec.name
+
+export class ToolRegistry { // name-keyed tool table; registering an existing name replaces the earlier tool
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); } // builds a new array on every call
+  count(): number { return this.tools.size; }
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`; // an unknown name yields result text, not an exception
+    return tool.handler(call.input);
+  }
+}
+
+// —— provider 对外(对齐 Pi Context:messages + tools;model 在 state)——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { messages: ProviderMessage[]; tools: ToolSpec[] };
+
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+
+export interface Provider { // turns one ProviderInput into an async stream of ProviderEvent values
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+
+// —— s05 起:执行插口 ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+  // Every exit path answers with a toolResult for this call id; only the content differs.
+  const reply = (content: string): ToolResultMessage => ({ role: "toolResult", toolCallId: call.id, content });
+  const verdict: BeforeToolCallResult = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+  if (verdict.type === "block") return reply(`blocked: ${verdict.reason}`); // the tool itself is never run
+  let raw: string;
+  try {
+    raw = registry.run(call);
+  } catch (failure) {
+    // A throwing handler is reported as result text instead of propagating.
+    raw = `error: ${failure instanceof Error ? failure.message : String(failure)}`;
+  }
+  return reply(hooks.afterToolCall?.(call, raw) ?? raw); // afterToolCall may replace the text
+}
+
+// ============ s06 新增:一轮快照 ============
+
+// 对齐 Pi AgentContext:固定 messages/tools。model 在 AgentState,不进快照。
+export type TurnSnapshot = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+  // Copy each message into a new object with only the ProviderMessage fields (stopReason is left out).
+  return messages.map((entry): ProviderMessage =>
+    entry.role === "toolResult"
+      ? { role: "toolResult", toolCallId: entry.toolCallId, content: entry.content }
+      : { role: entry.role, content: entry.content },
+  );
+}
+
+export function createTurnSnapshot(state: AgentState, registry: ToolRegistry): TurnSnapshot {
+ return {
+ messages: toProviderMessages(state.messages),
+ tools: registry.getSpecs(),
+ };
+}
+
+// 本轮 provider 输入:messages 取实时(循环内会增长),tools 取快照(固定)。
+// model 在 state,不进 ProviderInput(对齐 Pi:调用 provider 时单独传 model)。
+export function buildProviderInputFromSnapshot(
+ snapshot: TurnSnapshot,
+ state: AgentState,
+): ProviderInput {
+ return {
+ messages: toProviderMessages(state.messages),
+ tools: snapshot.tools,
+ };
+}
+
+// ============ 构造函数 ============
+export function createInitialState(model = "demo-small"): AgentState {
+ return { messages: [], model };
+}
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; }
+
+export function snapshotToolsCount(snapshot: TurnSnapshot): number {
+ return snapshot.tools.length;
+}
+
+// ============ 工具循环(s06:接收外部拍好的 snapshot)============
+const MAX_TURNS = 8;
+
+export async function runEventedToolLoop(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  hooks: ToolHooks,
+  snapshot: TurnSnapshot,
+  output: Output,
+): Promise<AssistantMessage> { // resolves with the assistant message that ended the loop (also pushed onto state.messages)
+  let turns = 0;
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) { // safety valve: stop instead of calling the provider again
+      const stopped: AssistantMessage = {
+        role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop",
+      };
+      state.messages.push(stopped);
+      return stopped;
+    }
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state); // rebuilt on every pass of the loop
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call); // executed as soon as the event arrives
+        state.messages.push(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") { // only a tool-use turn keeps the loop going
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.messages.push(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ Demo Provider(fake)============
+export class DemoProvider implements Provider { // scripted stand-in for a model: one tool call, then one text answer
+  public lastInput: ProviderInput | undefined; // most recent input passed to stream()
+
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+
+    yield { type: "message_start" };
+
+    if (last?.role === "toolResult") { // the newest message is a tool result: answer with text and stop
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+
+    yield { // in every other case, request the current_time tool
+      type: "tool_call",
+      call: { id: "call_1", name: "current_time", input: {} },
+    };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry {
+ const registry = new ToolRegistry();
+ registry.register({
+ spec: { name: "current_time", description: "返回一个固定的演示时间", input: {} },
+ handler: () => "2026-01-01T00:00:00Z",
+ });
+ return registry;
+}
+
+function createHooks(output: Output): ToolHooks {
+ return {
+ beforeToolCall(call) {
+ output.log(`[beforeToolCall] allow: ${call.name}`);
+ return { type: "allow" };
+ },
+ afterToolCall(call, result) {
+ output.log(`[afterToolCall] ${call.name} -> ${result}`);
+ return result;
+ },
+ };
+}
+
+function printAssistantMessage(output: Output, message: AssistantMessage): void {
+ output.log("[assistant]");
+ output.log(`content: ${message.content}`);
+ output.log(`stopReason: ${message.stopReason}`);
+ output.log("");
+}
+
+async function main(): Promise<void> { // demo: take the snapshot, change the registry afterwards, then run one loop
+  const output = createConsoleOutput();
+  const state = createInitialState("demo-small");
+  const registry = createRegistry();
+  const hooks = createHooks(output);
+  const provider = new DemoProvider();
+
+  output.log("s06: Turn Snapshot");
+  output.log("");
+
+  // 1) Start of the turn: push the user message, then take the snapshot (the registry holds only current_time).
+  state.messages.push(createUserMessage("现在几点?"));
+  const snapshot = createTurnSnapshot(state, registry);
+
+  // 2) After the snapshot was taken, one more tool is registered from outside.
+  registry.register({
+    spec: { name: "echo", description: "原样返回输入", input: { text: "文本" } },
+    handler: (input) => input.text ?? "(空)",
+  });
+
+  // 3) Show that the snapshot is unchanged although the registry grew; model lives on state, not in the snapshot.
+  output.log("[snapshot 固定性]");
+  output.log(`snapshot.tools: ${snapshotToolsCount(snapshot)}`);
+  output.log(`registry 现在: ${registry.count()}`);
+  output.log(`state.model: ${state.model}(跨轮配置,不在 snapshot)`);
+  output.log("");
+
+  output.log("[user]");
+  output.log("现在几点?");
+  output.log("");
+
+  // 4) Run the loop: the provider receives the snapshot's tools, so the later echo tool is not included.
+  const assistant = await runEventedToolLoop(
+    state,
+    provider,
+    registry,
+    hooks,
+    snapshot,
+    output,
+  );
+  output.log("");
+
+  printAssistantMessage(output, assistant);
+
+  output.log("[provider 看到的 tools]");
+  output.log(`tools: ${provider.lastInput?.tools.length ?? 0}`);
+  output.log("");
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s06_turn_snapshot/images/.gitkeep b/learn-pi-agent/s06_turn_snapshot/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s07_session_tree/README.md b/learn-pi-agent/s07_session_tree/README.md
new file mode 100644
index 0000000..c3ec34e
--- /dev/null
+++ b/learn-pi-agent/s07_session_tree/README.md
@@ -0,0 +1,201 @@
+# s07: Session Tree — 历史不是一条线,是一棵树
+
+> *走错了能回头,回头还能换条路。*
+> **Pi 边界**:会话历史边界 —— 历史存成一棵树,一轮输入只取当前这条路径。
+
+[上一节:s06](../s06_turn_snapshot/) → `s07` → [下一节:s08](../s08_context_resources/)
+
+---
+
+## 问题
+
+到 s06 为止,消息都存在一个数组里,只能一条线往后加:A → B → C。
+
+但现实里经常想这样:走到 B,发现不太对,想退回 A 换个方向再试一次。数组做不到——它只会往后追加,没有"回到某个点、从那里分叉"。
+
+s07 要让历史能**分叉**。
+
+---
+
+## 解决方案
+
+给每条消息记一个 `parentId`,指向它的上一条。这样历史不再是一条线,而是一棵树:同一个节点可以长出多条分支。
+
+```text
+          user(方案A)
+          /          \
+  asst(A的回答)    asst(改走方案B)
+```
+
+`currentPath` 从当前位置一路回溯到根,得到**当前这条线**的消息序列——provider 拿到的还是线性的消息,对外完全没变。`moveTo` 切换当前位置,就能走到另一条分支。
+
+注意:s07 不是让 provider 理解一棵树。树只存在于 core 内部,用来支持回到历史点再分叉;provider 仍然只看当前路径上的线性 messages。
+
+> **[U1 升级]** `AgentState.messages`(数组)升级为 `SessionTree`。这是受控升级:数组没法表达分叉,所以是替换。但 `currentPath()` 仍产出线性 `AgentMessage[]`,ProviderInput 的构造方式一字不变——升级藏在 core 内部,不漏到外面。
+
+---
+
+## 工作原理
+
+**先定义节点。** 一个节点就是一条消息,外加它在树里的位置。
+
+```ts
+export type SessionEntry = {
+ id: string;
+ parentId: string | null;
+ message: AgentMessage;
+};
+```
+
+**SessionTree 做三件事。** 追加、切换位置、读当前路径。
+
+```ts
+export class SessionTree {
+  private entries = new Map<string, SessionEntry>();
+ private activeLeafId: string | null = null;
+ private counter = 0; // 实例级:每个树独立计数
+
+ append(message: AgentMessage): SessionEntry {
+ const entry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+ this.entries.set(entry.id, entry);
+ this.activeLeafId = entry.id;
+ return entry;
+ }
+
+ moveTo(entryId: string): void { /* 切换当前位置 */ }
+
+ currentPath(): AgentMessage[] {
+ // 从 activeLeaf 一路回溯到根,反转,得到当前这条线
+ }
+}
+```
+
+`append` 总是接在当前位置后面;`moveTo` 把当前位置挪到任意已有节点(分叉的起点);`currentPath` 回溯出当前这条线。切位置不会删掉旧节点——它们还在树里,只是不在当前路径上。
+
+**id 计数器是实例级的**(`this.counter`),不是全局变量。这样多个 SessionTree 互不干扰,也不会因为新建一个树就接着旧树的编号往后数。
+
+**对外不变。** `createTurnSnapshot` 和 `buildProviderInputFromSnapshot` 现在从 `state.session.currentPath()` 取消息,但产出的还是线性的 ProviderMessage[]——provider 这边感觉不到 core 内部已经从数组换成了树。
+
+> 这一节真正建立的是**会话历史边界**:历史在 core 内部是一棵树,但对一轮输入来说,它永远是"当前这条路径"的线性投影。后面 s08 会往输入里加项目资料,但"历史 = 当前路径"这条规矩,从这里立起来。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s07
+```
+
+输出类似:
+
+```text
+s07: Session Tree
+
+[路径:方案 A]
+user: 方案 A
+assistant: A 的回答
+
+[路径:方案 B]
+user: 方案 A
+assistant: 改走方案 B
+
+[所有节点]
+e1 parent=null user: 方案 A
+e2 parent=e1 assistant: A 的回答
+e3 parent=e1 assistant: 改走方案 B
+```
+
+观察重点:`e2` 和 `e3` 的 parent 都是 `e1`——从同一个节点分叉出两条路;切到方案 B 后,`[路径:方案 B]` 只含 `e1` 和 `e3`,不含 `e2`。
+
+---
+
+## 接入主线
+
+s07 在 s06 上累积。相对 s06 的变更:
+
+| 组件 | s06 | s07 |
+| --- | --- | --- |
+| `AgentState` | `{ messages: AgentMessage[]; model: string }` | **`{ session: SessionTree; model: string }`**(U1 升级:`messages` → `session`) |
+| 新增类型 | — | `SessionEntry` / `SessionTree` |
+| 消息写入 | `state.messages.push(...)` | `state.session.append(...)` |
+| 消息读取 | `state.messages` | `state.session.currentPath()` |
+| `createTurnSnapshot` / `buildProviderInputFromSnapshot` | 从 `state.messages` 取 | 从 `state.session.currentPath()` 取 |
+| `ProviderInput` 构造 | — | **不变**(currentPath 产出线性消息) |
+
+**焊接点**:消息读写全改为走 `state.session`;但 `currentPath()` 产出线性 `AgentMessage[]`,所以 ProviderInput / TurnSnapshot 的构造逻辑一字未动。U1 升级藏在 core 内部。
+
+---
+
+## 接下来
+
+现在一轮输入里有:当前路径上的历史、工具说明。模型名是跨轮配置,留在 `AgentState` 里,不进输入。
+
+下一节会再往输入里加一样东西——项目本身的资料(比如一份说明文档、一个可复用的提示词)。
+
+进入下一节:[s08](../s08_context_resources/)。
+
+---
+
+
+## Pi 源码溯源:持久化的 SessionTree 和 11 种 entry
+
+教学版的 SessionTree 在内存里、只有 message entry。Pi 的 session 是**持久化**的树,节点有 **11 种类型**,远不止消息。
+
+### 源码在哪
+
+- `packages/agent/src/harness/types.ts:334` — SessionTreeEntry 联合类型
+- `packages/agent/src/harness/types.ts:409` — 11 种 entry
+- `packages/agent/src/harness/session/session.ts:82` — Session 实现
+- `packages/agent/src/harness/session/session.ts:246` — `moveTo`(分支)
+
+### 不只是消息:11 种 entry
+
+教学版的 SessionEntry 只有 `{ id, parentId, message }`。Pi 的 entry 是个大联合(`types.ts:409`):
+
+```ts
+type SessionTreeEntry =
+ | MessageEntry // 消息(教学版唯一有的)
+ | ThinkingLevelChangeEntry // 改了推理强度
+ | ModelChangeEntry // 换了模型
+ | ActiveToolsChangeEntry // 启用/禁用了工具
+ | CompactionEntry // 做了上下文压缩(s08 方向)
+ | BranchSummaryEntry // 分支摘要
+ | CustomEntry / CustomMessageEntry // 自定义内容
+ | LabelEntry // 给某个节点打标签
+ | SessionInfoEntry // 会话元信息
+ | LeafEntry; // 当前活跃叶子
+```
+
+历史不只记"说了什么",还记"中途换了什么"——换模型、换工具、压缩上下文都是树上的节点。这样回到任何一个历史点,能完整还原当时的配置。
+
+### parentId + moveTo = 真分叉
+
+每个 entry 都有 `parentId`(`types.ts:337`),`moveTo(entryId)`(`session.ts:246`)切换当前位置:
+
+```ts
+async appendMessage(message) {
+ return this.appendTypedEntry({
+ type: "message", id: ...,
+ parentId: await this.storage.getLeafId(), // 挂在当前叶子下
+ timestamp: ..., message,
+ });
+}
+```
+
+新节点总挂在当前叶子下;`moveTo` 把叶子指针挪到任意历史节点,再 append 就长出一条新分支。和教学版的 SessionTree 一模一样的心智,但 Pi 的分支还能带 `BranchSummaryEntry` 记录"为什么岔出去"。
+
+### 持久化,不是内存
+
+教学版的 SessionTree 在内存里、进程退出就没了。Pi 的 session 走 `storage`(`session.ts`),落盘持久化——关掉重开能恢复,能跨会话。`LeafEntry` 专门跟踪"当前在哪条分支",持久化后重启能接上。
+
+### 边界
+
+`moveTo` 一个不存在的 id 抛 `SessionError`(教学版也抛错,一致)。分支不会删旧节点——它们留在树里,只是不在当前路径上。
+
+### 一句话
+
+教学版的 SessionTree 立的是"历史是树、能分叉、一轮输入取当前路径"。Pi 把它坐实成持久化的树 + 11 种 entry(消息/模型变更/工具变更/压缩/分支摘要…),parentId + moveTo 实现分叉。教学版只留 MessageEntry 和分支骨架。
+
+
diff --git a/learn-pi-agent/s07_session_tree/code.ts b/learn-pi-agent/s07_session_tree/code.ts
new file mode 100644
index 0000000..09f6313
--- /dev/null
+++ b/learn-pi-agent/s07_session_tree/code.ts
@@ -0,0 +1,316 @@
+// s07: Session Tree — mini Pi 的第 7 版
+//
+// [U1 受控升级] AgentState.messages 从数组升级为 SessionTree:历史能分叉,一轮输入取当前路径。
+// 词汇边界:本章新增 SessionTree / SessionEntry / parentId / moveTo / currentPath / append / activeLeaf。
+// 关键:currentPath() 仍产出线性 AgentMessage[],ProviderInput 的构造方式不变;id 计数器是实例级(不跨实例累加)。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(s04 起)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— 消息 ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// ============ s07 新增 [U1]:会话历史从数组变成树 ============
+
+// 一个节点 = 一条消息 + 它在树里的位置。
+export type SessionEntry = {
+ id: string;
+ parentId: string | null;
+ message: AgentMessage;
+};
+
+export class SessionTree { // in-memory history tree; each entry records its parent's id
+  private entries = new Map<string, SessionEntry>();
+  private activeLeafId: string | null = null; // null until the first append
+  private counter = 0; // per-instance id counter: separate trees number their entries independently
+
+  append(message: AgentMessage): SessionEntry { // adds a child of the active entry and makes it the active one
+    const entry: SessionEntry = {
+      id: `e${++this.counter}`,
+      parentId: this.activeLeafId,
+      message,
+    };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+
+  // Move the active position to an existing entry (the starting point of a branch). Throws on an unknown id.
+  moveTo(entryId: string): void {
+    if (!this.entries.has(entryId)) {
+      throw new Error(`unknown entry: ${entryId}`);
+    }
+    this.activeLeafId = entryId;
+  }
+
+  // Walk from the active entry up to the root and return the messages in root-first order.
+  currentPath(): AgentMessage[] {
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break; // a parent id with no entry ends the walk
+      path.push(entry.message);
+      cursor = entry.parentId;
+    }
+    return path.reverse(); // collected leaf-first, returned root-first
+  }
+
+  allEntries(): SessionEntry[] { // every stored entry, including ones off the current path
+    return [...this.entries.values()];
+  }
+}
+
+// [U1] core 内部状态:messages 数组 → SessionTree;model 跨轮配置(对齐 Pi AgentState)。
+export type AgentState = {
+ session: SessionTree;
+ model: string;
+};
+
+// —— 工具契约 ——
+export type ToolSpec = { name: string; description: string; input: Record<string, string> }; // what ProviderInput.tools carries for each tool
+export type ToolHandler = (input: Record<string, string>) => string; // synchronous tool body: string-valued input in, result text out
+export type ToolCall = { id: string; name: string; input: Record<string, string> }; // one invocation; id is echoed back as toolCallId
+export type Tool = { spec: ToolSpec; handler: ToolHandler }; // what ToolRegistry stores under spec.name
+
+export class ToolRegistry { // name-keyed tool table; registering an existing name replaces the earlier tool
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); } // builds a new array on every call
+  count(): number { return this.tools.size; }
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`; // an unknown name yields result text, not an exception
+    return tool.handler(call.input);
+  }
+}
+
+// —— provider 对外 ——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+
+// 对齐 Pi Context:messages + tools。model 在 AgentState,不进 ProviderInput。
+export type ProviderInput = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+
+export interface Provider { // turns one ProviderInput into an async stream of ProviderEvent values
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+
+// —— s05 起:执行插口 ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+ const before = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+ if (before.type === "block") {
+ return { role: "toolResult", toolCallId: call.id, content: `blocked: ${before.reason}` };
+ }
+ let result: string;
+ try {
+ result = registry.run(call);
+ } catch (error) {
+ result = `error: ${error instanceof Error ? error.message : String(error)}`;
+ }
+ const finalResult = hooks.afterToolCall?.(call, result) ?? result;
+ return { role: "toolResult", toolCallId: call.id, content: finalResult };
+}
+
+// —— s06 起:一轮快照(对齐 Pi AgentContext:固定 messages/tools;model 在 state 不进快照)——
+export type TurnSnapshot = {
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+ return messages.map((message) => {
+ if (message.role === "toolResult") {
+ return { role: "toolResult", toolCallId: message.toolCallId, content: message.content };
+ }
+ return { role: message.role, content: message.content };
+ });
+}
+
+// s07:messages 从 state.session.currentPath() 取(线性投影当前路径)。
+export function createTurnSnapshot(
+ state: AgentState,
+ registry: ToolRegistry,
+): TurnSnapshot {
+ return {
+ messages: toProviderMessages(state.session.currentPath()),
+ tools: registry.getSpecs(),
+ };
+}
+
+export function buildProviderInputFromSnapshot(
+ snapshot: TurnSnapshot,
+ state: AgentState,
+): ProviderInput {
+ return {
+ messages: toProviderMessages(state.session.currentPath()),
+ tools: snapshot.tools,
+ };
+}
+
+// ============ 构造函数 ============
+export function createInitialState(model = "demo-small"): AgentState {
+ return { session: new SessionTree(), model };
+}
+
+export function createUserMessage(content: string): UserMessage {
+ return { role: "user", content };
+}
+
+// ============ 工具循环(s07:用 state.session)============
+const MAX_TURNS = 8;
+
+export async function runEventedToolLoop(
+  state: AgentState,
+  provider: Provider,
+  registry: ToolRegistry,
+  hooks: ToolHooks,
+  snapshot: TurnSnapshot,
+  output: Output,
+): Promise<AssistantMessage> { // resolves with the assistant message that ended the loop (also appended to state.session)
+  let turns = 0;
+
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) { // safety valve: stop instead of calling the provider again
+      const stopped: AssistantMessage = {
+        role: "assistant",
+        content: "(达到最大轮次,停止)",
+        stopReason: "stop",
+      };
+      state.session.append(stopped);
+      return stopped;
+    }
+
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state); // rebuilt on every pass of the loop
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") {
+        output.log("message_start");
+      } else if (event.type === "text_delta") {
+        output.log(`text_delta: ${event.text}`);
+        content += event.text;
+      } else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call); // executed as soon as the event arrives
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") {
+        stopReason = event.stopReason;
+        output.log(`message_end: ${stopReason}`);
+      }
+    }
+
+    if (!sawToolCall || stopReason !== "toolUse") { // only a tool-use turn keeps the loop going
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ Demo Provider(fake)============
+export class DemoProvider implements Provider { // scripted stand-in for a model: one tool call, then one text answer
+  public lastInput: ProviderInput | undefined; // most recent input passed to stream()
+
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+
+    yield { type: "message_start" };
+
+    if (last?.role === "toolResult") { // the newest message is a tool result: answer with text and stop
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+
+    yield { // in every other case, request the current_time tool
+      type: "tool_call",
+      call: { id: "call_1", name: "current_time", input: {} },
+    };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架:演示历史分叉 ============
+
+function printPath(output: Output, title: string, path: AgentMessage[]): void {
+ output.log(title);
+ for (const message of path) {
+ output.log(`${message.role}: ${message.content}`);
+ }
+ output.log("");
+}
+
+async function main(): Promise<void> { // demo: build one line of history, branch from its first entry, print the tree
+  const output = createConsoleOutput();
+  const state = createInitialState();
+
+  output.log("s07: Session Tree");
+  output.log("");
+
+  // First line of history: plan A.
+  const first = state.session.append(createUserMessage("方案 A"));
+  state.session.append({
+    role: "assistant",
+    content: "A 的回答",
+    stopReason: "stop",
+  });
+
+  printPath(output, "[路径:方案 A]", state.session.currentPath());
+
+  // Go back to the first entry and branch off plan B from there.
+  state.session.moveTo(first.id);
+  state.session.append({
+    role: "assistant",
+    content: "改走方案 B",
+    stopReason: "stop",
+  });
+
+  printPath(output, "[路径:方案 B]", state.session.currentPath());
+
+  // The whole tree, including the entry that is no longer on the current path.
+  output.log("[所有节点]");
+  for (const entry of state.session.allEntries()) {
+    output.log(
+      `${entry.id} parent=${entry.parentId ?? "null"} ${entry.message.role}: ${entry.message.content}`,
+    );
+  }
+  output.log("");
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s07_session_tree/images/.gitkeep b/learn-pi-agent/s07_session_tree/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s08_context_resources/README.md b/learn-pi-agent/s08_context_resources/README.md
new file mode 100644
index 0000000..86037ab
--- /dev/null
+++ b/learn-pi-agent/s08_context_resources/README.md
@@ -0,0 +1,213 @@
+# s08: Context Resources — 项目资料进入 systemPrompt
+
+> *不光听用户说,还得带上项目自己的规矩。*
+> **Pi 边界**:上下文资源边界 —— 项目资料组装进 systemPrompt,和对话历史分开。
+
+[上一节:s07](../s07_session_tree/) → `s08` → [下一节:s09](../s09_extension_runtime/)
+
+---
+
+## 问题
+
+到 s07 为止,一轮输入里有历史、有工具说明。但 provider 还看不到**项目本身的资料**:一份编码规范、一个可复用的提示词、一段领域说明。
+
+这些东西不是用户刚刚说的话,但会影响 provider 怎么回答。比如用户问"总结一下这个项目",provider 如果知道"这个项目要求简洁的工程说明",回答会不一样。
+
+要是把这些硬编码进代码,每换一个项目就得改 core。需要一个地方统一装这些资料,再注入到输入里。
+
+s08 就做这件事——而且对齐 Pi 的做法:**把资料组装进 `systemPrompt`**(provider 的标准字段),而不是另造一个独立字段。
+
+---
+
+## 解决方案
+
+引入 `ContextResource`:一份项目资料。它有三类来源:
+
+| kind | 是什么 | 例子 |
+| --- | --- | --- |
+| `agents` | 项目级的规矩 | `AGENTS.md`(编码规范) |
+| `skill` | 一段可复用的能力说明 | `repo-review` |
+| `prompt` | 一个提示词模板 | `summarize` |
+
+`ResourceLoader` 负责加载它们,`buildSystemPrompt` 把它们拼成一段文本,**装进 `ProviderInput.systemPrompt`**。
+
+注意:`systemPrompt` 是和 `tools` **并列新增**的字段(tools 保留,R1)。ProviderInput 现在是 `{ systemPrompt, messages, tools }`——这正是 Pi 的 `Context` 形状。
+
+---
+
+## 工作原理
+
+**先定义资源。** 一份资料带三件事:来源类型、名字、内容。
+
+```ts
+export type ContextResource = {
+ kind: "agents" | "skill" | "prompt";
+ name: string;
+ content: string;
+};
+```
+
+**用一个加载器装起来。** `ResourceLoader.load()` 返回资源的拷贝(不直接交出内部数组)。
+
+```ts
+export class ResourceLoader {
+ constructor(private resources: ContextResource[]) {}
+ load(): ContextResource[] {
+ return this.resources.map((resource) => ({ ...resource }));
+ }
+}
+```
+
+**拼成 systemPrompt。** `buildSystemPrompt` 每份资料带上来源标记,用空行隔开。
+
+```ts
+export function buildSystemPrompt(resources: ContextResource[]): string {
+ return resources
+ .map((r) => `[${r.kind}:${r.name}]\n${r.content}`)
+ .join("\n\n");
+}
+```
+
+**塞进 ProviderInput。** `systemPrompt` 是 provider 的标准字段;`createTurnSnapshot` 多接收一个 loader,把 systemPrompt 一并拍进快照。
+
+```ts
+export type ProviderInput = {
+ systemPrompt: string; // s08 新增:项目资料组装进去(对齐 Pi Context.systemPrompt)
+ messages: ProviderMessage[];
+ tools: ToolSpec[]; // s02 起,保留(R1)
+};
+```
+
+> 这一节真正建立的是**上下文资源边界**:项目资料作为 `systemPrompt` 进入输入(system 级),和 `messages`(对话级)分开。对齐 Pi——它也是把 AGENTS.md/skills 组装进 `systemPrompt`,没有独立的 context 字段。后面 s11 会按信任程度决定要不要加载这些资源。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s08
+```
+
+输出类似:
+
+```text
+s08: Context Resources
+
+[resources]
+agents: AGENTS.md
+skill: repo-review
+prompt: summarize
+
+[provider input]
+systemPrompt blocks: 3
+messages: 1
+tools: 1
+
+[systemPrompt]
+[agents:AGENTS.md]
+Use concise engineering explanations.
+
+[skill:repo-review]
+Inspect package.json first. Then summarize risks.
+
+[prompt:summarize]
+Return three bullets and one next step.
+```
+
+观察重点:`[provider input]` 里 systemPrompt、messages、tools 三样都在——systemPrompt 是并列加进来的,没挤掉 tools;`[systemPrompt]` 是三份资源拼出来的完整文本。
+
+---
+
+## 接入主线
+
+s08 在 s07 上累积。相对 s07 的变更:
+
+| 组件 | s07 | s08 |
+| --- | --- | --- |
+| `ProviderInput` | `{ messages, tools }` | `{ systemPrompt, messages, tools }`(加 systemPrompt,对齐 Pi Context) |
+| `TurnSnapshot` | 两字段 | 加 `systemPrompt` |
+| 新增类型 | — | `ContextResource` |
+| 新增类/函数 | — | `ResourceLoader` / `buildSystemPrompt` |
+| `createTurnSnapshot` | `(state, registry)` | 多一个 `loader` |
+
+**焊接点**:`createTurnSnapshot` 调 `buildSystemPrompt(loader.load())` 把 systemPrompt 拍进快照;`buildProviderInputFromSnapshot` 把 `snapshot.systemPrompt` 放进 ProviderInput。tools 自始至终保留。
+
+---
+
+## 接下来
+
+现在 core 的能力(工具、资源)都写死在 core 里。每加一种新玩法都得改 core。
+
+下一节会让外部代码通过一个公开的 API 接入 core,core 不用动就能长出新能力。
+
+进入下一节:[s09](../s09_extension_runtime/)。
+
+---
+
+
+## Pi 源码溯源:system prompt 的运行时组装
+
+教学版把资源拼成 systemPrompt 塞进 ProviderInput。Pi 的 `packages/coding-agent` 有完整的资源发现 + system prompt 组装管线。
+
+### 源码在哪
+
+- `packages/coding-agent/src/core/resource-loader.ts` — `DefaultResourceLoader`(资源发现)
+- `packages/coding-agent/src/core/resource-loader.ts:28` — `buildSystemPrompt`(system 组装)
+- `packages/coding-agent/src/core/resource-loader.ts:79` — `loadProjectContextFiles`(AGENTS.md 发现)
+
+### 五类资源
+
+教学版只有一个 `ContextResource` 类型,靠 `kind` 区分三种来源。Pi 的 `ResourceLoader` 发现五类:
+
+| 类型 | 是什么 | 发现方式 |
+| --- | --- | --- |
+| context files | AGENTS.md / CLAUDE.md | 从 cwd 向上找(`loadProjectContextFiles`) |
+| skills | SKILL.md | `.pi/skills/` + 包 |
+| prompt templates | 提示模板 | `.pi/prompts/` |
+| themes | UI 主题 | `.pi/themes/` |
+| system prompt | 自定义 base | 配置 |
+
+教学版的 `kind: "agents" | "skill" | "prompt"` 是这五类的子集。
+
+### AGENTS.md 的发现规则
+
+`loadProjectContextFiles`(`resource-loader.ts:79`)按候选名 + 向上查找:
+
+```ts
+const candidates = ["AGENTS.md", "AGENTS.MD", "CLAUDE.md", "CLAUDE.MD"];
+// 从 cwd 逐级向上找,项目级优先于全局级
+```
+
+不只看当前目录,还往祖先目录找——monorepo 里每一层都能放 AGENTS.md,层层叠加。
+
+### system prompt 的组装顺序
+
+`buildSystemPrompt`(`resource-loader.ts:28`)按固定顺序拼:
+
+```text
+1. base system prompt(如果自定义)
+2. 项目上下文文件(AGENTS.md 内容)
+3. 可用技能列表(formatSkillsForPrompt)
+4. APPEND_SYSTEM.md(追加的系统提示)
+5. 日期 + 工作目录信息
+```
+
+教学版用 `buildSystemPrompt` 把资源拼成一段 `[kind:name]\ncontent`——是这里第 2、3 步的极简版。Pi 的 system prompt 是**运行时组装**的,不是硬编码——换项目、换工具,拼出来的 prompt 就不同。
+
+### 加载顺序有讲究
+
+`DefaultResourceLoader.reload` 先加载扩展(s09),再加载其他资源——因为扩展能注册新的资源路径,必须先让扩展跑完。最后才加载 context files。教学版没有这个依赖顺序(资源都是内存写死的)。
+
+### 边界
+
+- 资源缺失:通过 `ResourceDiagnostic` 报告但继续跑,不崩。
+- 循环加载:`canonicalizePath` + `Set` 去重,已加载路径跳过。
+- 单个资源失败:错误隔离,不影响其他资源。
+
+### 一句话
+
+教学版立的是"项目资料组装进 systemPrompt"(对齐 Pi 的 `Context.systemPrompt`)。Pi 把它坐实成五类资源的运行时发现 + 按固定顺序组装。教学版用内存数据保留最小路径,但"system prompt 运行时组装"这个核心心智一致。
+
+
diff --git a/learn-pi-agent/s08_context_resources/code.ts b/learn-pi-agent/s08_context_resources/code.ts
new file mode 100644
index 0000000..d5d2f3b
--- /dev/null
+++ b/learn-pi-agent/s08_context_resources/code.ts
@@ -0,0 +1,285 @@
+// s08: Context Resources — mini Pi 的第 8 版
+//
+// 把项目资料组装进 systemPrompt(对齐 Pi buildSystemPrompt),不再是独立 context 字段。
+// 词汇边界:本章新增 ContextResource / ResourceLoader / buildSystemPrompt / systemPrompt。
+// 关键(对齐 Pi Context):ProviderInput 加 systemPrompt(资料进去);tools 保留(R1)。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(s04 起)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— 消息 ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— 会话历史(s07 起)——
+export type SessionEntry = { id: string; parentId: string | null; message: AgentMessage };
+export class SessionTree { // Tree of session entries plus a movable "active leaf" cursor.
+  private entries = new Map<string, SessionEntry>(); // entry id -> entry
+  private activeLeafId: string | null = null; // null until the first append
+  private counter = 0; // drives the sequential ids e1, e2, ...
+  append(message: AgentMessage): SessionEntry { // New entry hangs under the active leaf and becomes the new leaf.
+    const entry: SessionEntry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+  moveTo(entryId: string): void { // Re-point the active leaf; throws Error for an id that was never appended.
+    if (!this.entries.has(entryId)) throw new Error(`unknown entry: ${entryId}`);
+    this.activeLeafId = entryId;
+  }
+  currentPath(): AgentMessage[] { // Messages from the root down to the active leaf.
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break;
+      path.push(entry.message);
+      cursor = entry.parentId;
+    }
+    return path.reverse(); // collected leaf-to-root, so flip it
+  }
+  allEntries(): SessionEntry[] { return [...this.entries.values()]; } // every entry, not only the active path
+}
+
+// —— core 状态(s06 起:model 跨轮配置)——
+export type AgentState = { session: SessionTree; model: string };
+
+// —— 工具契约 ——
+export type ToolSpec = { name: string; description: string; input: Record<string, string> }; // Record needs explicit type arguments
+export type ToolHandler = (input: Record<string, string>) => string; // NOTE(review): string values assumed from the visible call sites — confirm
+export type ToolCall = { id: string; name: string; input: Record<string, string> };
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+export class ToolRegistry { // Tool table keyed by spec name; registering an existing name replaces it.
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+  count(): number { return this.tools.size; }
+  run(call: ToolCall): string { // An unknown name yields a message string rather than throwing.
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`;
+    return tool.handler(call.input);
+  }
+}
+
+// ============ s08 新增:项目资料(组装进 systemPrompt)============
+
+export type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string };
+
+// U2 (globally unique): defined in s08; s11 will add a trust parameter to load (U1).
+export class ResourceLoader {
+  constructor(private resources: ContextResource[]) {}
+  load(): ContextResource[] { // returns a shallow copy of every stored resource
+    return this.resources.map((item) => { return { ...item }; });
+  }
+}
+
+// Assembles resources into the systemPrompt (mirrors Pi's buildSystemPrompt); each block is tagged with its source.
+export function buildSystemPrompt(resources: ContextResource[]): string {
+  const tagged = ({ kind, name, content }: ContextResource): string =>
+    `[${kind}:${name}]\n${content}`;
+  return resources.map(tagged).join("\n\n");
+}
+
+// —— provider 对外(对齐 Pi Context:systemPrompt + messages + tools)——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+
+export type ProviderInput = {
+ systemPrompt: string; // s08 新增:项目资料组装进去(对齐 Pi Context.systemPrompt)
+ messages: ProviderMessage[];
+ tools: ToolSpec[]; // s02 起,保留(R1)
+};
+
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+
+export interface Provider { // Streams the events of one model response for the given input.
+  stream(input: ProviderInput): AsyncGenerator<ProviderEvent>;
+}
+
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+
+// —— s05 起:执行插口 ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+  // Every outcome is reported as a toolResult message answering this call id.
+  const reply = (content: string): ToolResultMessage => ({ role: "toolResult", toolCallId: call.id, content });
+  const verdict = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+  if (verdict.type === "block") return reply(`blocked: ${verdict.reason}`);
+  let raw: string;
+  try { raw = registry.run(call); }
+  catch (error) { raw = `error: ${error instanceof Error ? error.message : String(error)}`; } // handler threw: report it, do not propagate
+  // The after-hook may rewrite the result; it is not invoked for blocked calls.
+  return reply(hooks.afterToolCall?.(call, raw) ?? raw);
+}
+
+// —— s06 起快照(s08:加 systemPrompt)——
+export type TurnSnapshot = {
+ systemPrompt: string;
+ messages: ProviderMessage[];
+ tools: ToolSpec[];
+};
+
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+  // Project each session message onto the provider shape; stopReason is not carried over.
+  const project = (message: AgentMessage): ProviderMessage =>
+    message.role === "toolResult"
+      ? { role: "toolResult", toolCallId: message.toolCallId, content: message.content }
+      : { role: message.role, content: message.content };
+  return messages.map(project);
+}
+
+// s08: createTurnSnapshot additionally takes a loader so the systemPrompt is captured in the snapshot too.
+export function createTurnSnapshot(
+  state: AgentState,
+  registry: ToolRegistry,
+  loader: ResourceLoader,
+): TurnSnapshot {
+  // Each part is read once, at the moment the snapshot is taken.
+  const systemPrompt = buildSystemPrompt(loader.load());
+  const messages = toProviderMessages(state.session.currentPath());
+  const tools = registry.getSpecs();
+  return { systemPrompt, messages, tools };
+}
+
+export function buildProviderInputFromSnapshot(
+  snapshot: TurnSnapshot,
+  state: AgentState,
+): ProviderInput {
+  // systemPrompt and tools come from the snapshot; messages are re-read from the
+  // live session (snapshot.messages is not used here), so entries appended after
+  // the snapshot was taken are included.
+  const { systemPrompt, tools } = snapshot;
+  return { systemPrompt, messages: toProviderMessages(state.session.currentPath()), tools };
+}
+
+// ============ 构造函数 ============
+export function createInitialState(model = "demo-small"): AgentState {
+ return { session: new SessionTree(), model };
+}
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; }
+
+// ============ 工具循环(不变,用 snapshot)============
+const MAX_TURNS = 8; // upper bound on provider round-trips within one call
+export async function runEventedToolLoop(
+  state: AgentState, provider: Provider, registry: ToolRegistry,
+  hooks: ToolHooks, snapshot: TurnSnapshot, output: Output,
+): Promise<AssistantMessage> {
+  let turns = 0;
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) { // safety valve: record a synthetic assistant message and stop
+      const stopped: AssistantMessage = { role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop" };
+      state.session.append(stopped);
+      return stopped;
+    }
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state); // rebuilt every turn so new tool results are included
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") { // run the tool immediately and append its result to the session
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call);
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") { // not a tool-use turn: persist the reply and return it
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ Demo Provider(保留,累积)============
+export class DemoProvider implements Provider { // Scripted provider: requests current_time, then replies in text once a tool result is the last message.
+  public lastInput: ProviderInput | undefined; // most recent input passed to stream
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+    yield { type: "message_start" };
+    if (last?.role === "toolResult") {
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+    yield { type: "tool_call", call: { id: "call_1", name: "current_time", input: {} } };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry { // Registry holding the single built-in demo tool.
+  const registry = new ToolRegistry();
+  registry.register({
+    spec: { name: "current_time", description: "返回一个固定的演示时间", input: {} },
+    handler: () => "2026-01-01T00:00:00Z", // fixed string, not the real clock
+  });
+  return registry;
+}
+
+function createLoader(): ResourceLoader { // Loader over three hard-coded in-memory resources, one of each kind.
+  return new ResourceLoader([
+    { kind: "agents", name: "AGENTS.md", content: "Use concise engineering explanations." },
+    { kind: "skill", name: "repo-review", content: "Inspect package.json first. Then summarize risks." },
+    { kind: "prompt", name: "summarize", content: "Return three bullets and one next step." },
+  ]);
+}
+
+async function main(): Promise<void> { // Demo: list the resources, then print the provider input built from a snapshot.
+  const output = createConsoleOutput();
+  const state = createInitialState("demo-small");
+  const registry = createRegistry();
+  const loader = createLoader();
+
+  output.log("s08: Context Resources");
+  output.log("");
+
+  const resources = loader.load();
+  output.log("[resources]");
+  for (const resource of resources) {
+    output.log(`${resource.kind}: ${resource.name}`);
+  }
+  output.log("");
+
+  // Start of a turn: append the user message, then take the snapshot (systemPrompt included).
+  state.session.append(createUserMessage("请总结这个项目"));
+  const snapshot = createTurnSnapshot(state, registry, loader);
+  const input = buildProviderInputFromSnapshot(snapshot, state);
+
+  output.log("[provider input]");
+  output.log(`systemPrompt blocks: ${resources.length}`);
+  output.log(`messages: ${input.messages.length}`);
+  output.log(`tools: ${input.tools.length}`);
+  output.log("");
+
+  output.log("[systemPrompt]");
+  output.log(input.systemPrompt);
+  output.log("");
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s08_context_resources/images/.gitkeep b/learn-pi-agent/s08_context_resources/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s09_extension_runtime/README.md b/learn-pi-agent/s09_extension_runtime/README.md
new file mode 100644
index 0000000..01a2995
--- /dev/null
+++ b/learn-pi-agent/s09_extension_runtime/README.md
@@ -0,0 +1,200 @@
+# s09: Extension Runtime — 外部代码通过 API 接入
+
+> *core 不改,能力从外面接进来。*
+> **Pi 边界**:扩展 API 边界 —— core 暴露的是 API,不是内部对象。
+
+[上一节:s08](../s08_context_resources/) → `s09` → [下一节:s10](../s10_runtime_modes/)
+
+---
+
+## 问题
+
+到 s08 为止,core 的能力(工具、资源)全都写在 core 代码里。每想加一种新玩法——一个新工具、一条新命令、对某类事件做个处理——都得改 core 自己。core 只会越来越重。
+
+s09 要让**外部代码**接入 core,core 不用动就能长出新能力。
+
+---
+
+## 解决方案
+
+core 暴露一个 `ExtensionAPI`,外部代码(叫一个 extension)只能通过它做三件事:
+
+```text
+on(type, handler) 订阅事件
+registerTool(tool) 注册工具
+registerCommand(command) 注册命令
+```
+
+一个 extension 就是一个接收 API 的函数。它拿不到 core 的内部对象,只能用这三个方法。
+
+关键设计:`registerTool` 复用的是 s02 就有的 `Tool` 类型,注册进去的工具直接进**既有 ToolRegistry**。也就是说,extension 注册的工具和 core 内置的工具,走的是**同一条执行链**(经过 s05 的 hook)——不分彼此。
+
+---
+
+## 工作原理
+
+**先定义事件和命令。** 事件是 core 往外发的信号;命令是外部注册的无参动作。
+
+```ts
+export type RuntimeEvent =
+ | { type: "message"; content: string }
+ | { type: "done" };
+
+export type Command = { name: string; run: () => string };
+```
+
+**定义 API 表面。** 这就是 extension 能碰的全部。
+
+```ts
+export type ExtensionAPI = {
+ on(type: RuntimeEvent["type"], handler: (event: RuntimeEvent) => void): void;
+ registerTool(tool: Tool): void;
+ registerCommand(command: Command): void;
+};
+
+export type Extension = (api: ExtensionAPI) => void;
+```
+
+**ExtensionRuntime 接住注册。** 它构造时接收既有 registry;`registerTool` 直接往这个 registry 里加。
+
+```ts
+export class ExtensionRuntime {
+ constructor(private registry: ToolRegistry) {}
+ createApi(): ExtensionAPI {
+ return {
+ on: (type, handler) => { this.handlers.push({ type, handler }); },
+ registerTool: (tool) => { this.registry.register(tool); }, // 注入既有 registry
+ registerCommand: (command) => { this.commands.set(command.name, command); },
+ };
+ }
+ emit(event) { /* 按类型匹配 handler,不是全调 */ }
+ runCommand(name) { /* 找不到返回 unknown command */ }
+}
+```
+
+两个细节:`emit` 按**事件类型**匹配 handler(不是把所有 handler 都调一遍);命令找不到时返回一句说明,不抛错。
+
+> 这一节真正建立的是**扩展 API 边界**:core 对外只给一个受控的 API,extension 加的工具和内置工具同源同链,事件按类型分发。后面 s11 的权限检查会同样作用在 extension 注册的工具上,因为它们本就在同一个 registry 里。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s09
+```
+
+输出类似:
+
+```text
+s09: Extension Runtime
+
+[registry]
+tool: current_time
+tool: note
+
+[event] message: hello from core
+
+[command]
+/status -> extension is active
+
+[tool via extension]
+note -> note saved: hi
+```
+
+观察重点:`[registry]` 里 `current_time` 是内置的、`note` 是 extension 注册的,两者同处一个 registry;`[tool via extension]` 里 extension 的工具走的还是 `executeToolCall` 那条既有执行链。
+
+---
+
+## 接入主线
+
+s09 在 s08 上累积。相对 s08 的变更:
+
+| 组件 | s08 | s09 |
+| --- | --- | --- |
+| 新增类型 | — | `RuntimeEvent`(U2 全局唯一)/ `Command` / `Extension` / `ExtensionAPI` |
+| 新增类 | — | `ExtensionRuntime`(构造接收既有 `ToolRegistry`) |
+| 工具来源 | 只有 core 内置 | core 内置 + extension 注册(同一 registry) |
+| `ProviderInput` / 主循环 | — | **不变**(纯新增,无 U1 升级) |
+
+**焊接点**:`ExtensionRuntime` 构造接收既有 `ToolRegistry`;`registerTool` 往里加。extension 工具和内置工具同源,执行时都走 `executeToolCall`。
+
+---
+
+## 接下来
+
+现在 core 能产生结果,但结果怎么展示(打印?JSON?)写死在代码里。
+
+下一节会把"产生结果"和"展示结果"分开:同一个 core,接不同的输出方式。
+
+进入下一节:[s10](../s10_runtime_modes/)。
+
+---
+
+
+## Pi 源码溯源:Extension API 和它的 20 多个事件
+
+教学版的 ExtensionAPI 暴露 on/registerTool/registerCommand 三个方法。Pi 的 `packages/coding-agent` 有一套庞大得多的 extension 系统。
+
+### 源码在哪
+
+- `packages/coding-agent/src/core/extensions/types.ts` — `ExtensionAPI` 类型
+- `packages/coding-agent/src/core/extensions/loader.ts` — 发现 + 加载
+- `packages/coding-agent/src/core/extensions/runner.ts` — 运行时
+- `.pi/extensions/` — 项目级扩展目录
+
+### API 比教学版大得多
+
+教学版三个方法。Pi 的 `ExtensionAPI`(`types.ts`)有一长串:
+
+```ts
+interface ExtensionAPI {
+ // 注册能力
+ registerTool(tool): void;
+ registerCommand(name, options): void;
+ registerFlag(name, { description, type, default }): void;
+ // 订阅事件(20+ 种)
+ on(event: "session_start" | "tool_execution_start" | "before_agent_start" | ..., handler): void;
+ // 运行时动作
+ sendMessage(msg): void;
+ setModel(model): void;
+ getActiveTools(): AgentTool[];
+ registerProvider(...) / unregisterProvider(...): void;
+ exec(command): Promise<...>;
+}
+```
+
+教学版的 on/registerTool/registerCommand 是它的一个子集。Pi 的 extension 不仅能加工具/命令,还能改模型、注册 provider、执行命令、订阅 20 多种生命周期事件。
+
+### 20 多种事件
+
+教学版只有 `message` / `done` 两种 RuntimeEvent。Pi 的 extension 能订阅 `session_start`、`tool_execution_start`、`before_agent_start`、`project_trust`(s11 用它决定信任)……覆盖整个 agent 生命周期。每个事件的 handler 还能返回结果反向影响 core(比如 `before_agent_start` 的返回值能改本轮配置)。
+
+### 四种发现来源
+
+`discoverAndLoadExtensions`(`loader.ts:557`)从四个地方找扩展:
+
+```text
+1. cwd/.pi/extensions/ 项目级
+2. agentDir/.pi/extensions/ 全局级
+3. package.json 的 pi.extensions 字段 包声明
+4. 命令行传入的路径 CLI 级
+```
+
+教学版的 extension 是手动 `runtime.use(...)`。Pi 是自动发现——放对目录就加载。
+
+### 冲突检测 + 沙箱
+
+两个扩展注册同名工具怎么办?`detectExtensionConflicts`(`loader.ts:988`)检查工具/命令/标志名冲突,通过 `ResourceDiagnostic` 报告,保留先加载的。扩展代码跑在 jiti 沙箱里,每个扩展有 `sourceInfo` 标记来源和权限级别——这是教学版完全没有的隔离层。
+
+### notInitialized 守卫
+
+`createExtensionRuntime`(`runner.ts`)有个巧思:扩展加载阶段(执行 factory 函数时),runtime 的动作方法(sendMessage 等)都指向 `notInitialized`——一调用就抛错。因为加载时 core 还没就绪,扩展只能"注册",不能"动作"。加载完成后才换上真实实现。
+
+### 一句话
+
+教学版的 ExtensionAPI 立的是"外部代码通过受控 API 接入 core"。Pi 把它坐实成 20 多个事件 + 注册 tool/command/flag/provider + 四种自动发现 + 冲突检测 + 沙箱隔离。教学版只保留最小接入(on/registerTool/registerCommand + 手动 use),但"core 暴露 API 而非内部"这条边界一致。
+
+
diff --git a/learn-pi-agent/s09_extension_runtime/code.ts b/learn-pi-agent/s09_extension_runtime/code.ts
new file mode 100644
index 0000000..9d453b1
--- /dev/null
+++ b/learn-pi-agent/s09_extension_runtime/code.ts
@@ -0,0 +1,334 @@
+// s09: Extension Runtime — mini Pi 的第 9 版
+//
+// 外部代码通过公开 API 接入 core:订阅事件、注册工具、注册命令。core 不用动就能长出新能力。
+// 词汇边界:本章新增 Extension / ExtensionAPI / ExtensionRuntime / Command / RuntimeEvent / on / registerTool / registerCommand / emit / use。
+// 关键:registerTool 复用既有 Tool 类型,注入现有 ToolRegistry——extension 的工具和内置工具走同一条执行链。
+
+declare const process: {
+ exitCode?: number;
+};
+
+// —— 停止原因(s04 起)——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— 消息 ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— 会话历史(s07 起)——
+export type SessionEntry = { id: string; parentId: string | null; message: AgentMessage };
+export class SessionTree { // Tree of session entries plus a movable "active leaf" cursor.
+  private entries = new Map<string, SessionEntry>(); // entry id -> entry
+  private activeLeafId: string | null = null; // null until the first append
+  private counter = 0; // drives the sequential ids e1, e2, ...
+  append(message: AgentMessage): SessionEntry { // New entry hangs under the active leaf and becomes the new leaf.
+    const entry: SessionEntry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+  moveTo(entryId: string): void { // Re-point the active leaf; throws Error for an id that was never appended.
+    if (!this.entries.has(entryId)) throw new Error(`unknown entry: ${entryId}`);
+    this.activeLeafId = entryId;
+  }
+  currentPath(): AgentMessage[] { // Messages from the root down to the active leaf.
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break;
+      path.push(entry.message);
+      cursor = entry.parentId;
+    }
+    return path.reverse(); // collected leaf-to-root, so flip it
+  }
+  allEntries(): SessionEntry[] { return [...this.entries.values()]; } // every entry, not only the active path
+}
+export type AgentState = { session: SessionTree; model: string };
+
+// —— 工具契约(s02 起)——
+export type ToolSpec = { name: string; description: string; input: Record<string, string> }; // e.g. the demo passes input: { text: "内容" }
+export type ToolHandler = (input: Record<string, string>) => string; // NOTE(review): string values assumed from the demo call sites — confirm
+export type ToolCall = { id: string; name: string; input: Record<string, string> };
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+
+export class ToolRegistry { // Tool table keyed by spec name; registering an existing name replaces it.
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+  count(): number { return this.tools.size; }
+  run(call: ToolCall): string { // An unknown name yields a message string rather than throwing.
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`;
+    return tool.handler(call.input);
+  }
+}
+
+// —— 上下文资源(s08 起)——
+export type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string };
+export class ResourceLoader {
+ constructor(private resources: ContextResource[]) {}
+ load(): ContextResource[] { return this.resources.map((r) => ({ ...r })); }
+}
+// s08:资源组装进 systemPrompt(对齐 Pi buildSystemPrompt)
+export function buildSystemPrompt(resources: ContextResource[]): string {
+ return resources.map((r) => `[${r.kind}:${r.name}]\n${r.content}`).join("\n\n");
+}
+
+// —— provider 对外 ——
+export type ProviderMessage =
+ | { role: "user" | "assistant"; content: string }
+ | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+export type ProviderEvent =
+ | { type: "message_start" }
+ | { type: "text_delta"; text: string }
+ | { type: "tool_call"; call: ToolCall }
+ | { type: "message_end"; stopReason: StopReason };
+export interface Provider { stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; } // streams the events of one model response
+
+export type Output = { log(line: string): void };
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; }
+
+// —— s05 起:执行插口 ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string;
+};
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+ const before = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+ if (before.type === "block") {
+ return { role: "toolResult", toolCallId: call.id, content: `blocked: ${before.reason}` };
+ }
+ let result: string;
+ try { result = registry.run(call); }
+ catch (error) { result = `error: ${error instanceof Error ? error.message : String(error)}`; }
+ const finalResult = hooks.afterToolCall?.(call, result) ?? result;
+ return { role: "toolResult", toolCallId: call.id, content: finalResult };
+}
+
+// —— s06 起:一轮快照 ——
+export type TurnSnapshot = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+ return messages.map((message) => {
+ if (message.role === "toolResult") {
+ return { role: "toolResult", toolCallId: message.toolCallId, content: message.content };
+ }
+ return { role: message.role, content: message.content };
+ });
+}
+export function createTurnSnapshot(state: AgentState, registry: ToolRegistry, loader: ResourceLoader): TurnSnapshot {
+ return {
+ systemPrompt: buildSystemPrompt(loader.load()),
+ messages: toProviderMessages(state.session.currentPath()),
+ tools: registry.getSpecs(),
+ };
+}
+export function buildProviderInputFromSnapshot(snapshot: TurnSnapshot, state: AgentState): ProviderInput {
+ return {
+ systemPrompt: snapshot.systemPrompt,
+ messages: toProviderMessages(state.session.currentPath()),
+ tools: snapshot.tools,
+ };
+}
+
+// ============ 构造函数 ============
+export function createInitialState(model = "demo-small"): AgentState { return { session: new SessionTree(), model }; }
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; }
+
+// ============ 工具循环(不变)============
+const MAX_TURNS = 8; // upper bound on provider round-trips within one call
+export async function runEventedToolLoop(
+  state: AgentState, provider: Provider, registry: ToolRegistry,
+  hooks: ToolHooks, snapshot: TurnSnapshot, output: Output,
+): Promise<AssistantMessage> {
+  let turns = 0;
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) { // safety valve: record a synthetic assistant message and stop
+      const stopped: AssistantMessage = { role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop" };
+      state.session.append(stopped);
+      return stopped;
+    }
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state); // rebuilt every turn so new tool results are included
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") { // run the tool immediately and append its result to the session
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call);
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") { // not a tool-use turn: persist the reply and return it
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+}
+
+// ============ s09 新增:扩展运行时 ============
+
+// U2 全局唯一:s09 定义,s10 复用。
+export type RuntimeEvent =
+ | { type: "message"; content: string }
+ | { type: "done" };
+
+// 命令:一个不带参数、返回字符串的动作。
+export type Command = { name: string; run: () => string };
+
+// When subscribing to one event type, the handler's parameter is narrowed to match (subscribe to "message", receive only message events).
+type EventHandler<T extends RuntimeEvent["type"]> = (
+  event: Extract<RuntimeEvent, { type: T }>,
+) => void;
+
+// The entire surface an extension is allowed to touch.
+export type ExtensionAPI = {
+  on<T extends RuntimeEvent["type"]>(type: T, handler: EventHandler<T>): void;
+  registerTool(tool: Tool): void; // reuses the Tool type introduced in s02
+  registerCommand(command: Command): void;
+};
+
+// 一个 extension 就是一个接收 API 的函数。
+export type Extension = (api: ExtensionAPI) => void;
+
+export class ExtensionRuntime {
+  private registry: ToolRegistry; // the existing registry is reused, so extension tools sit beside built-in ones
+  private commands = new Map<string, Command>(); // command name -> command; a repeated name replaces the earlier one
+  private handlers: { type: RuntimeEvent["type"]; handler: (event: RuntimeEvent) => void }[] = [];
+
+  constructor(registry: ToolRegistry) {
+    this.registry = registry;
+  }
+
+  // Extensions only ever receive this API object, never the runtime's own fields.
+  createApi(): ExtensionAPI {
+    return {
+      on: (type, handler) => {
+        this.handlers.push({
+          type,
+          handler: handler as (event: RuntimeEvent) => void, // widened for storage; emit() filters by type before calling
+        });
+      },
+      registerTool: (tool) => {
+        this.registry.register(tool); // goes into the shared registry, hence the same execution path
+      },
+      registerCommand: (command) => {
+        this.commands.set(command.name, command);
+      },
+    };
+  }
+
+  use(extension: Extension): void {
+    extension(this.createApi());
+  }
+
+  // Dispatch by event type: only handlers subscribed to event.type are called.
+  emit(event: RuntimeEvent): void {
+    for (const { type, handler } of this.handlers) {
+      if (type === event.type) {
+        handler(event);
+      }
+    }
+  }
+
+  runCommand(name: string): string {
+    const command = this.commands.get(name);
+    if (!command) return `unknown command: ${name}`;
+    return command.run();
+  }
+}
+
+// ============ Demo Provider(保留,累积)============
+export class DemoProvider implements Provider { // Scripted provider: requests current_time, then replies in text once a tool result is the last message.
+  public lastInput: ProviderInput | undefined; // most recent input passed to stream
+  async *stream(input: ProviderInput): AsyncGenerator<ProviderEvent> {
+    this.lastInput = input;
+    const last = input.messages[input.messages.length - 1];
+    yield { type: "message_start" };
+    if (last?.role === "toolResult") {
+      yield { type: "text_delta", text: `工具结果是:${last.content}` };
+      yield { type: "message_end", stopReason: "stop" };
+      return;
+    }
+    yield { type: "tool_call", call: { id: "call_1", name: "current_time", input: {} } };
+    yield { type: "message_end", stopReason: "toolUse" };
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createRegistry(): ToolRegistry {
+ const registry = new ToolRegistry();
+ registry.register({
+ spec: { name: "current_time", description: "返回一个固定的演示时间", input: {} },
+ handler: () => "2026-01-01T00:00:00Z",
+ });
+ return registry;
+}
+
+// A demo extension: subscribes to an event, registers a command and a tool — all through the API, never touching core internals.
+function createDemoExtension(output: Output): Extension {
+  return (api) => {
+    api.on("message", (event) => {
+      output.log(`[event] message: ${event.content}`);
+    });
+    api.registerCommand({ name: "status", run: () => "extension is active" });
+    api.registerTool({
+      spec: { name: "note", description: "保存一条笔记", input: { text: "内容" } },
+      handler: (input) => `note saved: ${input.text ?? ""}`,
+    });
+  };
+}
+
+async function main(): Promise<void> { // Demo: load one extension, then exercise its event handler, command and tool.
+  const output = createConsoleOutput();
+  const registry = createRegistry();
+  const runtime = new ExtensionRuntime(registry);
+
+  output.log("s09: Extension Runtime");
+  output.log("");
+
+  // Plug the extension in: it registers its capabilities through the API.
+  runtime.use(createDemoExtension(output));
+
+  // After registration the registry holds both the built-in tool and the extension's tool.
+  output.log("[registry]");
+  for (const spec of registry.getSpecs()) {
+    output.log(`tool: ${spec.name}`);
+  }
+  output.log("");
+
+  // Event: core emits, and the extension's handler fires (matched by type).
+  runtime.emit({ type: "message", content: "hello from core" });
+  output.log("");
+
+  // Command.
+  output.log("[command]");
+  output.log(`/status -> ${runtime.runCommand("status")}`);
+  output.log("");
+
+  // The extension-registered tool runs through the existing execution path (executeToolCall).
+  output.log("[tool via extension]");
+  const result = executeToolCall(
+    registry,
+    {},
+    { id: "c1", name: "note", input: { text: "hi" } },
+  );
+  output.log(`note -> ${result.content}`);
+  output.log("");
+}
+
+main().catch((error: unknown) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/learn-pi-agent/s09_extension_runtime/images/.gitkeep b/learn-pi-agent/s09_extension_runtime/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s10_runtime_modes/README.md b/learn-pi-agent/s10_runtime_modes/README.md
new file mode 100644
index 0000000..380ceca
--- /dev/null
+++ b/learn-pi-agent/s10_runtime_modes/README.md
@@ -0,0 +1,178 @@
+# s10: Runtime Modes — 同一个 core,不同的展示
+
+> *core 只管产生,怎么展示外层说了算。*
+> **Pi 边界**:运行方式边界 —— core 产生事件,展示方式是外层的事,换展示不改 core。
+
+[上一节:s09](../s09_extension_runtime/) → `s10` → [下一节:s11](../s11_trust_and_execution_boundary/)
+
+---
+
+## 问题
+
+前面几节里,core 一产生结果就直接打印出来——展示方式写死在代码里。
+
+但"展示"这件事,不同场景要的不一样:给人看,要人类可读的文本;给别的程序看,要结构化的 JSON;以后可能还要 GUI 渲染。如果展示方式写死在 core 里,每换一种就得复制或改动 core。
+
+core 只该管**产生**什么,**怎么展示**应该分离出去。
+
+---
+
+## 解决方案
+
+core 把要做的事变成一批 `RuntimeEvent`,外层用一个 `RuntimeMode` 决定怎么展示。
+
+```text
+createDemoRuntimeEvents() → RuntimeEvent[] → RuntimeMode.render()
+```
+
+同一个 core、同一批事件,接不同的 mode 就有不同输出:
+
+| mode | 展示成 |
+| --- | --- |
+| `PrintMode` | 人类可读文本(只打印 message) |
+| `JsonMode` | 结构化 JSON(每事件一行) |
+
+> **[R7 收获]** 回想 s01:那时候 core 不直接 `console.log`,而是走了一层 `Output.log`。那是一个最小的"输出抽象"种子。s10 把它正式化、可切换了——同一个 core 的事件,想打印就 PrintMode,想 JSON 就 JsonMode,core 一个字都不用改。
+
+这里不是替换 s01-s09 的 `runEventedToolLoop`。为了让本节输出短一点,demo 用 `createDemoRuntimeEvents()` 造一批最小事件;真正的主线里,这批事件来自前面已经累积出来的 core。
+
+---
+
+## 工作原理
+
+**先准备一批事件。** `createDemoRuntimeEvents` 把输入变成一批最小 RuntimeEvent。它只是本节的演示事件源,不是新的主循环。
+
+```ts
+export function createDemoRuntimeEvents(input: string): RuntimeEvent[] {
+ return [
+ { type: "message", content: `收到:${input}` },
+ { type: "done" },
+ ];
+}
+```
+
+**mode 消费事件。** RuntimeMode 只有一个方法 `render`。PrintMode 挑出 message 打印文本;JsonMode 把每个事件序列化成 JSON。
+
+```ts
+export type RuntimeMode = { render(events: RuntimeEvent[]): void };
+
+export class PrintMode implements RuntimeMode {
+ render(events) {
+ for (const event of events) {
+ if (event.type === "message") console.log(event.content);
+ }
+ }
+}
+
+export class JsonMode implements RuntimeMode {
+ render(events) {
+ for (const event of events) console.log(JSON.stringify(event));
+ }
+}
+```
+
+> 这一节真正建立的是**运行方式边界**:core 产生事件,展示是外层 mode 的事。RuntimeEvent 是 core 对外的"输出语言",mode 是"翻译器"。换展示方式只是换 mode,core 不动——这正是 s01 那层 Output 抽象要长成的样子。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s10
+```
+
+输出类似:
+
+```text
+s10: Runtime Modes
+
+[print mode]
+收到:你好,mini Pi
+
+[json mode]
+{"type":"message","content":"收到:你好,mini Pi"}
+{"type":"done"}
+```
+
+观察重点:两种输出来自**同一批事件**——`[print mode]` 只显示了 message 内容,`[json mode]` 把每个事件都序列化了,包括 `done`。
+
+---
+
+## 接入主线
+
+s10 在 s09 上累积。相对 s09 的变更:
+
+| 组件 | s09 | s10 |
+| --- | --- | --- |
+| 新增类/函数 | — | `createDemoRuntimeEvents`(演示事件源)/ `PrintMode` / `JsonMode` |
+| 新增类型 | — | `RuntimeMode` |
+| 输出抽象 | `Output.log`(s01 起,逐行) | `RuntimeMode.render`(可切换展示) |
+| 主循环 / `ProviderInput` | — | **不变**(纯新增,无 U1 升级) |
+
+**焊接点**:前面主线产出的 `RuntimeEvent[]` 交给 `RuntimeMode.render`;本节 demo 只用 `createDemoRuntimeEvents` 代替真实事件源。`PrintMode` / `JsonMode` 各自 `render` 同一批事件,core 与展示彻底分开。
+
+---
+
+## 接下来
+
+core 会接触本地项目:要加载项目资料,也可能要执行本地动作。这两件事的风险不一样,得分开管。
+
+下一节会把"能不能加载资料"和"能不能执行动作"拆成两个独立的开关。
+
+进入下一节:[s11](../s11_trust_and_execution_boundary/)。
+
+---
+
+
+## Pi 源码溯源:四种 AppMode 和自动分流
+
+教学版两种 mode(Print/Json)消费同一批事件。Pi 的 `packages/coding-agent` 有四种运行模式,按终端环境自动分流。
+
+### 源码在哪
+
+- `packages/coding-agent/src/cli/args.ts:10` — `AppMode` 类型
+- `packages/coding-agent/src/main.ts:98` — `resolveAppMode`(分流逻辑)
+- `packages/coding-agent/src/main.ts:768` — 各模式入口
+- `packages/coding-agent/src/modes/print-mode.ts` — print 模式
+
+### 四种模式
+
+```ts
+type AppMode = "interactive" | "print" | "json" | "rpc";
+```
+
+| 模式 | 什么时候用 | 怎么输出 |
+| --- | --- | --- |
+| interactive | stdin 和 stdout 都是 TTY | TUI 差分渲染(`pi-tui`) |
+| print | `--print`,或 stdin / stdout 任一不是 TTY(管道、重定向) | 纯文本,跑完退出 |
+| json | `--mode json` | 结构化 JSON 事件流 |
+| rpc | `--mode rpc` | JSON-RPC 接口,给编辑器/工具集成 |
+
+教学版的 PrintMode/JsonMode 是 print 和 json 两种的极简版。
+
+### 自动分流
+
+`resolveAppMode`(`main.ts:98`)的判定顺序:
+
+```ts
+function resolveAppMode(parsed, stdinIsTTY, stdoutIsTTY): AppMode {
+ if (parsed.mode === "rpc") return "rpc"; // 显式 rpc 最优先
+ if (parsed.mode === "json") return "json"; // 显式 json
+ if (parsed.print || !stdinIsTTY || !stdoutIsTTY) return "print"; // 管道自动 print
+ return "interactive"; // 默认交互
+}
+```
+
+关键设计:**管道自动降级到 print**。把 pi 接到管道(`echo hi | pi`)时,它检测到 stdin 不是 TTY,自动用 print 模式——不会傻乎乎起一个 TUI。教学版没有这个自动检测。
+
+### TUI 用差分渲染
+
+interactive 模式(`main.ts:770`)用 `@earendil-works/pi-tui`,这是个专门的终端 UI 库,做差分渲染(只重绘变化的部分)——流式输出时不会闪烁。教学版的 mode 只是 console.log,没有渲染层。
+
+### 一句话
+
+教学版的 RuntimeMode 立的是"core 产事件、外层决定展示"。Pi 把它坐实成四种 AppMode + 管道自动降级 + TUI 差分渲染。同一个 agent core,接 TTY 是交互式、接管道是 print、接工具是 json/rpc——core 一个字不用改。
+
+
diff --git a/learn-pi-agent/s10_runtime_modes/code.ts b/learn-pi-agent/s10_runtime_modes/code.ts
new file mode 100644
index 0000000..ca2c3e5
--- /dev/null
+++ b/learn-pi-agent/s10_runtime_modes/code.ts
@@ -0,0 +1,269 @@
+// s10: Runtime Modes — version 10 of mini Pi
+//
+// The core only produces events; how they are displayed is decided by the outer mode. [R7 payoff] The Output abstraction from s01 grows into a switchable RuntimeMode.
+// Vocabulary boundary: this chapter adds RuntimeMode / PrintMode / JsonMode / createDemoRuntimeEvents / render.
+// Key point: Output stays (progress printing), RuntimeMode is new (result display); one core yields one batch of events, and different modes present it differently.
+
+// Ambient declaration of the only `process` member this file uses.
+declare const process: {
+  exitCode?: number;
+};
+
+// —— Stop reasons (since s04) ——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— Messages ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— Session history (since s07) ——
+// One node of the history tree; `parentId` is null for an entry appended while no leaf is active.
+export type SessionEntry = { id: string; parentId: string | null; message: AgentMessage };
+
+/**
+ * Tree of session entries with a movable "active leaf" pointer.
+ * `append` always attaches under the active leaf, so moving the pointer back to
+ * an older entry and appending again creates a branch.
+ */
+export class SessionTree {
+  // Explicit type arguments: a bare `new Map()` is Map<any, any>, which would
+  // silently turn every lookup below into `any`.
+  private entries = new Map<string, SessionEntry>();
+  private activeLeafId: string | null = null;
+  private counter = 0;
+
+  /** Stores `message` as a child of the active leaf and makes it the new active leaf. */
+  append(message: AgentMessage): SessionEntry {
+    const entry: SessionEntry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+
+  /**
+   * Re-points the active leaf at an existing entry.
+   * @throws Error when `entryId` is not in the tree.
+   */
+  moveTo(entryId: string): void {
+    if (!this.entries.has(entryId)) throw new Error(`unknown entry: ${entryId}`);
+    this.activeLeafId = entryId;
+  }
+
+  /** Messages along the parent chain of the active leaf, oldest first. */
+  currentPath(): AgentMessage[] {
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    // Walk parent links upward; the path is collected leaf-first and reversed at the end.
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break;
+      path.push(entry.message);
+      cursor = entry.parentId;
+    }
+    return path.reverse();
+  }
+
+  /** Every stored entry, in Map iteration (insertion) order. */
+  allEntries(): SessionEntry[] { return [...this.entries.values()]; }
+}
+export type AgentState = { session: SessionTree; model: string };
+
+// —— Tool contract (since s02) ——
+// `Record` needs two type arguments to compile.
+// NOTE(review): the value type of `input` is not visible in this file; `unknown` is
+// assumed as the widest choice — narrow it if the course fixes a schema type.
+export type ToolSpec = { name: string; description: string; input: Record<string, unknown> };
+export type ToolHandler = (input: Record<string, unknown>) => string;
+export type ToolCall = { id: string; name: string; input: Record<string, unknown> };
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+/** Name-keyed registry of tools; registering a name again replaces the earlier tool. */
+export class ToolRegistry {
+  // Typed explicitly so `get` yields `Tool | undefined` rather than `any`.
+  private tools = new Map<string, Tool>();
+
+  /** Adds (or overwrites) a tool under `tool.spec.name`. */
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+
+  /** Specs of all registered tools, in Map iteration order. */
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+
+  /** Number of registered tools. */
+  count(): number { return this.tools.size; }
+
+  /**
+   * Runs the handler registered under `call.name` with `call.input`.
+   * An unknown name is reported as a result string, not thrown.
+   */
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`;
+    return tool.handler(call.input);
+  }
+}
+
+// —— Context resources (since s08) ——
+export type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string };
+
+/** Holds a list of resources and hands out shallow copies of them. */
+export class ResourceLoader {
+  constructor(private resources: ContextResource[]) {}
+
+  /** Returns a new array with a shallow copy of each stored resource. */
+  load(): ContextResource[] {
+    const copies: ContextResource[] = [];
+    for (const resource of this.resources) copies.push({ ...resource });
+    return copies;
+  }
+}
+// s08: resources are assembled into the systemPrompt (aligned with Pi's buildSystemPrompt).
+/**
+ * Renders each resource as a `[kind:name]` line followed by its content and
+ * joins the sections with a blank line. An empty list yields "".
+ */
+export function buildSystemPrompt(resources: ContextResource[]): string {
+  const sections: string[] = [];
+  for (const { kind, name, content } of resources) {
+    sections.push(`[${kind}:${name}]\n${content}`);
+  }
+  return sections.join("\n\n");
+}
+
+// —— Provider-facing types ——
+// Message shape sent to the provider.
+export type ProviderMessage =
+  | { role: "user" | "assistant"; content: string }
+  | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+// Events a provider streams back while answering one input.
+export type ProviderEvent =
+  | { type: "message_start" }
+  | { type: "text_delta"; text: string }
+  | { type: "tool_call"; call: ToolCall }
+  | { type: "message_end"; stopReason: StopReason };
+// The type argument matters: a bare `AsyncGenerator` yields `unknown`, so the
+// tool loop could not read `event.type` from the streamed values.
+export interface Provider { stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; }
+
+// —— Since s01: output abstraction (R7). s10 adds RuntimeMode alongside it; the two coexist. ——
+export type Output = { log(line: string): void };
+/** Builds an Output that forwards every line to console.log. */
+export function createConsoleOutput(): Output {
+  return {
+    log(line) { console.log(line); },
+  };
+}
+
+// —— Since s05: execution hooks ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+  beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+  afterToolCall?: (call: ToolCall, result: string) => string;
+};
+
+/**
+ * Runs one tool call through the optional hooks and wraps the outcome as a tool-result message.
+ *
+ * - `beforeToolCall` may block the call; the message content is then `blocked: <reason>`.
+ * - An exception thrown by the tool is converted into an `error: <message>` result.
+ * - `afterToolCall`, when present, may replace the result text.
+ */
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+  const toMessage = (content: string): ToolResultMessage => ({ role: "toolResult", toolCallId: call.id, content });
+
+  // No hook (or no verdict) means the call is allowed.
+  const verdict: BeforeToolCallResult = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+  if (verdict.type === "block") return toMessage(`blocked: ${verdict.reason}`);
+
+  let raw: string;
+  try {
+    raw = registry.run(call);
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    raw = `error: ${detail}`;
+  }
+  return toMessage(hooks.afterToolCall?.(call, raw) ?? raw);
+}
+
+// —— Since s06: per-turn snapshot ——
+export type TurnSnapshot = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+/** Converts session messages to the provider shape; an assistant message's `stopReason` is not carried over. */
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+  const converted: ProviderMessage[] = [];
+  for (const message of messages) {
+    if (message.role === "toolResult") {
+      converted.push({ role: "toolResult", toolCallId: message.toolCallId, content: message.content });
+    } else {
+      converted.push({ role: message.role, content: message.content });
+    }
+  }
+  return converted;
+}
+/**
+ * Captures what a turn starts from: the system prompt built from the loader's
+ * resources, the current session path as provider messages, and the registered tool specs.
+ */
+export function createTurnSnapshot(state: AgentState, registry: ToolRegistry, loader: ResourceLoader): TurnSnapshot {
+  const systemPrompt = buildSystemPrompt(loader.load());
+  const messages = toProviderMessages(state.session.currentPath());
+  const tools = registry.getSpecs();
+  return { systemPrompt, messages, tools };
+}
+/**
+ * Builds the provider input for one turn. The system prompt and tools come from
+ * the snapshot; messages are re-read from the live session, so `snapshot.messages`
+ * is not used here.
+ */
+export function buildProviderInputFromSnapshot(snapshot: TurnSnapshot, state: AgentState): ProviderInput {
+  const { systemPrompt, tools } = snapshot;
+  const messages = toProviderMessages(state.session.currentPath());
+  return { systemPrompt, messages, tools };
+}
+
+// ============ Constructors ============
+/** Fresh agent state: an empty session tree plus the model name (default "demo-small"). */
+export function createInitialState(model = "demo-small"): AgentState {
+  const session = new SessionTree();
+  return { session, model };
+}
+/** Wraps `content` as a user message. */
+export function createUserMessage(content: string): UserMessage {
+  return { role: "user", content };
+}
+
+// ============ Tool loop (since s04, kept unchanged) ============
+const MAX_TURNS = 8;
+/**
+ * Drives provider turns until one ends without asking for another tool round.
+ *
+ * Each turn streams provider events: text deltas are accumulated, every tool call
+ * is executed immediately and its result appended to the session, and
+ * `message_end` records the stop reason. Another turn runs only when the turn saw
+ * a tool call AND stopped with "toolUse"; otherwise the assistant message is
+ * appended to the session and returned. Text produced in a turn that loops again
+ * is not stored.
+ *
+ * @param snapshot supplies the system prompt and tool specs for every turn;
+ *                 messages are re-read from `state.session` each turn.
+ * @param output   receives one progress line per streamed event.
+ * @returns the final assistant message, or a synthetic "stop" message once
+ *          MAX_TURNS provider turns have been used.
+ */
+export async function runEventedToolLoop(
+  state: AgentState, provider: Provider, registry: ToolRegistry,
+  hooks: ToolHooks, snapshot: TurnSnapshot, output: Output,
+): Promise<AssistantMessage> {
+  for (let turn = 1; turn <= MAX_TURNS; turn += 1) {
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state);
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call);
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") {
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+  // Turn budget exhausted: record and return a placeholder instead of looping forever.
+  const stopped: AssistantMessage = { role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop" };
+  state.session.append(stopped);
+  return stopped;
+}
+
+// ============ Since s09: extension runtime ============
+export type RuntimeEvent = { type: "message"; content: string } | { type: "done" }; // U2: globally unique
+// Handler for one event variant, selected by its `type` tag. The type parameter
+// is required: without it `Extract` has no arguments and `T` below is undefined.
+type EventHandler<T extends RuntimeEvent["type"]> = (event: Extract<RuntimeEvent, { type: T }>) => void;
+export type Command = { name: string; run: () => string };
+// Surface handed to an extension: subscribe to events, register tools and commands.
+export type ExtensionAPI = {
+  on<T extends RuntimeEvent["type"]>(type: T, handler: EventHandler<T>): void;
+  registerTool(tool: Tool): void;
+  registerCommand(command: Command): void;
+};
+export type Extension = (api: ExtensionAPI) => void;
+/**
+ * Host side of the extension API: collects event handlers and commands from
+ * extensions and forwards tool registrations to the given ToolRegistry.
+ */
+export class ExtensionRuntime {
+  // Typed explicitly so `runCommand` sees `Command | undefined`, not `any`.
+  private commands = new Map<string, Command>();
+  private handlers: { type: RuntimeEvent["type"]; handler: (event: RuntimeEvent) => void }[] = [];
+  constructor(private registry: ToolRegistry) {}
+
+  /** Builds the API object passed to an extension. */
+  createApi(): ExtensionAPI {
+    return {
+      // Handlers are stored with a widened event type; `emit` only calls them for a matching `type`.
+      on: (type, handler) => { this.handlers.push({ type, handler: handler as (event: RuntimeEvent) => void }); },
+      registerTool: (tool) => { this.registry.register(tool); },
+      registerCommand: (command) => { this.commands.set(command.name, command); },
+    };
+  }
+
+  /** Installs an extension by calling it with a fresh API object. */
+  use(extension: Extension): void { extension(this.createApi()); }
+
+  /** Calls every handler subscribed to `event.type`, in subscription order. */
+  emit(event: RuntimeEvent): void {
+    for (const { type, handler } of this.handlers) if (type === event.type) handler(event);
+  }
+
+  /** Runs a registered command; an unknown name is reported as a result string, not thrown. */
+  runCommand(name: string): string {
+    const command = this.commands.get(name);
+    if (!command) return `unknown command: ${name}`;
+    return command.run();
+  }
+}
+
+// ============ New in s10 [R7 payoff]: runtime modes (output separation) ============
+
+// To keep this section's demo small, only a minimal batch of RuntimeEvents is fabricated.
+// It does not replace the tool loop built up earlier; it only shows how a mode consumes one batch of events.
+export function createDemoRuntimeEvents(input: string): RuntimeEvent[] {
+  const echoed: RuntimeEvent = { type: "message", content: `收到:${input}` };
+  const finished: RuntimeEvent = { type: "done" };
+  return [echoed, finished];
+}
+
+// Output mode: consumes the same batch of events and presents it in its own form.
+export type RuntimeMode = {
+  render(events: RuntimeEvent[]): void;
+};
+
+// Human-readable: prints only the content of "message" events.
+export class PrintMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void {
+    for (let index = 0; index < events.length; index += 1) {
+      const event = events[index];
+      if (event.type !== "message") continue;
+      console.log(event.content);
+    }
+  }
+}
+
+// Structured: one JSON line per event, for machine consumption.
+export class JsonMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void {
+    for (let index = 0; index < events.length; index += 1) {
+      const line = JSON.stringify(events[index]);
+      console.log(line);
+    }
+  }
+}
+
+// ============ Demo scaffolding ============
+
+/** Renders one demo batch of events through each mode, under a labelled section. */
+function main(): void {
+  const events = createDemoRuntimeEvents("你好,mini Pi");
+  const sections: [string, RuntimeMode][] = [
+    ["[print mode]", new PrintMode()],
+    ["[json mode]", new JsonMode()],
+  ];
+
+  console.log("s10: Runtime Modes");
+  console.log("");
+
+  for (const [label, mode] of sections) {
+    console.log(label);
+    mode.render(events);
+    console.log("");
+  }
+}
+
+// Entry point: report any failure and signal it through the exit code.
+try {
+  main();
+} catch (error: unknown) {
+  console.error(error);
+  process.exitCode = 1;
+}
diff --git a/learn-pi-agent/s10_runtime_modes/images/.gitkeep b/learn-pi-agent/s10_runtime_modes/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s11_trust_and_execution_boundary/README.md b/learn-pi-agent/s11_trust_and_execution_boundary/README.md
new file mode 100644
index 0000000..4f65f75
--- /dev/null
+++ b/learn-pi-agent/s11_trust_and_execution_boundary/README.md
@@ -0,0 +1,200 @@
+# s11: Trust and Execution Boundary — 加载有 trust,执行靠容器
+
+> *加载在 core 里管,执行交给容器。*
+> **Pi 边界**:执行权限边界 —— 资源加载看 trust,执行边界不内置、靠部署层 containerization。
+
+[上一节:s10](../s10_runtime_modes/) → `s11` → [下一节:s12](../s12_package_distribution/)
+
+---
+
+## 问题
+
+core 会接触本地项目:要加载项目资料(s08),工具也会执行本地动作(s04)。
+
+这两件事**风险差很多**:加载一份资料只是读,执行一个动作可能改动系统。所以加载该有个开关——不可信的项目,连资料都别加载,防恶意 AGENTS.md 或扩展混进来。
+
+但"执行"这件事,Pi 的真实取舍和我们直觉不同:**它不在 core 里限制执行权限**。文件系统、进程、网络全开放,权限等于启动它的用户。真要隔离执行,靠部署层把整个进程关进容器。
+
+s11 就把这两件事的真实分工摆出来:加载在 core 里用 trust 管,执行边界交给容器。
+
+---
+
+## 解决方案
+
+两个层次,分工明确:
+
+| 层 | 在哪 | 管什么 |
+| --- | --- | --- |
+| **加载** | core 内(trust) | 不可信项目不加载资料,防恶意资源 |
+| **执行** | 部署层(containerization) | 整个进程关进沙箱/容器,限制文件/进程/网络 |
+
+containerization 有三种 pattern(见 Pi 的 `containerization.md`):
+
+```text
+OpenShell 整个 pi 进程跑在策略控制的沙箱
+Gondolin pi 留主机,工具执行路由到 Linux 微虚拟机
+Plain Docker 整个 pi 进程跑在本地容器
+```
+
+> **重要**:教学版**不再发明** `ExecutionPolicy`/`Executor` 那种"core 内 dryRun/allow 开关"——它在 Pi 里没有对应物。core 内唯一能拦住执行的,是 s05 的 `beforeToolCall` hook(按工具 allow/block)。系统级的执行隔离,整体推给容器。
+
+---
+
+## 工作原理
+
+**先定信任开关。**
+
+```ts
+export type ProjectTrust = "trusted" | "untrusted";
+```
+
+**资源加载看 trust。** `load(trust)` 在 untrusted 时直接返回空——core 拿不到任何项目资料。
+
+```ts
+load(trust: ProjectTrust = "trusted"): ContextResource[] {
+ if (trust === "untrusted") return [];
+ return this.resources.map((r) => ({ ...r }));
+}
+```
+
+`createTurnSnapshot` 把 trust 透传给 load,所以拍快照时就决定了本轮装不装资料。
+
+**执行不靠 core 管。** 这里没有 `ExecutionPolicy`、没有 `Executor`。`executeToolCall` 的签名回到 s05 的样子(无 policy 参数):
+
+```ts
+export function executeToolCall(registry, hooks, call): ToolResultMessage {
+ const before = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+ if (before.type === "block") return { /* blocked */ };
+ // ... 真正执行 handler,错误捕获 ...
+}
+```
+
+唯一能拦住执行的,是 `beforeToolCall` hook——它是扩展层的、按工具的拦截,不是系统级权限。要系统级隔离执行,去部署层用容器。
+
+> 这一节真正建立的是**执行权限边界**,而且是对齐 Pi 的真实取舍:**加载**在 core 里用 trust 管(防恶意资源),**执行**不在 core 里管,整体交给部署层 containerization。core 保持轻量,权限的"重活"推给容器——这正是 README 里说的"Pi 不内置 permission system"。
+
+---
+
+## 试一下
+
+运行(默认 trusted):
+
+```sh
+npm run s11
+```
+
+输出类似:
+
+```text
+s11: Trust and Execution Boundary
+
+[resources]
+AGENTS.md
+
+[execution boundary]
+Pi 不在 core 内限制执行权限。执行边界靠部署层 containerization:
+- OpenShell:整个 pi 进程跑在策略控制的沙箱
+- Gondolin:pi 留主机,工具执行路由到 Linux 微虚拟机
+- Plain Docker:整个 pi 进程跑在本地容器
+core 内唯一的执行拦截点是 s05 的 beforeToolCall hook。
+```
+
+不可信项目(不加载资料):
+
+```sh
+npm run s11 -- --trust untrusted
+```
+
+```text
+[resources]
+none(untrusted,不加载任何资料)
+```
+
+观察重点:trust 只管"加载不加载资料";[execution boundary] 那段输出则说明执行边界在哪——core 里没有 dryRun/allow 开关,真要限制执行得用容器。
+
+---
+
+## 接入主线
+
+s11 在 s10 上累积。相对 s10 的变更:
+
+| 组件 | s10 | s11 |
+| --- | --- | --- |
+| 新增类型 | — | `ProjectTrust` |
+| `ResourceLoader.load` | `load()` | **`load(trust)`**(U1,默认 trusted) |
+| `createTurnSnapshot` | `(state, registry, loader)` | 多一个 `trust`(默认 trusted) |
+| 执行权限 | 只有 hook | **trust 控加载;执行靠 containerization(core 内不内置 permission)** |
+
+**焊接点**:`loader.load(trust)` 决定 context 装不装资料;`createTurnSnapshot` 透传 trust。`executeToolCall` 保持 s05 的签名(无 policy)——执行拦截只有 beforeToolCall hook,系统级隔离交给容器。
+
+> 注:本节移除了早期教学版的 `ExecutionPolicy`/`Executor`。它们是为了"自演示执行边界"而发明的,但 Pi 真实没有这层——保留会让内核和 Pi 不一致。
+
+---
+
+## 接下来
+
+现在工具、命令、项目资料都是零散定义的。想复用一组能力,没有个清单说明"这包里有什么"。
+
+下一节会把它们整理成一个带清单的包,方便整体分发和加载。
+
+进入下一节:[s12](../s12_package_distribution/)。
+
+---
+
+
+## Pi 源码溯源:不内置 permission,靠 containerization
+
+教学版用 trust 控加载。Pi 的真实情况值得特别说明——**它不内置 permission 系统**,权限边界靠外部容器化。
+
+### 源码在哪
+
+- `packages/coding-agent/docs/containerization.md` — 三种容器化方案(官方文档)
+- `packages/coding-agent/src/core/project-trust.ts:45` — `resolveProjectTrusted`
+- `packages/coding-agent/src/core/extensions/runner.ts` — trust 事件
+- `packages/coding-agent/src/tools/bash.ts:66` — bash 执行(无权限检查)
+
+### 核实:Pi 确实不内置 permission
+
+README 说"Pi 不内置 permission system"。源码证实:`createLocalBashOperations`(`bash.ts:66`)直接 `spawn(shell, ...)`,**没有任何权限检查**——文件系统、进程、网络全开放,权限等于启动它的用户。
+
+### 那 trust 管什么
+
+Pi 的 `ProjectTrust`(`project-trust.ts:45`)只管**资源加载**,不管执行:
+
+```ts
+async function resolveProjectTrusted(options): Promise<boolean> {
+ if (options.trustOverride !== undefined) return options.trustOverride;
+ if (!hasProjectTrustInputs(options.cwd)) return true; // 没有可信任输入,直接信任
+ const { result } = await emitProjectTrustEvent(...); // 问扩展 hook
+ if (result) return result.trusted === "yes";
+ const decision = options.trustStore.get(options.cwd); // 查历史决策
+ if (decision !== null) return decision;
+ switch (options.defaultProjectTrust ?? "ask") { // 默认问用户
+ case "always": return true;
+ case "never": return false;
+ case "ask": break;
+ }
+}
+```
+
+trust 决定"要不要加载这个项目的扩展/资源"(防恶意 AGENTS.md 或扩展),**不限制**加载之后的执行。
+
+### 三种容器化方案
+
+`containerization.md` 给三种 pattern:
+
+| 方案 | 怎么做 | 适用 |
+| --- | --- | --- |
+| **OpenShell** | 整个 pi 进程跑在策略控制的沙箱 | 想全面限制 |
+| **Gondolin 扩展** | pi 留在主机,工具执行路由到 Linux 微虚拟机 | 想保护 provider auth |
+| **Plain Docker** | 整个 pi 跑在本地容器 | 简单隔离 |
+
+### beforeToolCall 是唯一的执行拦截点
+
+Pi 唯一能拦截执行的,是 s05 的 `beforeToolCall` hook——扩展可以在那里 block 某个工具。但这是扩展层的、按工具的,不是 core 内置的、系统级的权限系统。
+
+### 一句话
+
+教学版用 trust 控加载,和 Pi 对齐;执行边界也对齐——**不内置**,靠 containerization 三方案在部署层做。早期教学版发明过 `ExecutionPolicy`,但那是为了自演示,Pi 真实没有,所以本节移除了它。
+
+
diff --git a/learn-pi-agent/s11_trust_and_execution_boundary/code.ts b/learn-pi-agent/s11_trust_and_execution_boundary/code.ts
new file mode 100644
index 0000000..c819a53
--- /dev/null
+++ b/learn-pi-agent/s11_trust_and_execution_boundary/code.ts
@@ -0,0 +1,281 @@
+// s11: Trust and Execution Boundary — version 11 of mini Pi
+//
+// Aligned with Pi's real design: trust controls resource loading; the execution boundary has no built-in permission system and relies on deployment-level containerization.
+// Vocabulary boundary: this chapter adds ProjectTrust / trust / trusted / untrusted; the three containerization patterns are covered in the README.
+// Key point: the teaching version's ExecutionPolicy/Executor were removed (Pi has no such thing); executeToolCall is back to the policy-free s05 version.
+
+// Ambient declaration of the only `process` members this file uses.
+declare const process: {
+  argv: string[];
+  exitCode?: number;
+};
+
+// ============ New in s11: project trust (controls resource loading) ============
+
+// Whether the project is trusted: decides whether its resources are loaded (guards against a malicious AGENTS.md / extension).
+export type ProjectTrust = "trusted" | "untrusted";
+
+// —— Stop reasons (since s04) ——
+export type StopReason = "stop" | "toolUse" | "error";
+
+// —— Messages ——
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string };
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage;
+
+// —— Session history (since s07) ——
+// One node of the history tree; `parentId` is null for an entry appended while no leaf is active.
+export type SessionEntry = { id: string; parentId: string | null; message: AgentMessage };
+
+/**
+ * Tree of session entries with a movable "active leaf" pointer.
+ * `append` always attaches under the active leaf, so moving the pointer back to
+ * an older entry and appending again creates a branch.
+ */
+export class SessionTree {
+  // Explicit type arguments: a bare `new Map()` is Map<any, any>, which would
+  // silently turn every lookup below into `any`.
+  private entries = new Map<string, SessionEntry>();
+  private activeLeafId: string | null = null;
+  private counter = 0;
+
+  /** Stores `message` as a child of the active leaf and makes it the new active leaf. */
+  append(message: AgentMessage): SessionEntry {
+    const entry: SessionEntry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+
+  /**
+   * Re-points the active leaf at an existing entry.
+   * @throws Error when `entryId` is not in the tree.
+   */
+  moveTo(entryId: string): void {
+    if (!this.entries.has(entryId)) throw new Error(`unknown entry: ${entryId}`);
+    this.activeLeafId = entryId;
+  }
+
+  /** Messages along the parent chain of the active leaf, oldest first. */
+  currentPath(): AgentMessage[] {
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    // Walk parent links upward; the path is collected leaf-first and reversed at the end.
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break;
+      path.push(entry.message);
+      cursor = entry.parentId;
+    }
+    return path.reverse();
+  }
+
+  /** Every stored entry, in Map iteration (insertion) order. */
+  allEntries(): SessionEntry[] { return [...this.entries.values()]; }
+}
+
+export type AgentState = { session: SessionTree; model: string };
+
+// —— Tool contract ——
+// `Record` needs two type arguments to compile.
+// NOTE(review): the value type of `input` is not visible in this file; `unknown` is
+// assumed as the widest choice — narrow it if the course fixes a schema type.
+export type ToolSpec = { name: string; description: string; input: Record<string, unknown> };
+export type ToolHandler = (input: Record<string, unknown>) => string;
+export type ToolCall = { id: string; name: string; input: Record<string, unknown> };
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+/** Name-keyed registry of tools; registering a name again replaces the earlier tool. */
+export class ToolRegistry {
+  // Typed explicitly so `get` yields `Tool | undefined` rather than `any`.
+  private tools = new Map<string, Tool>();
+
+  /** Adds (or overwrites) a tool under `tool.spec.name`. */
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); }
+
+  /** Specs of all registered tools, in Map iteration order. */
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+
+  /** Number of registered tools. */
+  count(): number { return this.tools.size; }
+
+  /**
+   * Runs the handler registered under `call.name` with `call.input`.
+   * An unknown name is reported as a result string, not thrown.
+   */
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`;
+    return tool.handler(call.input);
+  }
+}
+
+// —— Context resources (since s08; s11: load gains a trust parameter, U1) ——
+export type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string };
+
+/** Holds a list of resources and hands out shallow copies of them, gated by project trust. */
+export class ResourceLoader {
+  constructor(private resources: ContextResource[]) {}
+
+  // [U1 upgrade] Takes a trust parameter. untrusted → nothing is loaded. Defaults to trusted.
+  load(trust: ProjectTrust = "trusted"): ContextResource[] {
+    const copies: ContextResource[] = [];
+    if (trust !== "untrusted") {
+      for (const resource of this.resources) copies.push({ ...resource });
+    }
+    return copies;
+  }
+}
+/**
+ * Renders each resource as a `[kind:name]` line followed by its content and
+ * joins the sections with a blank line. An empty list yields "".
+ */
+export function buildSystemPrompt(resources: ContextResource[]): string {
+  const sections: string[] = [];
+  for (const { kind, name, content } of resources) {
+    sections.push(`[${kind}:${name}]\n${content}`);
+  }
+  return sections.join("\n\n");
+}
+
+// —— Provider-facing types ——
+// Message shape sent to the provider.
+export type ProviderMessage =
+  | { role: "user" | "assistant"; content: string }
+  | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+// Events a provider streams back while answering one input.
+export type ProviderEvent =
+  | { type: "message_start" }
+  | { type: "text_delta"; text: string }
+  | { type: "tool_call"; call: ToolCall }
+  | { type: "message_end"; stopReason: StopReason };
+// The type argument matters: a bare `AsyncGenerator` yields `unknown`, so the
+// tool loop could not read `event.type` from the streamed values.
+export interface Provider { stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; }
+
+// Line-oriented output sink used for progress printing.
+export type Output = { log(line: string): void };
+/** Builds an Output that forwards every line to console.log. */
+export function createConsoleOutput(): Output {
+  return {
+    log(line) { console.log(line); },
+  };
+}
+
+// —— Since s05: execution hooks (no policy — Pi has no built-in execution permissions) ——
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string };
+export type ToolHooks = {
+  beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+  afterToolCall?: (call: ToolCall, result: string) => string;
+};
+
+/**
+ * Runs one tool call through the optional hooks and wraps the outcome as a tool-result message.
+ *
+ * - `beforeToolCall` may block the call; the message content is then `blocked: <reason>`.
+ * - An exception thrown by the tool is converted into an `error: <message>` result.
+ * - `afterToolCall`, when present, may replace the result text.
+ */
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+  const toMessage = (content: string): ToolResultMessage => ({ role: "toolResult", toolCallId: call.id, content });
+
+  // No hook (or no verdict) means the call is allowed.
+  const verdict: BeforeToolCallResult = hooks.beforeToolCall?.(call) ?? { type: "allow" };
+  if (verdict.type === "block") return toMessage(`blocked: ${verdict.reason}`);
+
+  let raw: string;
+  try {
+    raw = registry.run(call);
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    raw = `error: ${detail}`;
+  }
+  return toMessage(hooks.afterToolCall?.(call, raw) ?? raw);
+}
+
+// —— Since s06: snapshot (s11: createTurnSnapshot gains trust and passes it to load) ——
+export type TurnSnapshot = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+/** Converts session messages to the provider shape; an assistant message's `stopReason` is not carried over. */
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+  const converted: ProviderMessage[] = [];
+  for (const message of messages) {
+    if (message.role === "toolResult") {
+      converted.push({ role: "toolResult", toolCallId: message.toolCallId, content: message.content });
+    } else {
+      converted.push({ role: message.role, content: message.content });
+    }
+  }
+  return converted;
+}
+/**
+ * Captures what a turn starts from. `trust` (default "trusted") is forwarded to
+ * `loader.load`, so an untrusted project produces an empty system prompt.
+ */
+export function createTurnSnapshot(
+  state: AgentState, registry: ToolRegistry, loader: ResourceLoader, trust: ProjectTrust = "trusted",
+): TurnSnapshot {
+  const systemPrompt = buildSystemPrompt(loader.load(trust));
+  const messages = toProviderMessages(state.session.currentPath());
+  const tools = registry.getSpecs();
+  return { systemPrompt, messages, tools };
+}
+/**
+ * Builds the provider input for one turn. The system prompt and tools come from
+ * the snapshot; messages are re-read from the live session, so `snapshot.messages`
+ * is not used here.
+ */
+export function buildProviderInputFromSnapshot(snapshot: TurnSnapshot, state: AgentState): ProviderInput {
+  const { systemPrompt, tools } = snapshot;
+  const messages = toProviderMessages(state.session.currentPath());
+  return { systemPrompt, messages, tools };
+}
+
+/** Fresh agent state: an empty session tree plus the model name (default "demo-small"). */
+export function createInitialState(model = "demo-small"): AgentState {
+  const session = new SessionTree();
+  return { session, model };
+}
+/** Wraps `content` as a user message. */
+export function createUserMessage(content: string): UserMessage {
+  return { role: "user", content };
+}
+
+const MAX_TURNS = 8;
+/**
+ * Drives provider turns until one ends without asking for another tool round.
+ *
+ * Each turn streams provider events: text deltas are accumulated, every tool call
+ * is executed immediately and its result appended to the session, and
+ * `message_end` records the stop reason. Another turn runs only when the turn saw
+ * a tool call AND stopped with "toolUse"; otherwise the assistant message is
+ * appended to the session and returned. Text produced in a turn that loops again
+ * is not stored.
+ *
+ * @param snapshot supplies the system prompt and tool specs for every turn;
+ *                 messages are re-read from `state.session` each turn.
+ * @param output   receives one progress line per streamed event.
+ * @returns the final assistant message, or a synthetic "stop" message once
+ *          MAX_TURNS provider turns have been used.
+ */
+export async function runEventedToolLoop(
+  state: AgentState, provider: Provider, registry: ToolRegistry,
+  hooks: ToolHooks, snapshot: TurnSnapshot, output: Output,
+): Promise<AssistantMessage> {
+  for (let turn = 1; turn <= MAX_TURNS; turn += 1) {
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state);
+    let content = "";
+    let stopReason: StopReason = "stop";
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call);
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") {
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+  // Turn budget exhausted: record and return a placeholder instead of looping forever.
+  const stopped: AssistantMessage = { role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop" };
+  state.session.append(stopped);
+  return stopped;
+}
+
+// —— Since s09: extension runtime (accumulated) ——
+export type RuntimeEvent = { type: "message"; content: string } | { type: "done" };
+// Handler for one event variant, selected by its `type` tag. The type parameter
+// is required: without it `Extract` has no arguments and `T` below is undefined.
+type EventHandler<T extends RuntimeEvent["type"]> = (event: Extract<RuntimeEvent, { type: T }>) => void;
+export type Command = { name: string; run: () => string };
+// Surface handed to an extension: subscribe to events, register tools and commands.
+export type ExtensionAPI = {
+  on<T extends RuntimeEvent["type"]>(type: T, handler: EventHandler<T>): void;
+  registerTool(tool: Tool): void;
+  registerCommand(command: Command): void;
+};
+export type Extension = (api: ExtensionAPI) => void;
+/**
+ * Host side of the extension API: collects event handlers and commands from
+ * extensions and forwards tool registrations to the given ToolRegistry.
+ */
+export class ExtensionRuntime {
+  // Typed explicitly so `runCommand` sees `Command | undefined`, not `any`.
+  private commands = new Map<string, Command>();
+  private handlers: { type: RuntimeEvent["type"]; handler: (event: RuntimeEvent) => void }[] = [];
+  constructor(private registry: ToolRegistry) {}
+
+  /** Builds the API object passed to an extension. */
+  createApi(): ExtensionAPI {
+    return {
+      // Handlers are stored with a widened event type; `emit` only calls them for a matching `type`.
+      on: (type, handler) => { this.handlers.push({ type, handler: handler as (event: RuntimeEvent) => void }); },
+      registerTool: (tool) => { this.registry.register(tool); },
+      registerCommand: (command) => { this.commands.set(command.name, command); },
+    };
+  }
+
+  /** Installs an extension by calling it with a fresh API object. */
+  use(extension: Extension): void { extension(this.createApi()); }
+
+  /** Calls every handler subscribed to `event.type`, in subscription order. */
+  emit(event: RuntimeEvent): void {
+    for (const { type, handler } of this.handlers) if (type === event.type) handler(event);
+  }
+
+  /** Runs a registered command; an unknown name is reported as a result string, not thrown. */
+  runCommand(name: string): string {
+    const command = this.commands.get(name);
+    if (!command) return `unknown command: ${name}`;
+    return command.run();
+  }
+}
+
+// —— Since s10: runtime modes (accumulated) ——
+/** Fabricates a minimal batch of events: one echo message followed by "done". */
+export function createDemoRuntimeEvents(input: string): RuntimeEvent[] {
+  const echoed: RuntimeEvent = { type: "message", content: `收到:${input}` };
+  const finished: RuntimeEvent = { type: "done" };
+  return [echoed, finished];
+}
+export type RuntimeMode = { render(events: RuntimeEvent[]): void };
+/** Human-readable mode: prints only the content of "message" events. */
+export class PrintMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void {
+    for (let index = 0; index < events.length; index += 1) {
+      const event = events[index];
+      if (event.type !== "message") continue;
+      console.log(event.content);
+    }
+  }
+}
+/** Structured mode: prints one JSON line per event. */
+export class JsonMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void {
+    for (let index = 0; index < events.length; index += 1) {
+      const line = JSON.stringify(events[index]);
+      console.log(line);
+    }
+  }
+}
+
+// ============ Demo scaffolding ============
+
+/**
+ * Returns the argv element that follows the first occurrence of `name`,
+ * or undefined when `name` is absent or is the last argument.
+ */
+function readArg(name: string): string | undefined {
+  const position = process.argv.indexOf(name);
+  if (position < 0) return undefined;
+  return process.argv[position + 1];
+}
+
+/**
+ * Maps the raw `--trust` value to a ProjectTrust.
+ * An absent value keeps the default ("trusted"); any unrecognised value is
+ * rejected rather than silently treated as trusted, so a typo such as
+ * `--trust untrsted` cannot load resources from a project meant to be untrusted.
+ * @throws Error when `raw` is neither "trusted" nor "untrusted".
+ */
+function parseTrust(raw: string | undefined): ProjectTrust {
+  if (raw === undefined || raw === "trusted") return "trusted";
+  if (raw === "untrusted") return "untrusted";
+  throw new Error(`invalid --trust value: ${raw} (expected "trusted" or "untrusted")`);
+}
+
+/** Demo: shows which resources load under the chosen trust level, then explains the execution boundary. */
+function main(): void {
+  const output = createConsoleOutput();
+  const rawTrust = readArg("--trust");
+  // `--trust` given as the last argument has no value; refuse it instead of defaulting to trusted.
+  if (rawTrust === undefined && process.argv.includes("--trust")) {
+    throw new Error(`--trust requires a value ("trusted" or "untrusted")`);
+  }
+  const trust: ProjectTrust = parseTrust(rawTrust);
+
+  const loader = new ResourceLoader([
+    { kind: "agents", name: "AGENTS.md", content: "Use concise engineering explanations." },
+  ]);
+
+  output.log("s11: Trust and Execution Boundary");
+  output.log("");
+
+  // Loading boundary: depends on trust. untrusted → no resources are loaded (guards against malicious resources).
+  const resources = loader.load(trust);
+  output.log("[resources]");
+  if (resources.length === 0) {
+    output.log("none(untrusted,不加载任何资料)");
+  } else {
+    for (const resource of resources) {
+      output.log(resource.name);
+    }
+  }
+  output.log("");
+
+  // Execution boundary: aligned with Pi — the core has no built-in permission system and relies on deployment-level containerization.
+  output.log("[execution boundary]");
+  output.log("Pi 不在 core 内限制执行权限。执行边界靠部署层 containerization:");
+  output.log("- OpenShell:整个 pi 进程跑在策略控制的沙箱");
+  output.log("- Gondolin:pi 留主机,工具执行路由到 Linux 微虚拟机");
+  output.log("- Plain Docker:整个 pi 进程跑在本地容器");
+  output.log("core 内唯一的执行拦截点是 s05 的 beforeToolCall hook。");
+  output.log("");
+}
+
+// Entry point: report any failure (including an invalid --trust value) and signal it through the exit code.
+try {
+  main();
+} catch (error: unknown) {
+  console.error(error);
+  process.exitCode = 1;
+}
diff --git a/learn-pi-agent/s11_trust_and_execution_boundary/images/.gitkeep b/learn-pi-agent/s11_trust_and_execution_boundary/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/s12_package_distribution/README.md b/learn-pi-agent/s12_package_distribution/README.md
new file mode 100644
index 0000000..1d62538
--- /dev/null
+++ b/learn-pi-agent/s12_package_distribution/README.md
@@ -0,0 +1,260 @@
+# s12: Package Distribution — 能力整理成一个包
+
+> *一组能力,一张清单,整体带走。*
+> **Pi 边界**:能力分发边界 —— manifest 是入口,决定哪些内容可见。
+
+[上一节:s11](../s11_trust_and_execution_boundary/) → `s12`
+
+---
+
+## 问题
+
+到现在,工具、命令、项目资料都是零散定义的。想复用一整套能力(某个项目的全部工具 + 命令 + 资料),没有个地方说明"这包里到底有什么"。
+
+零散定义没法整体分发,也没法整体加载——拿到一堆内容,不知道哪些该用、哪些是多余的。
+
+s12 要把它们整理成一个**带清单的包**。
+
+---
+
+## 解决方案
+
+一个包由两部分组成:
+
+```text
+manifest 清单:声明包里有哪些 tools / commands / resources(按名字)
+contents 实际内容:名字 → 内容
+```
+
+`loadPackage` 按 manifest 从 contents 里挑出对应内容。**清单就是入口**:清单上列了才加载,没列的(哪怕 contents 里有)一律不进结果。
+
+加载完还要接回主线:`installLoadedPackage` 把 loaded 结果装回已有的 ToolRegistry、commands 和 resources。这样 package 才不只是一个清单解析 demo,而是能真的把能力分发回 mini Pi。
+
+---
+
+## 工作原理
+
+**先定义清单和包。**
+
+```ts
+export type PackageManifest = {
+ name: string;
+ tools: string[];
+ commands: string[];
+ resources: string[];
+};
+
+export type Package = {
+ manifest: PackageManifest;
+  contents: Record<string, string>;
+};
+```
+
+**按名字挑内容。** `pick` 从 contents 里取清单上列出的名字;清单列了但 contents 里没有的,跳过(不会因为缺一项就崩)。
+
+```ts
+function pick(contents: Record<string, string>, names: string[]): Record<string, string> {
+  const result: Record<string, string> = {};
+ for (const name of names) {
+ const value = contents[name];
+ if (value !== undefined) {
+ result[name] = value;
+ }
+ }
+ return result;
+}
+```
+
+**按清单加载。** `loadPackage` 对三类资源分别 pick,产出 LoadedPackage。
+
+```ts
+export function loadPackage(pkg: Package): LoadedPackage {
+ return {
+ name: pkg.manifest.name,
+ tools: pick(pkg.contents, pkg.manifest.tools),
+ commands: pick(pkg.contents, pkg.manifest.commands),
+ resources: pick(pkg.contents, pkg.manifest.resources),
+ };
+}
+```
+
+**再安装回主线。** `loadPackage` 只负责挑内容,`installLoadedPackage` 才负责把内容接回前面已经有的零件。
+
+```ts
+export function installLoadedPackage(
+ loaded: LoadedPackage,
+ registry: ToolRegistry,
+  commands: Map<string, Command>,
+ resources: ContextResource[],
+): void {
+ // loaded.tools -> registry.register(...)
+ // loaded.commands -> commands.set(...)
+ // loaded.resources -> resources.push(...)
+}
+```
+
+加载后的 tools / commands / resources,分别注入 s02 的 ToolRegistry、s09 的 commands、s08 的 resources 列表(`ContextResource[]`)。这一节把前面散落的能力收拢成一个可分发、可安装的整体。
+
+> 这一节真正建立的是**能力分发边界**:manifest 是唯一入口,决定一个包对外暴露什么。contents 里再多东西,只要 manifest 没列,就不会被加载——分发方靠清单精确控制可见能力。
+
+---
+
+## 试一下
+
+运行:
+
+```sh
+npm run s12
+```
+
+输出类似:
+
+```text
+s12: Package Distribution
+
+[manifest]
+name: demo-package
+tools: note
+commands: status
+resources: AGENTS.md
+
+[loaded]
+tools: 1
+commands: 1
+resources: 1
+
+[installed]
+registry tools: 1
+commands: 1
+resources: 1
+note -> package tool note: tool: 保存一条笔记
+/status -> command: 打印包状态
+```
+
+观察重点:contents 里其实有 4 项(含一个 `ignored`),但 loaded 只挑出 manifest 列出的 3 类各 1 项——`ignored` 因为不在清单里,没有被加载。随后 installed 证明这 3 项已经接回 mini Pi 的 registry、commands 和 resources。
+
+---
+
+## 接入主线
+
+s12 在 s11 上累积,是 mini Pi 的最后一版。相对 s11 的变更:
+
+| 组件 | s11 | s12 |
+| --- | --- | --- |
+| 新增类型 | — | `PackageManifest` / `Package` / `LoadedPackage` |
+| 新增函数 | — | `loadPackage` / `installLoadedPackage` / `pick` |
+| 主循环 / `ProviderInput` | — | **不变**(纯新增) |
+
+**焊接点**:`loadPackage(pkg)` 按 manifest 从 contents 挑出 tools / commands / resources;`installLoadedPackage(loaded, registry, commands, resources)` 把它们注入既有 `ToolRegistry` / commands / resources 列表(`ContextResource[]`)。s01–s11 的全部能力至此收拢成一个完整 mini Pi。
+
+---
+
+## 课程结束
+
+12 节走完,mini Pi 覆盖了这条主线:
+
+```text
+s01 Agent Core 接住一轮消息
+s02 Tool Contract 工具拆成说明和执行
+s03 Provider Event Stream provider 分段返回事件
+s04 Evented Tool Loop 工具请求 → 执行 → 结果回写,循环
+s05 Tool Hook Boundary 执行前后留插口
+s06 Turn Snapshot 一轮开始先拍快照
+s07 Session Tree 历史能分叉
+s08 Context Resources 项目资料进入输入
+s09 Extension Runtime 外部代码通过 API 接入
+s10 Runtime Modes core 产事件,外层决定展示
+s11 Trust and Execution 加载靠 trust,执行靠容器
+s12 Package Distribution 能力整理成包分发
+```
+
+每一节只加一个机制,机制之间首尾相接。完整的 turn 执行链和总览,见[项目根 README](../README.md)。
+
+---
+
+
+## Pi 源码溯源:PiManifest 和三种包源
+
+教学版用 PackageManifest(名字列表)+ contents(内容字典)+ loadPackage。Pi 的 `packages/coding-agent` 有完整的包管理,支持 npm/git/local 三种来源。
+
+### 源码在哪
+
+- `packages/coding-agent/docs/packages.md` — 包机制官方文档
+- `packages/coding-agent/src/core/package-manager.ts:92` — `PackageManager` 接口
+- `packages/coding-agent/src/core/package-manager.ts:147` — `PiManifest`
+- `packages/coding-agent/src/core/resource-loader.ts:22` — `ResourceLoader`
+
+### PiManifest 的真实形状
+
+教学版的 manifest 是 `{ tools, commands, resources }` 三个名字列表。Pi 的 manifest(`package-manager.ts:147`)声明四类资源的**路径**:
+
+```ts
+interface PiManifest {
+ extensions?: string[]; // 扩展路径
+ skills?: string[]; // skill 路径
+ prompts?: string[]; // 提示模板路径
+ themes?: string[]; // 主题路径
+}
+```
+
+放在 `package.json` 的 `pi` 字段里:
+
+```json
+{
+ "name": "my-package",
+ "keywords": ["pi-package"],
+ "pi": {
+ "extensions": ["./extensions"],
+ "skills": ["./skills"],
+ "prompts": ["./prompts"],
+ "themes": ["./themes"]
+ }
+}
+```
+
+教学版的 tools/commands/resources 对应 Pi 的 extensions/skills/prompts/themes——Pi 没有单独的 "tools" 和 "commands",它们都由 extension 注册(s09)。
+
+### 三种包来源
+
+教学版的包是内存对象。Pi 的 `PackageManager`(`package-manager.ts:92`)支持三种来源:
+
+```ts
+interface PackageManager {
+ resolve(onMissing?): Promise;
+ install(source: string, options?): Promise;
+ remove(source: string, options?): Promise;
+ update(source?): Promise;
+}
+```
+
+| 来源 | 格式 | 例子 |
+| --- | --- | --- |
+| npm | `npm:@scope/pkg@1.2.3` | 从 npm 安装 |
+| git | `git:github.com/user/repo@v1` | 从 git 仓库 |
+| local | `/absolute/path` | 本地路径 |
+
+教学版的 `pick(contents, names)` 是 Pi `resolve` 的极简版——Pi 的 resolve 要解析三种来源、处理依赖、去重,复杂得多。
+
+### glob + 排除 + 强制包含
+
+manifest 的路径支持 glob,还能排除和强制包含:
+
+```jsonc
+"extensions": [
+ "./extensions/**/*",
+ "!extensions/legacy.ts", // 排除
+ "+themes/legacy.json" // 强制包含(即使被排除规则匹配)
+]
+```
+
+教学版没有这层路径模式。
+
+### 安全警告
+
+`packages.md` 明确:第三方包拿到的是完全系统访问权限(呼应 s11——Pi 不内置 permission)。装一个 pi 包等于让它跑任意代码,信任靠包来源和 s11 的 trust 机制。
+
+### 一句话
+
+教学版的 PackageManifest 立的是"清单驱动的按需加载"。Pi 把它坐实成 `pi` 字段声明四类资源路径 + npm/git/local 三种来源 + glob 排除规则 + 完整的 install/remove/update 生命周期。教学版用内存对象保留最小路径,但"manifest 是入口、决定哪些内容可见"这个心智一致。
+
+
diff --git a/learn-pi-agent/s12_package_distribution/code.ts b/learn-pi-agent/s12_package_distribution/code.ts
new file mode 100644
index 0000000..76b1ff8
--- /dev/null
+++ b/learn-pi-agent/s12_package_distribution/code.ts
@@ -0,0 +1,340 @@
+// s12: Package Distribution — mini Pi 的第 12 版(完整版)
+//
+// 把工具、命令、项目资料整理成一个带清单的包,按清单加载、整体分发。
+// 词汇边界:本章新增 PackageManifest / Package / LoadedPackage / loadPackage / pick / manifest / contents。
+// 关键:manifest 是入口,决定哪些 contents 可见;清单没列的内容(ignored)不会被加载。
+
+declare const process: { // ambient declaration covering only the property this file touches
+ exitCode?: number; // set to 1 by the top-level catch when main() throws
+};
+
+// ============ s12 新增:能力打包分发 ============
+
+export type PackageManifest = { // names, per category, the Package.contents entries that loadPackage will pick
+ name: string; // copied to LoadedPackage.name
+ tools: string[]; // content names loaded as tools
+ commands: string[]; // content names loaded as commands
+ resources: string[]; // content names loaded as resources
+};
+
+export type Package = { // a manifest together with the raw content it selects from
+  manifest: PackageManifest;
+  contents: Record<string, string>; // content name -> text; entries the manifest does not list are never loaded
+};
+
+export type LoadedPackage = { // result of loadPackage: the manifest-selected slice of a Package
+  name: string; // copied from manifest.name
+  tools: Record<string, string>; // tool name -> content text
+  commands: Record<string, string>; // command name -> content text
+  resources: Record<string, string>; // resource name -> content text
+};
+
+/** Copy the entries of `contents` named in `names`; names the package does not ship are skipped. */
+function pick(contents: Record<string, string>, names: string[]): Record<string, string> {
+  const picked: Record<string, string> = {};
+  for (const name of names) {
+    // Own-property lookup only: a manifest name such as "constructor" must not resolve to Object.prototype.
+    const value = Object.hasOwn(contents, name) ? contents[name] : undefined;
+    if (value !== undefined) picked[name] = value;
+  }
+  return picked;
+}
+
+/** Resolve a package's manifest against its contents; anything the manifest omits is dropped. */
+export function loadPackage(pkg: Package): LoadedPackage {
+  const { manifest, contents } = pkg;
+  const tools = pick(contents, manifest.tools);
+  const commands = pick(contents, manifest.commands);
+  const resources = pick(contents, manifest.resources);
+  return { name: manifest.name, tools, commands, resources };
+}
+
+/** Install a loaded package: tools into `registry`, commands into `commands`, resources appended to `resources` (all mutated in place). */
+export function installLoadedPackage(
+  loaded: LoadedPackage, registry: ToolRegistry,
+  commands: Map<string, Command>,
+  resources: ContextResource[],
+): void {
+  for (const [name, content] of Object.entries(loaded.tools)) {
+    registry.register({
+      spec: { name, description: content, input: {} }, // the content text doubles as the description
+      handler: () => `package tool ${name}: ${content}`,
+    });
+  }
+  // Commands: running one returns its content text unchanged.
+  for (const [name, content] of Object.entries(loaded.commands)) {
+    commands.set(name, { name, run: () => content });
+  }
+  // Resources are always filed under kind "agents".
+  for (const [name, content] of Object.entries(loaded.resources)) {
+    resources.push({ kind: "agents", name, content });
+  }
+}
+
+// —— 以下为 s01–s11 累积的全部能力(mini Pi 完整版)——
+
+export type ProjectTrust = "trusted" | "untrusted"; // ResourceLoader.load returns nothing for "untrusted"
+export type StopReason = "stop" | "toolUse" | "error"; // carried by assistant messages and message_end events
+export type UserMessage = { role: "user"; content: string };
+export type AssistantMessage = { role: "assistant"; content: string; stopReason: StopReason };
+export type ToolResultMessage = { role: "toolResult"; toolCallId: string; content: string }; // toolCallId is the id of the ToolCall being answered
+export type AgentMessage = UserMessage | AssistantMessage | ToolResultMessage; // every message kind a session entry can hold
+
+export type SessionEntry = { id: string; parentId: string | null; message: AgentMessage }; // parentId is null for a root entry
+export class SessionTree { // messages stored as a tree; the active leaf marks the tip of the current branch
+  private entries = new Map<string, SessionEntry>(); // entry id -> entry; typed so lookups are not `any`
+  private activeLeafId: string | null = null; // null until the first append
+  private counter = 0; // source of the sequential ids "e1", "e2", ...
+  append(message: AgentMessage): SessionEntry { // adds a child of the active leaf and makes it the new leaf
+    const entry: SessionEntry = { id: `e${++this.counter}`, parentId: this.activeLeafId, message };
+    this.entries.set(entry.id, entry);
+    this.activeLeafId = entry.id;
+    return entry;
+  }
+  moveTo(entryId: string): void { // re-points the active leaf; the next append branches from there
+    if (!this.entries.has(entryId)) throw new Error(`unknown entry: ${entryId}`);
+    this.activeLeafId = entryId;
+  }
+  currentPath(): AgentMessage[] { // messages from the root down to the active leaf
+    const path: AgentMessage[] = [];
+    let cursor = this.activeLeafId;
+    while (cursor) {
+      const entry = this.entries.get(cursor);
+      if (!entry) break;
+      path.push(entry.message);
+      cursor = entry.parentId; // walk upward; the reverse below restores root-first order
+    }
+    return path.reverse();
+  }
+  allEntries(): SessionEntry[] { return [...this.entries.values()]; } // every entry, in insertion order
+}
+export type AgentState = { session: SessionTree; model: string };
+
+export type ToolSpec = { name: string; description: string; input: Record<string, unknown> }; // the part of a tool exposed through getSpecs()
+export type ToolHandler = (input: Record<string, unknown>) => string;
+export type ToolCall = { id: string; name: string; input: Record<string, unknown> }; // one requested invocation; `name` selects the tool
+export type Tool = { spec: ToolSpec; handler: ToolHandler };
+export class ToolRegistry { // tools keyed by spec.name
+  private tools = new Map<string, Tool>();
+  register(tool: Tool): void { this.tools.set(tool.spec.name, tool); } // a later registration under the same name replaces the earlier one
+  getSpecs(): ToolSpec[] { return [...this.tools.values()].map((tool) => tool.spec); }
+  count(): number { return this.tools.size; }
+  run(call: ToolCall): string {
+    const tool = this.tools.get(call.name);
+    if (!tool) return `unknown tool: ${call.name}`; // reported as a result string rather than thrown
+    return tool.handler(call.input);
+  }
+}
+
+export type ContextResource = { kind: "agents" | "skill" | "prompt"; name: string; content: string }; // one named piece of prompt context
+export class ResourceLoader {
+  constructor(private resources: ContextResource[]) {}
+  load(trust: ProjectTrust = "trusted"): ContextResource[] {
+    // Untrusted projects contribute nothing; otherwise return shallow copies of the stored resources.
+    return trust === "untrusted" ? [] : this.resources.map((resource) => ({ ...resource }));
+  }
+}
+export function buildSystemPrompt(resources: ContextResource[]): string { // one "[kind:name]" section per resource, blank line between sections
+  return resources.map(({ kind, name, content }) => `[${kind}:${name}]\n${content}`).join("\n\n");
+}
+
+export type ProviderMessage = // message shape handed to a provider
+  | { role: "user" | "assistant"; content: string }
+  | { role: "toolResult"; toolCallId: string; content: string };
+export type ProviderInput = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] };
+export type ProviderEvent = // one item of a provider's response stream
+  | { type: "message_start" }
+  | { type: "text_delta"; text: string }
+  | { type: "tool_call"; call: ToolCall }
+  | { type: "message_end"; stopReason: StopReason };
+export interface Provider { stream(input: ProviderInput): AsyncGenerator<ProviderEvent>; } // element type given so consumers can narrow on event.type
+
+export type Output = { log(line: string): void }; // line-oriented sink used by the loop and the demo
+export function createConsoleOutput(): Output { return { log: (line) => console.log(line) }; } // Output that forwards each line to console.log
+
+export type BeforeToolCallResult = { type: "allow" } | { type: "block"; reason: string }; // "block" makes executeToolCall skip the tool and report the reason
+export type ToolHooks = { // both hooks are optional
+ beforeToolCall?: (call: ToolCall) => BeforeToolCallResult;
+ afterToolCall?: (call: ToolCall, result: string) => string; // its return value replaces the tool result
+};
+export function executeToolCall(registry: ToolRegistry, hooks: ToolHooks, call: ToolCall): ToolResultMessage {
+  const toMessage = (content: string): ToolResultMessage => ({ role: "toolResult", toolCallId: call.id, content });
+  const verdict = hooks.beforeToolCall?.(call) ?? { type: "allow" }; // no hook means allow
+  if (verdict.type === "block") return toMessage(`blocked: ${verdict.reason}`);
+  let raw: string;
+  try { raw = registry.run(call); }
+  catch (error) { raw = `error: ${error instanceof Error ? error.message : String(error)}`; } // a throwing handler becomes an "error: ..." result
+  return toMessage(hooks.afterToolCall?.(call, raw) ?? raw); // afterToolCall may rewrite the result
+}
+
+export type TurnSnapshot = { systemPrompt: string; messages: ProviderMessage[]; tools: ToolSpec[] }; // provider-facing values captured at one moment
+function toProviderMessages(messages: AgentMessage[]): ProviderMessage[] {
+  // Project each message down to the provider fields; assistant messages lose `stopReason` here.
+  return messages.map((message): ProviderMessage =>
+    message.role === "toolResult"
+      ? { role: "toolResult", toolCallId: message.toolCallId, content: message.content }
+      : { role: message.role, content: message.content },
+  );
+}
+export function createTurnSnapshot(
+  state: AgentState, registry: ToolRegistry, loader: ResourceLoader, trust: ProjectTrust = "trusted",
+): TurnSnapshot {
+  // Capture the prompt, the current message path and the tool specs as they stand at this call.
+  const systemPrompt = buildSystemPrompt(loader.load(trust));
+  const messages = toProviderMessages(state.session.currentPath());
+  const tools = registry.getSpecs();
+  return { systemPrompt, messages, tools };
+}
+export function buildProviderInputFromSnapshot(snapshot: TurnSnapshot, state: AgentState): ProviderInput {
+  // Prompt and tools come from the snapshot; messages are re-read from the session on every call,
+  // so snapshot.messages is not used here.
+  const { systemPrompt, tools } = snapshot;
+  const messages = toProviderMessages(state.session.currentPath());
+  return { systemPrompt, messages, tools };
+}
+
+export function createInitialState(model = "demo-small"): AgentState { return { session: new SessionTree(), model }; } // fresh state with an empty session tree
+export function createUserMessage(content: string): UserMessage { return { role: "user", content }; } // wraps text as a user-role message
+
+const MAX_TURNS = 8; // upper bound on provider round-trips per run
+export async function runEventedToolLoop(
+  state: AgentState, provider: Provider, registry: ToolRegistry,
+  hooks: ToolHooks, snapshot: TurnSnapshot, output: Output,
+): Promise<AssistantMessage> { // resolves with the assistant message that ended the run (also appended to the session)
+  let turns = 0;
+  while (true) {
+    turns += 1;
+    if (turns > MAX_TURNS) { // turn budget exhausted: end with a synthetic assistant message
+      const stopped: AssistantMessage = { role: "assistant", content: "(达到最大轮次,停止)", stopReason: "stop" };
+      state.session.append(stopped);
+      return stopped;
+    }
+    const providerInput = buildProviderInputFromSnapshot(snapshot, state); // rebuilt each turn so new tool results are sent
+    let content = "";
+    let stopReason: StopReason = "stop"; // used if the stream never emits message_end
+    let sawToolCall = false;
+    for await (const event of provider.stream(providerInput)) {
+      if (event.type === "message_start") output.log("message_start");
+      else if (event.type === "text_delta") { output.log(`text_delta: ${event.text}`); content += event.text; }
+      else if (event.type === "tool_call") {
+        sawToolCall = true;
+        output.log(`tool_call: ${event.call.name}`);
+        const resultMessage = executeToolCall(registry, hooks, event.call); // runs as soon as the event arrives
+        state.session.append(resultMessage);
+        output.log(`tool_result: ${resultMessage.content}`);
+      } else if (event.type === "message_end") { stopReason = event.stopReason; output.log(`message_end: ${stopReason}`); }
+    }
+    if (!sawToolCall || stopReason !== "toolUse") { // loop again only when a tool ran and the provider asked to continue
+      const assistant: AssistantMessage = { role: "assistant", content, stopReason };
+      state.session.append(assistant);
+      return assistant;
+    }
+  }
+}
+
+export type RuntimeEvent = { type: "message"; content: string } | { type: "done" };
+type EventHandler<T extends RuntimeEvent["type"]> = (event: Extract<RuntimeEvent, { type: T }>) => void; // handler for the one variant whose `type` is T
+export type Command = { name: string; run: () => string };
+export type ExtensionAPI = { // the surface handed to an extension function
+  on<T extends RuntimeEvent["type"]>(type: T, handler: EventHandler<T>): void; // T ties the handler's event to the subscribed type
+  registerTool(tool: Tool): void;
+  registerCommand(command: Command): void;
+};
+export type Extension = (api: ExtensionAPI) => void;
+export class ExtensionRuntime { // hosts extensions: collects their event handlers and commands, forwards tools to the registry
+  private commands = new Map<string, Command>(); // typed so runCommand's lookup is not `any`
+  private handlers: { type: RuntimeEvent["type"]; handler: (event: RuntimeEvent) => void }[] = [];
+  constructor(private registry: ToolRegistry) {}
+  createApi(): ExtensionAPI {
+    return {
+      on: (type, handler) => { this.handlers.push({ type, handler: handler as (event: RuntimeEvent) => void }); }, // cast is safe because emit filters by type
+      registerTool: (tool) => { this.registry.register(tool); },
+      registerCommand: (command) => { this.commands.set(command.name, command); },
+    };
+  }
+  use(extension: Extension): void { extension(this.createApi()); } // each extension receives its own API object
+  emit(event: RuntimeEvent): void {
+    for (const { type, handler } of this.handlers) if (type === event.type) handler(event); // matching handlers run in subscription order
+  }
+  runCommand(name: string): string {
+    const command = this.commands.get(name);
+    if (!command) return `unknown command: ${name}`; // reported as a string rather than thrown
+    return command.run();
+  }
+}
+
+export function createDemoRuntimeEvents(input: string): RuntimeEvent[] { // fixed two-event sequence: one message echoing the input, then done
+ return [{ type: "message", content: `收到:${input}` }, { type: "done" }];
+}
+export type RuntimeMode = { render(events: RuntimeEvent[]): void }; // a way of presenting runtime events
+export class PrintMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void { // prints message contents only; "done" events produce no output
+    events.forEach((event) => { if (event.type === "message") console.log(event.content); });
+  }
+}
+export class JsonMode implements RuntimeMode {
+  render(events: RuntimeEvent[]): void { // prints every event as one JSON line
+    events.forEach((event) => { console.log(JSON.stringify(event)); });
+  }
+}
+
+// ============ 演示脚手架 ============
+
+function createPackage(): Package { // demo fixture: four content entries, three of them listed in the manifest
+ return {
+ manifest: {
+ name: "demo-package",
+ tools: ["note"],
+ commands: ["status"],
+ resources: ["AGENTS.md"],
+ },
+ contents: {
+ note: "tool: 保存一条笔记",
+ status: "command: 打印包状态",
+ "AGENTS.md": "Use package resources when building context.",
+ ignored: "这份内容不在清单里,不会被加载", // not named by the manifest, so loadPackage leaves it out
+ },
+ };
+}
+
+function main(): void { // demo: load the fixture package, install it, then print manifest / loaded / installed summaries
+  const output = createConsoleOutput();
+  const pkg = createPackage();
+  const loaded = loadPackage(pkg);
+  const registry = new ToolRegistry();
+  const commands = new Map<string, Command>(); // typed so the "status" lookup below is checked
+  const resources: ContextResource[] = [];
+
+  installLoadedPackage(loaded, registry, commands, resources);
+
+  output.log("s12: Package Distribution");
+  output.log("");
+
+  output.log("[manifest]");
+  output.log(`name: ${pkg.manifest.name}`);
+  output.log(`tools: ${pkg.manifest.tools.join(", ")}`);
+  output.log(`commands: ${pkg.manifest.commands.join(", ")}`);
+  output.log(`resources: ${pkg.manifest.resources.join(", ")}`);
+  output.log("");
+
+  output.log("[loaded]"); // counts of what the manifest actually selected
+  output.log(`tools: ${Object.keys(loaded.tools).length}`);
+  output.log(`commands: ${Object.keys(loaded.commands).length}`);
+  output.log(`resources: ${Object.keys(loaded.resources).length}`);
+  output.log("");
+
+  output.log("[installed]"); // counts after installation, plus one tool run and one command run
+  output.log(`registry tools: ${registry.count()}`);
+  output.log(`commands: ${commands.size}`);
+  output.log(`resources: ${resources.length}`);
+  output.log(`note -> ${registry.run({ id: "pkg-tool-1", name: "note", input: {} })}`);
+  output.log(`/status -> ${commands.get("status")?.run() ?? "missing"}`);
+  output.log("");
+}
+
+try { // script entry point: run the demo once
+ main();
+} catch (error: unknown) {
+ console.error(error);
+ process.exitCode = 1; // records the failure via exitCode; nothing here calls an exit function
+}
diff --git a/learn-pi-agent/s12_package_distribution/images/.gitkeep b/learn-pi-agent/s12_package_distribution/images/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/learn-pi-agent/tsconfig.json b/learn-pi-agent/tsconfig.json
new file mode 100644
index 0000000..24249ed
--- /dev/null
+++ b/learn-pi-agent/tsconfig.json
@@ -0,0 +1,13 @@
+{
+ "compilerOptions": {
+ "target": "ES2022",
+ "module": "NodeNext",
+ "moduleResolution": "NodeNext",
+ "strict": true,
+ "skipLibCheck": true,
+ "esModuleInterop": true,
+ "forceConsistentCasingInFileNames": true,
+ "noEmit": true
+ },
+ "include": ["s*/code.ts"]
+}