#!/usr/bin/env python3
"""
v2_todo_agent.py - Mini Claude Code: Structured Planning (~300 lines)

Core Philosophy: "Make Plans Visible"
=====================================
v1 works great for simple tasks. But ask it to "refactor auth, add tests,
update docs" and watch what happens. Without explicit planning, the model:
  - Jumps between tasks randomly
  - Forgets completed steps
  - Loses focus mid-way

The Problem - "Context Fade":
----------------------------
In v1, plans exist only in the model's "head":

    v1: "I'll do A, then B, then C"  (invisible)
        After 10 tool calls: "Wait, what was I doing?"

The Solution - TodoWrite Tool:
-----------------------------
v2 adds ONE new tool that fundamentally changes how the agent works:

    v2:
      [ ] Refactor auth module
      [>] Add unit tests         <- Currently working on this
      [ ] Update documentation

Now both YOU and the MODEL can see the plan. The model can:
  - Update status as it works
  - See what's done and what's next
  - Stay focused on one task at a time

Key Constraints (not arbitrary - these are guardrails):
------------------------------------------------------
    | Rule              | Why                              |
    |-------------------|----------------------------------|
    | Max 20 items      | Prevents infinite task lists     |
    | One in_progress   | Forces focus on one thing        |
    | Required fields   | Ensures structured output        |

The Deep Insight:
----------------
> "Structure constrains AND enables."

Todo constraints (max items, one in_progress) ENABLE (visible plan,
tracked progress).

This pattern appears everywhere in agent design:
  - max_tokens constrains -> enables manageable responses
  - Tool schemas constrain -> enable structured calls
  - Todos constrain -> enable complex task completion

Good constraints aren't limitations. They're scaffolding.

Usage:
    python v2_todo_agent.py
"""

import os
import subprocess
import sys
from pathlib import Path

from anthropic import Anthropic
from dotenv import load_dotenv

load_dotenv(override=True)


# =============================================================================
# Configuration
# =============================================================================

WORKDIR = Path.cwd()

client = Anthropic()
MODEL = "claude-sonnet-4-5-20250929"


# =============================================================================
# TodoManager - The core addition in v2
# =============================================================================

class TodoManager:
    """
    Manages a structured task list with enforced constraints.

    Key Design Decisions:
    --------------------
    1. Max 20 items: Prevents the model from creating endless lists
    2. One in_progress: Forces focus - can only work on ONE thing at a time
    3. Required fields: Each item needs content, status, and activeForm

    The activeForm field deserves explanation:
    - It's the PRESENT TENSE form of what's happening
    - Shown when status is "in_progress"
    - Example: content="Add tests", activeForm="Adding unit tests..."

    This gives real-time visibility into what the agent is doing.
    """

    # Upper bound on the number of todos accepted by update().
    MAX_ITEMS = 20
    # The only status values update() accepts (compared after lower-casing).
    VALID_STATUSES = ("pending", "in_progress", "completed")

    def __init__(self):
        # Validated items from the last successful update(); each entry is a
        # dict with the keys "content", "status" and "activeForm".
        self.items = []

    @staticmethod
    def _clean(value) -> str:
        """Return *value* as stripped text, mapping None to the empty string.

        Plain ``str(None)`` would yield the text "None", which would slip
        past the "field required" checks in update().
        """
        return "" if value is None else str(value).strip()

    def update(self, items: list) -> str:
        """
        Validate and update the todo list.

        The model sends a complete new list each time. We validate it,
        store it, and return a rendered view that the model will see.
        The stored list is only replaced when every check passes.

        Validation Rules:
        - items must be a list (or tuple) of dict-like objects
        - Each item must have: content, status, activeForm
        - Status must be: pending | in_progress | completed
          (a missing or None status defaults to "pending")
        - Only ONE item can be in_progress at a time
        - Maximum 20 items allowed

        Args:
            items: Complete replacement list of todo dicts.

        Returns:
            Rendered text view of the todo list.

        Raises:
            ValueError: If any validation rule above is violated.
        """
        if not isinstance(items, (list, tuple)):
            raise ValueError("items must be a list of todo objects")

        validated = []
        in_progress_count = 0

        for i, item in enumerate(items):
            if not isinstance(item, dict):
                raise ValueError(f"Item {i}: must be an object")

            # Extract fields; None is treated the same as "not provided".
            content = self._clean(item.get("content"))
            raw_status = item.get("status")
            status = "pending" if raw_status is None else str(raw_status).strip().lower()
            active_form = self._clean(item.get("activeForm"))

            # Validation checks
            if not content:
                raise ValueError(f"Item {i}: content required")
            if status not in self.VALID_STATUSES:
                raise ValueError(f"Item {i}: invalid status '{status}'")
            if not active_form:
                raise ValueError(f"Item {i}: activeForm required")

            if status == "in_progress":
                in_progress_count += 1

            validated.append({
                "content": content,
                "status": status,
                "activeForm": active_form,
            })

        # Enforce list-level constraints
        if len(validated) > self.MAX_ITEMS:
            raise ValueError(f"Max {self.MAX_ITEMS} todos allowed")
        if in_progress_count > 1:
            raise ValueError("Only one task can be in_progress at a time")

        self.items = validated
        return self.render()

    def render(self) -> str:
        """
        Render the todo list as human-readable text.

        Format:
            [x] Completed task
            [>] In progress task <- Doing something...
            [ ] Pending task

            (2/3 completed)

        This rendered text is what the model sees as the tool result.
        It can then update the list based on its current state.

        Returns:
            The rendered list, or "No todos." when the list is empty.
        """
        if not self.items:
            return "No todos."

        lines = []
        for item in self.items:
            if item["status"] == "completed":
                lines.append(f"[x] {item['content']}")
            elif item["status"] == "in_progress":
                lines.append(f"[>] {item['content']} <- {item['activeForm']}")
            else:
                lines.append(f"[ ] {item['content']}")

        completed = sum(1 for t in self.items if t["status"] == "completed")
        lines.append(f"\n({completed}/{len(self.items)} completed)")

        return "\n".join(lines)


# Global todo manager instance
TODO = TodoManager()


# =============================================================================
# System Prompt - Updated for v2
# =============================================================================

SYSTEM = f"""You are a coding agent at {WORKDIR}.

Loop: plan -> act with tools -> update todos -> report.

Rules:
- Use TodoWrite to track multi-step tasks
- Mark tasks in_progress before starting, completed when done
- Prefer tools over prose. Act, don't just explain.
- After finishing, summarize what changed."""


# =============================================================================
# System Reminders - Soft prompts to encourage todo usage
# =============================================================================

# Shown at the start of conversation
INITIAL_REMINDER = "Use TodoWrite for multi-step tasks."

# Shown if model hasn't updated todos in a while
NAG_REMINDER = "10+ turns without todo update. Please update todos."
# =============================================================================
# Tool Definitions (v1 tools + TodoWrite)
# =============================================================================

TOOLS = [
    # v1 tools (unchanged)
    {
        "name": "bash",
        "description": "Run a shell command.",
        "input_schema": {
            "type": "object",
            "properties": {"command": {"type": "string"}},
            "required": ["command"],
        },
    },
    {
        "name": "read_file",
        "description": "Read file contents.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string"},
                "limit": {"type": "integer"},
            },
            "required": ["path"],
        },
    },
    {
        "name": "write_file",
        "description": "Write content to file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string"},
                "content": {"type": "string"},
            },
            "required": ["path", "content"],
        },
    },
    {
        "name": "edit_file",
        "description": "Replace exact text in file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string"},
                "old_text": {"type": "string"},
                "new_text": {"type": "string"},
            },
            "required": ["path", "old_text", "new_text"],
        },
    },

    # NEW in v2: TodoWrite
    # This is the key addition that enables structured planning
    {
        "name": "TodoWrite",
        "description": "Update the task list. Use to plan and track progress.",
        "input_schema": {
            "type": "object",
            "properties": {
                "items": {
                    "type": "array",
                    "description": "Complete list of tasks (replaces existing)",
                    "items": {
                        "type": "object",
                        "properties": {
                            "content": {
                                "type": "string",
                                "description": "Task description",
                            },
                            "status": {
                                "type": "string",
                                "enum": ["pending", "in_progress", "completed"],
                                "description": "Task status",
                            },
                            "activeForm": {
                                "type": "string",
                                "description": "Present tense action, e.g. 'Reading files'",
                            },
                        },
                        "required": ["content", "status", "activeForm"],
                    },
                }
            },
            "required": ["items"],
        },
    },
]


# =============================================================================
# Tool Implementations (v1 + TodoWrite)
# =============================================================================

def safe_path(p: str) -> Path:
    """Resolve *p* against WORKDIR and ensure it stays within the workspace.

    Raises:
        ValueError: If the resolved path is not located under WORKDIR.
    """
    path = (WORKDIR / p).resolve()
    if not path.is_relative_to(WORKDIR):
        raise ValueError(f"Path escapes workspace: {p}")
    return path


def run_bash(cmd: str) -> str:
    """Execute a shell command in WORKDIR and return its combined output.

    Commands containing any of a few known-dangerous substrings are refused.
    Failures (timeout after 60s, spawn errors) are reported as "Error: ..."
    strings rather than raised, so the model can read and react to them.
    Output is capped at 50000 characters.
    """
    dangerous = ["rm -rf /", "sudo", "shutdown", "reboot"]
    if any(d in cmd for d in dangerous):
        return "Error: Dangerous command blocked"
    try:
        result = subprocess.run(
            cmd,
            shell=True,
            cwd=WORKDIR,
            capture_output=True,
            text=True,
            timeout=60,
        )
        output = (result.stdout + result.stderr).strip()
        return output[:50000] if output else "(no output)"
    except subprocess.TimeoutExpired:
        return "Error: Timeout"
    except Exception as e:
        return f"Error: {e}"


def run_read(path: str, limit: int = None) -> str:
    """Read a workspace file, optionally truncated to the first *limit* lines.

    When truncation happens, a trailing "... (N more)" line reports how many
    lines were omitted. A missing, zero or negative limit returns the whole
    file. The result is capped at 50000 characters; errors are returned as
    "Error: ..." strings.
    """
    try:
        text = safe_path(path).read_text()
        lines = text.splitlines()
        total = len(lines)
        # Only positive limits truncate; a negative value would otherwise
        # slice from the end and report a wrong remainder.
        if limit is not None and 0 < limit < total:
            lines = lines[:limit] + [f"... ({total - limit} more)"]
        return "\n".join(lines)[:50000]
    except Exception as e:
        return f"Error: {e}"


def run_write(path: str, content: str) -> str:
    """Write *content* to a workspace file, creating parent directories.

    Returns a confirmation message, or an "Error: ..." string on failure.
    """
    try:
        fp = safe_path(path)
        fp.parent.mkdir(parents=True, exist_ok=True)
        fp.write_text(content)
        return f"Wrote {len(content)} bytes to {path}"
    except Exception as e:
        return f"Error: {e}"


def run_edit(path: str, old_text: str, new_text: str) -> str:
    """Replace the first exact occurrence of *old_text* in a workspace file.

    Returns a confirmation message, an error string when *old_text* is not
    present, or an "Error: ..." string on any other failure.
    """
    try:
        fp = safe_path(path)
        content = fp.read_text()
        if old_text not in content:
            return f"Error: Text not found in {path}"
        fp.write_text(content.replace(old_text, new_text, 1))
        return f"Edited {path}"
    except Exception as e:
        return f"Error: {e}"


def run_todo(items: list) -> str:
    """
    Update the todo list.

    The model sends a complete new list (not a diff).
    We validate it and return the rendered view; validation failures are
    returned as "Error: ..." strings so the model can correct its input.
    """
    try:
        return TODO.update(items)
    except Exception as e:
        return f"Error: {e}"


def execute_tool(name: str, args: dict) -> str:
    """Dispatch a tool call to its implementation and return the text result.

    A missing required argument is reported as an "Error: ..." string (like
    every other tool failure) instead of raising, so one malformed call does
    not abort the whole agent turn.
    """
    try:
        if name == "bash":
            return run_bash(args["command"])
        if name == "read_file":
            return run_read(args["path"], args.get("limit"))
        if name == "write_file":
            return run_write(args["path"], args["content"])
        if name == "edit_file":
            return run_edit(args["path"], args["old_text"], args["new_text"])
        if name == "TodoWrite":
            return run_todo(args["items"])
    except KeyError as missing:
        # The run_* helpers swallow their own exceptions, so a KeyError here
        # can only come from the args[...] lookups above.
        return f"Error: Missing required argument '{missing.args[0]}' for tool '{name}'"
    return f"Unknown tool: {name}"


# =============================================================================
# Agent Loop (with todo tracking)
# =============================================================================

# Track how many rounds since last todo update
rounds_without_todo = 0

# Number of consecutive tool rounds without a TodoWrite call after which the
# nag reminder is attached (the reminder text itself says "10+ turns").
NAG_AFTER_ROUNDS = 10


def agent_loop(messages: list) -> list:
    """
    Agent loop with todo usage tracking.

    Same core loop as v1, but now we track whether the model is using todos.
    If it goes too long without updating, we attach a reminder to the next
    user message (the one carrying the tool results).

    Args:
        messages: Conversation history; mutated in place with each assistant
            response and each tool-result message.

    Returns:
        The same *messages* list, once the model stops requesting tools.
    """
    global rounds_without_todo

    while True:
        response = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )

        # Echo any text to the user and collect requested tool calls.
        tool_calls = []
        for block in response.content:
            if hasattr(block, "text"):
                print(block.text)
            if block.type == "tool_use":
                tool_calls.append(block)

        if response.stop_reason != "tool_use":
            messages.append({"role": "assistant", "content": response.content})
            return messages

        results = []
        used_todo = False

        for tc in tool_calls:
            print(f"\n> {tc.name}")
            output = execute_tool(tc.name, tc.input)
            preview = output[:300] + "..." if len(output) > 300 else output
            print(f"  {preview}")

            results.append({
                "type": "tool_result",
                "tool_use_id": tc.id,
                "content": output,
            })

            # Track todo usage
            if tc.name == "TodoWrite":
                used_todo = True

        # Update counter: reset if used todo, increment otherwise
        if used_todo:
            rounds_without_todo = 0
        else:
            rounds_without_todo += 1

        messages.append({"role": "assistant", "content": response.content})

        # Attach NAG_REMINDER if the model hasn't used todos for a while.
        # This happens INSIDE the agent loop, so the model sees it during
        # task execution. The reminder goes AFTER the tool_result blocks:
        # the Messages API requires tool_result blocks to come first in the
        # user message that answers a tool_use turn.
        if rounds_without_todo >= NAG_AFTER_ROUNDS:
            results.append({"type": "text", "text": NAG_REMINDER})

        messages.append({"role": "user", "content": results})


# =============================================================================
# Main REPL
# =============================================================================

def main():
    """
    REPL with reminder injection.

    Key v2 addition: We inject "reminder" messages to encourage
    todo usage without forcing it. This is a soft constraint.

    - INITIAL_REMINDER: injected at conversation start
    - NAG_REMINDER: injected inside agent_loop when 10+ rounds without todo
    """
    print(f"Mini Claude Code v2 (with Todos) - {WORKDIR}")
    print("Type 'exit' to quit.\n")

    history = []
    first_message = True

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            break

        if not user_input or user_input.lower() in ("exit", "quit", "q"):
            break

        # Build user message content
        content = []

        if first_message:
            # Gentle reminder at start of conversation
            content.append({"type": "text", "text": INITIAL_REMINDER})
            first_message = False

        content.append({"type": "text", "text": user_input})
        history.append({"role": "user", "content": content})

        # Top-level boundary: report the failure and keep the REPL alive.
        try:
            agent_loop(history)
        except Exception as e:
            print(f"Error: {e}")

        print()


if __name__ == "__main__":
    main()