From c396d59f03399913a6ad4e1062e45c8d9d05bc71 Mon Sep 17 00:00:00 2001 From: Gui-Yue Date: Tue, 26 May 2026 05:35:51 +0000 Subject: [PATCH] fix: inject s09 memories as turn context --- .gitignore | 1 + s09_memory/README.md | 9 +++++---- s09_memory/code.py | 25 ++++++++++++++++--------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 7c2e9b2..ce87dce 100644 --- a/.gitignore +++ b/.gitignore @@ -192,6 +192,7 @@ cython_debug/ .transcripts/ # Runtime artifacts (generated by agent tests) +.memory/ .task_outputs/ .tasks/ .teams/ diff --git a/s09_memory/README.md b/s09_memory/README.md index fb78b07..76e5505 100644 --- a/s09_memory/README.md +++ b/s09_memory/README.md @@ -23,7 +23,7 @@ LLM 没有持久状态,所有信息都在上下文窗口里。上下文满了 s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。 -关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说"记住",或者每轮结束后后台提取。文件积累多了,定期整理去重。 +关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说"记住"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。 四类记忆,各有用途: @@ -78,7 +78,7 @@ def write_memory_file(name, mem_type, description, body): **路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。 -**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。 +**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。 ```python def select_relevant_memories(messages, max_items=5): @@ -93,7 +93,8 @@ def select_relevant_memories(messages, max_items=5): "content": f"Select relevant memory indices. Return JSON array.\n\n" f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}], max_tokens=200) - indices = json.loads(re.search(r'\[.*?\]', response.content[0].text).group()) + text = extract_text(response.content).strip() + indices = json.loads(re.search(r'\[.*?\]', text).group()) return [files[i]["filename"] for i in indices if 0 <= i < len(files)] ``` @@ -108,7 +109,7 @@ def select_relevant_memories(messages, max_items=5): ```python # In agent_loop: if response.stop_reason != "tool_use": - extract_memories(messages) # 从最近对话提取新记忆 + extract_memories(pre_compress) # 从压缩前快照提取新记忆 consolidate_memories() # 检查是否需要整理 return ``` diff --git a/s09_memory/code.py b/s09_memory/code.py index 7dd39c8..b0f6120 100644 --- a/s09_memory/code.py +++ b/s09_memory/code.py @@ -177,7 +177,7 @@ def select_relevant_memories(messages: list, max_items: int = 5) -> list[str]: messages=[{"role": "user", "content": prompt}], max_tokens=200, ) - text = response.content[0].text.strip() + text = extract_text(response.content).strip() # Extract JSON array from response match = re.search(r'\[.*?\]', text, re.DOTALL) if match: @@ -259,7 +259,7 @@ def extract_memories(messages: list): response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800 ) - text = response.content[0].text.strip() + text = extract_text(response.content).strip() # Extract JSON array from response match = re.search(r'\[.*\]', text, re.DOTALL) if not match: @@ -309,7 +309,7 @@ def consolidate_memories(): response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000 ) - text = response.content[0].text.strip() + text = extract_text(response.content).strip() match = re.search(r'\[.*\]', text, re.DOTALL) if not match: return @@ -504,7 +504,7 @@ def summarize_history(msgs): "Summarize this coding-agent conversation so work can continue.\n" "Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}], max_tokens=2000) - return r.content[0].text.strip() + return extract_text(r.content).strip() def compact_history(msgs): write_transcript(msgs) @@ -550,12 +550,12 @@ MAX_REACTIVE_RETRIES = 1 def agent_loop(messages: list): reactive_retries = 0 + # s09: inject relevant memory content into the current user turn + memories_content = load_memories(messages) + memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get("content"), str) else None while True: - # s09: rebuild system with current memory index + relevant memories + # s09: rebuild system with current memory index system = build_system() - memories_content = load_memories(messages) - if memories_content: - system += "\n\n" + memories_content # s09: save pre-compression snapshot for accurate memory extraction pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""), @@ -571,8 +571,15 @@ def agent_loop(messages: list): messages[:] = compact_history(messages) try: + request_messages = messages + if memories_content and memory_turn is not None and memory_turn < len(messages): + request_messages = messages.copy() + request_messages[memory_turn] = { + **messages[memory_turn], + "content": memories_content + "\n\n" + messages[memory_turn]["content"], + } response = client.messages.create( - model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000 + model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000 ) reactive_retries = 0 except Exception as e: