Merge pull request #312 from Gui-Yue/fix/s09-memory-turn-context

fix: inject s09 memories as turn context
This commit is contained in:
Yang Haoran
2026-05-26 14:56:49 +08:00
committed by GitHub
3 changed files with 22 additions and 13 deletions

1
.gitignore vendored
View File

@@ -192,6 +192,7 @@ cython_debug/
.transcripts/ .transcripts/
# Runtime artifacts (generated by agent tests) # Runtime artifacts (generated by agent tests)
.memory/
.task_outputs/ .task_outputs/
.tasks/ .tasks/
.teams/ .teams/

View File

@@ -23,7 +23,7 @@ LLM 没有持久状态,所有信息都在上下文窗口里。上下文满了
s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。 s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。
关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说"记住",或者每轮结束后后台提取。文件积累多了,定期整理去重。 关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说"记住"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。
四类记忆,各有用途: 四类记忆,各有用途:
@@ -78,7 +78,7 @@ def write_memory_file(name, mem_type, description, body):
**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。 **路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。
**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。 **路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。
```python ```python
def select_relevant_memories(messages, max_items=5): def select_relevant_memories(messages, max_items=5):
@@ -93,7 +93,8 @@ def select_relevant_memories(messages, max_items=5):
"content": f"Select relevant memory indices. Return JSON array.\n\n" "content": f"Select relevant memory indices. Return JSON array.\n\n"
f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}], f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}],
max_tokens=200) max_tokens=200)
indices = json.loads(re.search(r'\[.*?\]', response.content[0].text).group()) text = extract_text(response.content).strip()
indices = json.loads(re.search(r'\[.*?\]', text).group())
return [files[i]["filename"] for i in indices if 0 <= i < len(files)] return [files[i]["filename"] for i in indices if 0 <= i < len(files)]
``` ```
@@ -108,7 +109,7 @@ def select_relevant_memories(messages, max_items=5):
```python ```python
# In agent_loop: # In agent_loop:
if response.stop_reason != "tool_use": if response.stop_reason != "tool_use":
extract_memories(messages) # 从最近对话提取新记忆 extract_memories(pre_compress) # 从压缩前快照提取新记忆
consolidate_memories() # 检查是否需要整理 consolidate_memories() # 检查是否需要整理
return return
``` ```

View File

@@ -177,7 +177,7 @@ def select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
max_tokens=200, max_tokens=200,
) )
text = response.content[0].text.strip() text = extract_text(response.content).strip()
# Extract JSON array from response # Extract JSON array from response
match = re.search(r'\[.*?\]', text, re.DOTALL) match = re.search(r'\[.*?\]', text, re.DOTALL)
if match: if match:
@@ -259,7 +259,7 @@ def extract_memories(messages: list):
response = client.messages.create( response = client.messages.create(
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800 model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800
) )
text = response.content[0].text.strip() text = extract_text(response.content).strip()
# Extract JSON array from response # Extract JSON array from response
match = re.search(r'\[.*\]', text, re.DOTALL) match = re.search(r'\[.*\]', text, re.DOTALL)
if not match: if not match:
@@ -309,7 +309,7 @@ def consolidate_memories():
response = client.messages.create( response = client.messages.create(
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000 model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000
) )
text = response.content[0].text.strip() text = extract_text(response.content).strip()
match = re.search(r'\[.*\]', text, re.DOTALL) match = re.search(r'\[.*\]', text, re.DOTALL)
if not match: if not match:
return return
@@ -504,7 +504,7 @@ def summarize_history(msgs):
"Summarize this coding-agent conversation so work can continue.\n" "Summarize this coding-agent conversation so work can continue.\n"
"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}], "Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}],
max_tokens=2000) max_tokens=2000)
return r.content[0].text.strip() return extract_text(r.content).strip()
def compact_history(msgs): def compact_history(msgs):
write_transcript(msgs) write_transcript(msgs)
@@ -550,12 +550,12 @@ MAX_REACTIVE_RETRIES = 1
def agent_loop(messages: list): def agent_loop(messages: list):
reactive_retries = 0 reactive_retries = 0
# s09: inject relevant memory content into the current user turn
memories_content = load_memories(messages)
memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get("content"), str) else None
while True: while True:
# s09: rebuild system with current memory index + relevant memories # s09: rebuild system with current memory index
system = build_system() system = build_system()
memories_content = load_memories(messages)
if memories_content:
system += "\n\n" + memories_content
# s09: save pre-compression snapshot for accurate memory extraction # s09: save pre-compression snapshot for accurate memory extraction
pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""), pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""),
@@ -571,8 +571,15 @@ def agent_loop(messages: list):
messages[:] = compact_history(messages) messages[:] = compact_history(messages)
try: try:
request_messages = messages
if memories_content and memory_turn is not None and memory_turn < len(messages):
request_messages = messages.copy()
request_messages[memory_turn] = {
**messages[memory_turn],
"content": memories_content + "\n\n" + messages[memory_turn]["content"],
}
response = client.messages.create( response = client.messages.create(
model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000 model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000
) )
reactive_retries = 0 reactive_retries = 0
except Exception as e: except Exception as e: