mirror of
https://github.com/shareAI-lab/analysis_claude_code.git
synced 2026-06-20 20:23:36 +08:00
fix: inject s09 memories as turn context
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -192,6 +192,7 @@ cython_debug/
|
||||
.transcripts/
|
||||
|
||||
# Runtime artifacts (generated by agent tests)
|
||||
.memory/
|
||||
.task_outputs/
|
||||
.tasks/
|
||||
.teams/
|
||||
|
||||
@@ -23,7 +23,7 @@ LLM 没有持久状态,所有信息都在上下文窗口里。上下文满了
|
||||
|
||||
s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。
|
||||
|
||||
关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说"记住",或者每轮结束后后台提取。文件积累多了,定期整理去重。
|
||||
关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说"记住"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。
|
||||
|
||||
四类记忆,各有用途:
|
||||
|
||||
@@ -78,7 +78,7 @@ def write_memory_file(name, mem_type, description, body):
|
||||
|
||||
**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。
|
||||
|
||||
**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。
|
||||
**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。
|
||||
|
||||
```python
|
||||
def select_relevant_memories(messages, max_items=5):
|
||||
@@ -93,7 +93,8 @@ def select_relevant_memories(messages, max_items=5):
|
||||
"content": f"Select relevant memory indices. Return JSON array.\n\n"
|
||||
f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}],
|
||||
max_tokens=200)
|
||||
indices = json.loads(re.search(r'\[.*?\]', response.content[0].text).group())
|
||||
text = extract_text(response.content).strip()
|
||||
indices = json.loads(re.search(r'\[.*?\]', text).group())
|
||||
return [files[i]["filename"] for i in indices if 0 <= i < len(files)]
|
||||
```
|
||||
|
||||
@@ -108,7 +109,7 @@ def select_relevant_memories(messages, max_items=5):
|
||||
```python
|
||||
# In agent_loop:
|
||||
if response.stop_reason != "tool_use":
|
||||
extract_memories(messages) # 从最近对话提取新记忆
|
||||
extract_memories(pre_compress) # 从压缩前快照提取新记忆
|
||||
consolidate_memories() # 检查是否需要整理
|
||||
return
|
||||
```
|
||||
|
||||
@@ -177,7 +177,7 @@ def select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=200,
|
||||
)
|
||||
text = response.content[0].text.strip()
|
||||
text = extract_text(response.content).strip()
|
||||
# Extract JSON array from response
|
||||
match = re.search(r'\[.*?\]', text, re.DOTALL)
|
||||
if match:
|
||||
@@ -259,7 +259,7 @@ def extract_memories(messages: list):
|
||||
response = client.messages.create(
|
||||
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800
|
||||
)
|
||||
text = response.content[0].text.strip()
|
||||
text = extract_text(response.content).strip()
|
||||
# Extract JSON array from response
|
||||
match = re.search(r'\[.*\]', text, re.DOTALL)
|
||||
if not match:
|
||||
@@ -309,7 +309,7 @@ def consolidate_memories():
|
||||
response = client.messages.create(
|
||||
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000
|
||||
)
|
||||
text = response.content[0].text.strip()
|
||||
text = extract_text(response.content).strip()
|
||||
match = re.search(r'\[.*\]', text, re.DOTALL)
|
||||
if not match:
|
||||
return
|
||||
@@ -504,7 +504,7 @@ def summarize_history(msgs):
|
||||
"Summarize this coding-agent conversation so work can continue.\n"
|
||||
"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}],
|
||||
max_tokens=2000)
|
||||
return r.content[0].text.strip()
|
||||
return extract_text(r.content).strip()
|
||||
|
||||
def compact_history(msgs):
|
||||
write_transcript(msgs)
|
||||
@@ -550,12 +550,12 @@ MAX_REACTIVE_RETRIES = 1
|
||||
|
||||
def agent_loop(messages: list):
|
||||
reactive_retries = 0
|
||||
# s09: inject relevant memory content into the current user turn
|
||||
memories_content = load_memories(messages)
|
||||
memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get("content"), str) else None
|
||||
while True:
|
||||
# s09: rebuild system with current memory index + relevant memories
|
||||
# s09: rebuild system with current memory index
|
||||
system = build_system()
|
||||
memories_content = load_memories(messages)
|
||||
if memories_content:
|
||||
system += "\n\n" + memories_content
|
||||
|
||||
# s09: save pre-compression snapshot for accurate memory extraction
|
||||
pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""),
|
||||
@@ -571,8 +571,15 @@ def agent_loop(messages: list):
|
||||
messages[:] = compact_history(messages)
|
||||
|
||||
try:
|
||||
request_messages = messages
|
||||
if memories_content and memory_turn is not None and memory_turn < len(messages):
|
||||
request_messages = messages.copy()
|
||||
request_messages[memory_turn] = {
|
||||
**messages[memory_turn],
|
||||
"content": memories_content + "\n\n" + messages[memory_turn]["content"],
|
||||
}
|
||||
response = client.messages.create(
|
||||
model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000
|
||||
model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000
|
||||
)
|
||||
reactive_retries = 0
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user