mirror of
https://github.com/shareAI-lab/analysis_claude_code.git
synced 2026-06-21 04:33:36 +08:00
fix: inject s09 memories as turn context
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -192,6 +192,7 @@ cython_debug/
|
|||||||
.transcripts/
|
.transcripts/
|
||||||
|
|
||||||
# Runtime artifacts (generated by agent tests)
|
# Runtime artifacts (generated by agent tests)
|
||||||
|
.memory/
|
||||||
.task_outputs/
|
.task_outputs/
|
||||||
.tasks/
|
.tasks/
|
||||||
.teams/
|
.teams/
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ LLM 没有持久状态,所有信息都在上下文窗口里。上下文满了
|
|||||||
|
|
||||||
s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。
|
s08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。
|
||||||
|
|
||||||
关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入(按 filename/description 匹配当前对话,不破坏 cache)。写入分两条路径:用户显式说"记住",或者每轮结束后后台提取。文件积累多了,定期整理去重。
|
关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说"记住"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。
|
||||||
|
|
||||||
四类记忆,各有用途:
|
四类记忆,各有用途:
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ def write_memory_file(name, mem_type, description, body):
|
|||||||
|
|
||||||
**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。只要索引内容未变,SYSTEM prompt 中的这段前缀就能命中 prompt cache:请求里仍会携带它,但无需重新处理,计费和延迟都更低。
|
**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。只要索引内容未变,SYSTEM prompt 中的这段前缀就能命中 prompt cache:请求里仍会携带它,但无需重新处理,计费和延迟都更低。
|
||||||
|
|
||||||
**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容注入上下文。最多 5 条,控制开销。
|
**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def select_relevant_memories(messages, max_items=5):
|
def select_relevant_memories(messages, max_items=5):
|
||||||
@@ -93,7 +93,8 @@ def select_relevant_memories(messages, max_items=5):
|
|||||||
"content": f"Select relevant memory indices. Return JSON array.\n\n"
|
"content": f"Select relevant memory indices. Return JSON array.\n\n"
|
||||||
f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}],
|
f"Recent conversation:\n{recent}\n\nMemory catalog:\n{catalog}"}],
|
||||||
max_tokens=200)
|
max_tokens=200)
|
||||||
indices = json.loads(re.search(r'\[.*?\]', response.content[0].text).group())
|
text = extract_text(response.content).strip()
|
||||||
|
indices = json.loads(re.search(r'\[.*?\]', text).group())
|
||||||
return [files[i]["filename"] for i in indices if 0 <= i < len(files)]
|
return [files[i]["filename"] for i in indices if 0 <= i < len(files)]
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -108,7 +109,7 @@ def select_relevant_memories(messages, max_items=5):
|
|||||||
```python
|
```python
|
||||||
# In agent_loop:
|
# In agent_loop:
|
||||||
if response.stop_reason != "tool_use":
|
if response.stop_reason != "tool_use":
|
||||||
extract_memories(messages) # 从最近对话提取新记忆
|
extract_memories(pre_compress) # 从压缩前快照提取新记忆
|
||||||
consolidate_memories() # 检查是否需要整理
|
consolidate_memories() # 检查是否需要整理
|
||||||
return
|
return
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ def select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:
|
|||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
max_tokens=200,
|
max_tokens=200,
|
||||||
)
|
)
|
||||||
text = response.content[0].text.strip()
|
text = extract_text(response.content).strip()
|
||||||
# Extract JSON array from response
|
# Extract JSON array from response
|
||||||
match = re.search(r'\[.*?\]', text, re.DOTALL)
|
match = re.search(r'\[.*?\]', text, re.DOTALL)
|
||||||
if match:
|
if match:
|
||||||
@@ -259,7 +259,7 @@ def extract_memories(messages: list):
|
|||||||
response = client.messages.create(
|
response = client.messages.create(
|
||||||
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800
|
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=800
|
||||||
)
|
)
|
||||||
text = response.content[0].text.strip()
|
text = extract_text(response.content).strip()
|
||||||
# Extract JSON array from response
|
# Extract JSON array from response
|
||||||
match = re.search(r'\[.*\]', text, re.DOTALL)
|
match = re.search(r'\[.*\]', text, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
@@ -309,7 +309,7 @@ def consolidate_memories():
|
|||||||
response = client.messages.create(
|
response = client.messages.create(
|
||||||
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000
|
model=MODEL, messages=[{"role": "user", "content": prompt}], max_tokens=3000
|
||||||
)
|
)
|
||||||
text = response.content[0].text.strip()
|
text = extract_text(response.content).strip()
|
||||||
match = re.search(r'\[.*\]', text, re.DOTALL)
|
match = re.search(r'\[.*\]', text, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
return
|
return
|
||||||
@@ -504,7 +504,7 @@ def summarize_history(msgs):
|
|||||||
"Summarize this coding-agent conversation so work can continue.\n"
|
"Summarize this coding-agent conversation so work can continue.\n"
|
||||||
"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}],
|
"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\n\n" + conv}],
|
||||||
max_tokens=2000)
|
max_tokens=2000)
|
||||||
return r.content[0].text.strip()
|
return extract_text(r.content).strip()
|
||||||
|
|
||||||
def compact_history(msgs):
|
def compact_history(msgs):
|
||||||
write_transcript(msgs)
|
write_transcript(msgs)
|
||||||
@@ -550,12 +550,12 @@ MAX_REACTIVE_RETRIES = 1
|
|||||||
|
|
||||||
def agent_loop(messages: list):
|
def agent_loop(messages: list):
|
||||||
reactive_retries = 0
|
reactive_retries = 0
|
||||||
while True:
|
# s09: inject relevant memory content into the current user turn
|
||||||
# s09: rebuild system with current memory index + relevant memories
|
|
||||||
system = build_system()
|
|
||||||
memories_content = load_memories(messages)
|
memories_content = load_memories(messages)
|
||||||
if memories_content:
|
memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get("content"), str) else None
|
||||||
system += "\n\n" + memories_content
|
while True:
|
||||||
|
# s09: rebuild system with current memory index
|
||||||
|
system = build_system()
|
||||||
|
|
||||||
# s09: save pre-compression snapshot for accurate memory extraction
|
# s09: save pre-compression snapshot for accurate memory extraction
|
||||||
pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""),
|
pre_compress = [m if isinstance(m, dict) else {"role": m.get("role",""),
|
||||||
@@ -571,8 +571,15 @@ def agent_loop(messages: list):
|
|||||||
messages[:] = compact_history(messages)
|
messages[:] = compact_history(messages)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
request_messages = messages
|
||||||
|
if memories_content and memory_turn is not None and memory_turn < len(messages):
|
||||||
|
request_messages = messages.copy()
|
||||||
|
request_messages[memory_turn] = {
|
||||||
|
**messages[memory_turn],
|
||||||
|
"content": memories_content + "\n\n" + messages[memory_turn]["content"],
|
||||||
|
}
|
||||||
response = client.messages.create(
|
response = client.messages.create(
|
||||||
model=MODEL, system=system, messages=messages, tools=TOOLS, max_tokens=8000
|
model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000
|
||||||
)
|
)
|
||||||
reactive_retries = 0
|
reactive_retries = 0
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user