mirror of
https://github.com/shareAI-lab/analysis_claude_code.git
synced 2026-06-20 20:23:36 +08:00
Merge pull request #326 from wangle201210/fix/snip_compact
Fix compaction splitting tool-use/result pairs
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install anthropic python-dotenv pytest
|
||||
run: pip install -r requirements.txt pytest
|
||||
|
||||
- name: Run Python smoke tests
|
||||
run: python -m pytest tests -q
|
||||
|
||||
@@ -39,20 +39,24 @@ Core design: cheap first, expensive last.
|
||||
|
||||
The agent ran 80 turns of conversation, accumulating 160 `messages`. The very first "help me create hello.py" is barely relevant to current work, yet it still occupies space.
|
||||
|
||||
Message count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle:
|
||||
Message count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle; the only extra boundary rule is that `assistant(tool_use)` must not be separated from the following `user(tool_result)`:
|
||||
|
||||
```python
|
||||
def snip_compact(messages, max_messages=50):
|
||||
if len(messages) <= max_messages:
|
||||
return messages
|
||||
keep_head, keep_tail = 3, max_messages - 3
|
||||
snipped = len(messages) - keep_head - keep_tail
|
||||
placeholder = {"role": "user",
|
||||
"content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:keep_head] + [placeholder] + messages[-keep_tail:]
|
||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||
if _message_has_tool_use(messages[head_end - 1]):
|
||||
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||
head_end += 1
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
snipped = tail_start - head_end
|
||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:head_end] + [placeholder] + messages[tail_start:]
|
||||
```
|
||||
|
||||
Entire messages are trimmed, but `tool_result` content within remaining messages keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.
|
||||
Messages are still trimmed directly; this just adds one boundary guard. `tool_result` content within remaining messages still keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.
|
||||
|
||||
### L2: micro_compact — Placeholder for Old Tool Results
|
||||
|
||||
@@ -130,15 +134,17 @@ def compact_history(messages):
|
||||
|
||||
Sometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.
|
||||
|
||||
This triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, trimming to an API-acceptable size with byte-level precision, keeping only the last 5 messages + summary.
|
||||
This triggers **reactive_compact**: more aggressive than compact_history, it keeps only a summary plus the last 5 messages, and extends that tail back by one message when it would otherwise begin with an orphaned `tool_result`.
|
||||
|
||||
```python
|
||||
def reactive_compact(messages):
|
||||
transcript = write_transcript(messages)
|
||||
summary = summarize_history(messages)
|
||||
tail = messages[-5:]
|
||||
tail_start = max(0, len(messages) - 5)
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
return [{"role": "user",
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *tail]
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||
```
|
||||
|
||||
Reactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. Full error recovery is deferred to s11.
|
||||
|
||||
@@ -39,20 +39,24 @@ s07 のフック構造、スキルロード、サブ Agent の骨格を維持し
|
||||
|
||||
Agent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。
|
||||
|
||||
メッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持し、中間を切り捨て:
|
||||
メッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持して中間を切り詰める。ただし切れ目だけは調整し、`assistant(tool_use)` と後続の `user(tool_result)` を分断しない:
|
||||
|
||||
```python
|
||||
def snip_compact(messages, max_messages=50):
|
||||
if len(messages) <= max_messages:
|
||||
return messages
|
||||
keep_head, keep_tail = 3, max_messages - 3
|
||||
snipped = len(messages) - keep_head - keep_tail
|
||||
placeholder = {"role": "user",
|
||||
"content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:keep_head] + [placeholder] + messages[-keep_tail:]
|
||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||
if _message_has_tool_use(messages[head_end - 1]):
|
||||
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||
head_end += 1
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
snipped = tail_start - head_end
|
||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:head_end] + [placeholder] + messages[tail_start:]
|
||||
```
|
||||
|
||||
メッセージ全体は切り捨てたが、残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。
|
||||
切り捨て自体は単純なままで、境界だけを保護する。残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。
|
||||
|
||||
### L2: micro_compact — 古いツール結果をプレースホルダに置換
|
||||
|
||||
@@ -130,15 +134,17 @@ def compact_history(messages):
|
||||
|
||||
API がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。
|
||||
|
||||
この時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的で、末尾からバイト単位の精度で API が受け入れ可能なサイズまで切り詰め、最後の 5 件のメッセージ + 要約のみを保持。
|
||||
この時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的だが、末尾を残す際も孤立した `tool_result` を残さないようにする。
|
||||
|
||||
```python
|
||||
def reactive_compact(messages):
|
||||
transcript = write_transcript(messages)
|
||||
summary = summarize_history(messages)
|
||||
tail = messages[-5:]
|
||||
tail_start = max(0, len(messages) - 5)
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
return [{"role": "user",
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *tail]
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||
```
|
||||
|
||||
reactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。
|
||||
|
||||
@@ -39,20 +39,24 @@ Agent 跑着跑着,不动了。
|
||||
|
||||
Agent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的"帮我创建 hello.py"和当前工作几乎无关了,但全占着位置。
|
||||
|
||||
消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉:
|
||||
消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉;唯一额外边界条件是,不能把 `assistant(tool_use)` 和后面的 `user(tool_result)` 拆开:
|
||||
|
||||
```python
|
||||
def snip_compact(messages, max_messages=50):
|
||||
if len(messages) <= max_messages:
|
||||
return messages
|
||||
keep_head, keep_tail = 3, max_messages - 3
|
||||
snipped = len(messages) - keep_head - keep_tail
|
||||
placeholder = {"role": "user",
|
||||
"content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:keep_head] + [placeholder] + messages[-keep_tail:]
|
||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||
if _message_has_tool_use(messages[head_end - 1]):
|
||||
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||
head_end += 1
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
snipped = tail_start - head_end
|
||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||
return messages[:head_end] + [placeholder] + messages[tail_start:]
|
||||
```
|
||||
|
||||
裁掉了整条消息,但剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。
|
||||
裁掉的是消息本身,只是在切口处多做一步保护;剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。
|
||||
|
||||
### L2: micro_compact — 旧工具结果占位
|
||||
|
||||
@@ -130,15 +134,17 @@ def compact_history(messages):
|
||||
|
||||
有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。
|
||||
|
||||
这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,以字节级精度裁剪到 API 可接受的大小,只保留最后 5 条消息 + 摘要。
|
||||
这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,但仍要避免留下孤立 `tool_result`。
|
||||
|
||||
```python
|
||||
def reactive_compact(messages):
|
||||
transcript = write_transcript(messages)
|
||||
summary = summarize_history(messages)
|
||||
tail = messages[-5:]
|
||||
tail_start = max(0, len(messages) - 5)
|
||||
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||
tail_start -= 1
|
||||
return [{"role": "user",
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *tail]
|
||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||
```
|
||||
|
||||
reactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。
|
||||
|
||||
@@ -268,13 +268,45 @@ PERSIST_THRESHOLD = 30000
|
||||
|
||||
def estimate_size(msgs):
    """Return a rough size for ``msgs``: the length of its ``str()`` rendering."""
    rendered = str(msgs)
    return len(rendered)
|
||||
|
||||
def _block_type(block):
|
||||
return block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
|
||||
|
||||
|
||||
def _message_has_tool_use(msg):
    """Report whether ``msg`` is an assistant message with a ``tool_use`` block.

    Returns ``False`` for any other role and for content that is not a list.
    """
    blocks = msg.get("content")
    if msg.get("role") != "assistant" or not isinstance(blocks, list):
        return False
    for block in blocks:
        if _block_type(block) == "tool_use":
            return True
    return False
|
||||
|
||||
|
||||
def _is_tool_result_message(msg):
|
||||
if msg.get("role") != "user":
|
||||
return False
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
return False
|
||||
return any(isinstance(block, dict) and block.get("type") == "tool_result"
|
||||
for block in content)
|
||||
|
||||
|
||||
# L1: snipCompact — trim middle messages
|
||||
def snip_compact(messages, max_messages=50):
|
||||
if len(messages) <= max_messages: return messages
|
||||
keep_head, keep_tail = 3, max_messages - 3
|
||||
snipped = len(messages) - keep_head - keep_tail
|
||||
return messages[:keep_head] + [{"role": "user", "content": f"[snipped {snipped} messages]"}] + messages[-keep_tail:]
|
||||
head_end, tail_start = keep_head, len(messages) - keep_tail
|
||||
if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):
|
||||
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||
head_end += 1
|
||||
if (tail_start > 0 and tail_start < len(messages)
|
||||
and _is_tool_result_message(messages[tail_start])
|
||||
and _message_has_tool_use(messages[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
if head_end >= tail_start:
|
||||
return messages
|
||||
snipped = tail_start - head_end
|
||||
return messages[:head_end] + [{"role": "user", "content": f"[snipped {snipped} messages]"}] + messages[tail_start:]
|
||||
|
||||
|
||||
# L2: microCompact — old result placeholders
|
||||
@@ -351,7 +383,12 @@ def compact_history(messages):
|
||||
def reactive_compact(messages):
|
||||
transcript = write_transcript(messages)
|
||||
summary = summarize_history(messages)
|
||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *messages[-5:]]
|
||||
tail_start = max(0, len(messages) - 5)
|
||||
if (tail_start > 0 and tail_start < len(messages)
|
||||
and _is_tool_result_message(messages[tail_start])
|
||||
and _message_has_tool_use(messages[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
|
||||
@@ -449,9 +449,38 @@ CONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000
|
||||
|
||||
def estimate_size(msgs):
    """Approximate the size of ``msgs`` as the character count of ``str(msgs)``."""
    as_text = str(msgs)
    return len(as_text)
|
||||
|
||||
def _block_type(block):
|
||||
return block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
|
||||
|
||||
def _message_has_tool_use(msg):
    """Return True when ``msg`` is an assistant message with a ``tool_use`` block."""
    if msg.get("role") == "assistant":
        blocks = msg.get("content")
        if isinstance(blocks, list):
            return "tool_use" in (_block_type(block) for block in blocks)
    return False
|
||||
|
||||
def _is_tool_result_message(msg):
|
||||
if msg.get("role") != "user":
|
||||
return False
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
return False
|
||||
return any(isinstance(block, dict) and block.get("type") == "tool_result" for block in content)
|
||||
|
||||
def snip_compact(msgs, mx=50):
|
||||
if len(msgs) <= mx: return msgs
|
||||
return msgs[:3] + [{"role": "user", "content": f"[snipped {len(msgs)-mx} msgs]"}] + msgs[-(mx-3):]
|
||||
head_end, tail_start = 3, len(msgs) - (mx - 3)
|
||||
if head_end > 0 and _message_has_tool_use(msgs[head_end - 1]):
|
||||
while head_end < len(msgs) and _is_tool_result_message(msgs[head_end]):
|
||||
head_end += 1
|
||||
if (tail_start > 0 and tail_start < len(msgs)
|
||||
and _is_tool_result_message(msgs[tail_start])
|
||||
and _message_has_tool_use(msgs[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
if head_end >= tail_start:
|
||||
return msgs
|
||||
return msgs[:head_end] + [{"role": "user", "content": f"[snipped {tail_start - head_end} msgs]"}] + msgs[tail_start:]
|
||||
|
||||
def collect_tool_results(msgs):
|
||||
blocks = []
|
||||
@@ -512,7 +541,12 @@ def compact_history(msgs):
|
||||
def reactive_compact(msgs):
|
||||
write_transcript(msgs)
|
||||
summary = summarize_history(msgs)
|
||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *msgs[-5:]]
|
||||
tail_start = max(0, len(msgs) - 5)
|
||||
if (tail_start > 0 and tail_start < len(msgs)
|
||||
and _is_tool_result_message(msgs[tail_start])
|
||||
and _message_has_tool_use(msgs[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *msgs[tail_start:]]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
|
||||
@@ -1060,6 +1060,28 @@ def spawn_subagent(description: str) -> str:
|
||||
def estimate_size(messages: list) -> int:
    """Return the length of ``messages`` serialized as JSON.

    Values JSON cannot encode natively are stringified via ``default=str``.
    """
    serialized = json.dumps(messages, default=str)
    return len(serialized)
|
||||
|
||||
def block_type(block):
    """Return a content block's ``type`` for both dict and object blocks.

    Yields ``None`` when a dict has no ``"type"`` key or an object has no
    ``type`` attribute.
    """
    if isinstance(block, dict):
        return block.get("type")
    return getattr(block, "type", None)
|
||||
|
||||
|
||||
def message_has_tool_use(message: dict) -> bool:
    """Report whether ``message`` is an assistant message with a ``tool_use`` block.

    Any other role, or content that is not a list, gives ``False``.
    """
    blocks = message.get("content")
    if message.get("role") != "assistant" or not isinstance(blocks, list):
        return False
    for block in blocks:
        if block_type(block) == "tool_use":
            return True
    return False
|
||||
|
||||
|
||||
def is_tool_result_message(message: dict) -> bool:
    """Report whether ``message`` is a user message with a ``tool_result`` block.

    Only dict blocks are considered; content that is not a list never matches.
    """
    if message.get("role") == "user":
        blocks = message.get("content")
        if isinstance(blocks, list):
            for block in blocks:
                if isinstance(block, dict) and block.get("type") == "tool_result":
                    return True
    return False
|
||||
|
||||
|
||||
def collect_tool_results(messages: list):
|
||||
found = []
|
||||
@@ -1111,11 +1133,20 @@ def tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:
|
||||
def snip_compact(messages: list, max_messages: int = 50) -> list:
|
||||
if len(messages) <= max_messages:
|
||||
return messages
|
||||
keep_head, keep_tail = 3, max_messages - 3
|
||||
snipped = len(messages) - keep_head - keep_tail
|
||||
return (messages[:keep_head]
|
||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||
if head_end > 0 and message_has_tool_use(messages[head_end - 1]):
|
||||
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
||||
head_end += 1
|
||||
if (tail_start > 0 and tail_start < len(messages)
|
||||
and is_tool_result_message(messages[tail_start])
|
||||
and message_has_tool_use(messages[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
if head_end >= tail_start:
|
||||
return messages
|
||||
snipped = tail_start - head_end
|
||||
return (messages[:head_end]
|
||||
+ [{"role": "user", "content": f"[snipped {snipped} messages]"}]
|
||||
+ messages[-keep_tail:])
|
||||
+ messages[tail_start:])
|
||||
|
||||
|
||||
def micro_compact(messages: list) -> list:
|
||||
@@ -1163,8 +1194,13 @@ def reactive_compact(messages: list) -> list:
|
||||
summary = summarize_history(messages)
|
||||
except Exception:
|
||||
summary = "Earlier conversation was trimmed after a prompt-too-long error."
|
||||
tail_start = max(0, len(messages) - 5)
|
||||
if (tail_start > 0 and tail_start < len(messages)
|
||||
and is_tool_result_message(messages[tail_start])
|
||||
and message_has_tool_use(messages[tail_start - 1])):
|
||||
tail_start -= 1
|
||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"},
|
||||
*messages[-5:]]
|
||||
*messages[tail_start:]]
|
||||
|
||||
|
||||
# ── Error Recovery ──
|
||||
|
||||
189
tests/test_compaction_tool_pairs.py
Normal file
189
tests/test_compaction_tool_pairs.py
Normal file
@@ -0,0 +1,189 @@
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory one level above the directory that contains this test file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Chapter scripts exercised by the tests below, keyed by a short chapter name.
MODULES = {
    "s08": REPO_ROOT / "s08_context_compact" / "code.py",
    "s09": REPO_ROOT / "s09_memory" / "code.py",
    "s20": REPO_ROOT / "s20_comprehensive" / "code.py",
}
|
||||
|
||||
|
||||
def load_module(name: str, path: Path, temp_cwd: Path):
    """Execute the file at ``path`` as module ``name`` with stubbed dependencies.

    While the module body runs, ``anthropic`` and ``dotenv`` in ``sys.modules``
    are replaced by stand-ins, ``MODEL_ID`` and ``ANTHROPIC_API_KEY`` are set to
    test values, and the working directory is ``temp_cwd``. All of these are
    put back afterwards, whether or not execution succeeds.

    Raises:
        RuntimeError: if no import spec or loader can be built for ``path``.
    """

    class FakeAnthropic:
        def __init__(self, *args, **kwargs):
            self.messages = types.SimpleNamespace(create=None)

    stub_anthropic = types.ModuleType("anthropic")
    stub_dotenv = types.ModuleType("dotenv")
    stub_anthropic.Anthropic = FakeAnthropic
    stub_dotenv.load_dotenv = lambda override=True: None

    # Snapshot everything that is about to be overridden.
    saved_modules = {key: sys.modules.get(key) for key in ("anthropic", "dotenv")}
    original_cwd = Path.cwd()
    saved_env = {key: os.environ.get(key) for key in ("MODEL_ID", "ANTHROPIC_API_KEY")}

    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Unable to load {path}")
    module = importlib.util.module_from_spec(spec)

    sys.modules["anthropic"] = stub_anthropic
    sys.modules["dotenv"] = stub_dotenv
    os.environ["MODEL_ID"] = "test-model"
    os.environ["ANTHROPIC_API_KEY"] = "test-key"
    try:
        os.chdir(temp_cwd)
        spec.loader.exec_module(module)
        return module
    finally:
        os.chdir(original_cwd)
        # A snapshot value of None means the entry was absent (or None) before.
        for key, previous in saved_modules.items():
            if previous is None:
                sys.modules.pop(key, None)
            else:
                sys.modules[key] = previous
        for key, previous in saved_env.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous
|
||||
|
||||
|
||||
def assistant_text():
    """Build an assistant message whose only block is an object-style text block."""
    text_block = types.SimpleNamespace(type="text", text="ok")
    return {"role": "assistant", "content": [text_block]}
|
||||
|
||||
|
||||
def user_text():
    """Build a plain-string user message with the text "continue"."""
    return dict(role="user", content="continue")
|
||||
|
||||
|
||||
def tool_use_message(tool_id="tool-1"):
    """Build an assistant message with one object-style ``tool_use`` block.

    The block is named "bash" and carries ``tool_id`` as its ``id``.
    """
    call_block = types.SimpleNamespace(type="tool_use", id=tool_id, name="bash")
    return {"role": "assistant", "content": [call_block]}
|
||||
|
||||
|
||||
def tool_result_message(tool_id="tool-1"):
    """Build a user message with one dict ``tool_result`` block for ``tool_id``."""
    result_block = {"type": "tool_result", "tool_use_id": tool_id, "content": "ok"}
    return {"role": "user", "content": [result_block]}
|
||||
|
||||
|
||||
def message_has_tool_use(message):
    """Report whether ``message`` is an assistant message with a ``tool_use`` block.

    Blocks may be dicts (``{"type": "tool_use", ...}``) or objects exposing a
    ``type`` attribute. Any other role, or content that is not a list, gives
    ``False``.
    """
    content = message.get("content")
    if message.get("role") != "assistant" or not isinstance(content, list):
        return False
    for block in content:
        # Accept both block shapes, so a dict-form tool_use is not missed
        # and its tool_result wrongly reported as orphaned.
        kind = block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
        if kind == "tool_use":
            return True
    return False
|
||||
|
||||
|
||||
def assert_no_orphan_tool_results(testcase, messages):
    """Assert that each tool_result message directly follows a tool_use message.

    A tool_result message is a user message whose list content has a dict
    block of type "tool_result". For each one, ``testcase`` asserts that it is
    not the first message and that the message just before it passes
    ``message_has_tool_use``.
    """

    def carries_tool_result(message):
        content = message.get("content")
        if message.get("role") != "user" or not isinstance(content, list):
            return False
        return any(
            isinstance(block, dict) and block.get("type") == "tool_result"
            for block in content
        )

    for idx, message in enumerate(messages):
        if carries_tool_result(message):
            testcase.assertGreater(idx, 0)
            testcase.assertTrue(message_has_tool_use(messages[idx - 1]), messages)
|
||||
|
||||
|
||||
class CompactionToolPairTests(unittest.TestCase):
    """Check that compaction keeps each tool_result next to its tool_use.

    The snip and reactive scenarios run once per chapter script in ``MODULES``.
    """

    @staticmethod
    def _snip_to_six(module, name, messages):
        """Call ``snip_compact`` with a limit of 6 on a copy of ``messages``."""
        # s09 spells the limit keyword ``mx``; the other scripts use ``max_messages``.
        limit = {"mx": 6} if name == "s09" else {"max_messages": 6}
        return module.snip_compact(list(messages), **limit)

    def test_snip_compact_keeps_head_tool_pair(self):
        # tool_use at index 2, its tool_result at index 3, then three
        # assistant/user rounds of filler.
        messages = [
            user_text(),
            assistant_text(),
            tool_use_message("head-tool"),
            tool_result_message("head-tool"),
        ]
        for _ in range(3):
            messages += [assistant_text(), user_text()]

        for name, path in MODULES.items():
            with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
                module = load_module(f"{name}_head_under_test", path, Path(tmp))
                compacted = self._snip_to_six(module, name, messages)
                self.assertEqual(compacted[2], messages[2])
                self.assertEqual(compacted[3], messages[3])
                assert_no_orphan_tool_results(self, compacted)

    def test_snip_compact_keeps_tail_tool_pair(self):
        # Three user/assistant rounds, then tool_use at index 6 and its
        # tool_result at index 7, followed by one more exchange.
        messages = []
        for _ in range(3):
            messages += [user_text(), assistant_text()]
        messages += [
            tool_use_message("tail-tool"),
            tool_result_message("tail-tool"),
            assistant_text(),
            user_text(),
        ]

        for name, path in MODULES.items():
            with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
                module = load_module(f"{name}_under_test", path, Path(tmp))
                compacted = self._snip_to_six(module, name, messages)
                assert_no_orphan_tool_results(self, compacted)

    def test_reactive_compact_keeps_tail_tool_pair(self):
        # Nine messages: the tool_result sits at index 4, i.e. fifth from the end.
        messages = [
            user_text(),
            assistant_text(),
            user_text(),
            tool_use_message("reactive-tool"),
            tool_result_message("reactive-tool"),
        ]
        for _ in range(2):
            messages += [assistant_text(), user_text()]

        for name, path in MODULES.items():
            with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
                module = load_module(f"{name}_reactive_under_test", path, Path(tmp))
                # Replace the transcript writer and summarizer with fixed stubs.
                module.write_transcript = lambda _messages: Path("transcript.jsonl")
                module.summarize_history = lambda _messages: "summary"
                compacted = module.reactive_compact(list(messages))
                # Index 0 is the summary message; the kept tail must start at the tool_use.
                self.assertEqual(compacted[1], messages[3])
                assert_no_orphan_tool_results(self, compacted)

    def test_s20_has_tool_use_still_accepts_content_blocks(self):
        with tempfile.TemporaryDirectory() as tmp:
            module = load_module("s20_has_tool_use_under_test", MODULES["s20"], Path(tmp))
            tool_block = types.SimpleNamespace(type="tool_use")
            text_block = types.SimpleNamespace(type="text")
            self.assertTrue(module.has_tool_use([tool_block]))
            self.assertFalse(module.has_tool_use([text_block]))
|
||||
|
||||
|
||||
# Run the tests with unittest's runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user