mirror of
https://github.com/shareAI-lab/analysis_claude_code.git
synced 2026-06-21 04:33:36 +08:00
fix: harden compaction pair handling
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
|||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pip install anthropic python-dotenv pytest
|
run: pip install -r requirements.txt pytest
|
||||||
|
|
||||||
- name: Run Python smoke tests
|
- name: Run Python smoke tests
|
||||||
run: python -m pytest tests -q
|
run: python -m pytest tests -q
|
||||||
|
|||||||
@@ -46,10 +46,10 @@ def snip_compact(messages, max_messages=50):
|
|||||||
if len(messages) <= max_messages:
|
if len(messages) <= max_messages:
|
||||||
return messages
|
return messages
|
||||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||||
if has_tool_use(messages[head_end - 1]):
|
if _message_has_tool_use(messages[head_end - 1]):
|
||||||
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
snipped = tail_start - head_end
|
snipped = tail_start - head_end
|
||||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||||
@@ -141,7 +141,7 @@ def reactive_compact(messages):
|
|||||||
transcript = write_transcript(messages)
|
transcript = write_transcript(messages)
|
||||||
summary = summarize_history(messages)
|
summary = summarize_history(messages)
|
||||||
tail_start = max(0, len(messages) - 5)
|
tail_start = max(0, len(messages) - 5)
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user",
|
return [{"role": "user",
|
||||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||||
|
|||||||
@@ -46,10 +46,10 @@ def snip_compact(messages, max_messages=50):
|
|||||||
if len(messages) <= max_messages:
|
if len(messages) <= max_messages:
|
||||||
return messages
|
return messages
|
||||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||||
if has_tool_use(messages[head_end - 1]):
|
if _message_has_tool_use(messages[head_end - 1]):
|
||||||
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
snipped = tail_start - head_end
|
snipped = tail_start - head_end
|
||||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||||
@@ -141,7 +141,7 @@ def reactive_compact(messages):
|
|||||||
transcript = write_transcript(messages)
|
transcript = write_transcript(messages)
|
||||||
summary = summarize_history(messages)
|
summary = summarize_history(messages)
|
||||||
tail_start = max(0, len(messages) - 5)
|
tail_start = max(0, len(messages) - 5)
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user",
|
return [{"role": "user",
|
||||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||||
|
|||||||
@@ -46,10 +46,10 @@ def snip_compact(messages, max_messages=50):
|
|||||||
if len(messages) <= max_messages:
|
if len(messages) <= max_messages:
|
||||||
return messages
|
return messages
|
||||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||||
if has_tool_use(messages[head_end - 1]):
|
if _message_has_tool_use(messages[head_end - 1]):
|
||||||
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
snipped = tail_start - head_end
|
snipped = tail_start - head_end
|
||||||
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
placeholder = {"role": "user", "content": f"[snipped {snipped} messages from conversation middle]"}
|
||||||
@@ -141,7 +141,7 @@ def reactive_compact(messages):
|
|||||||
transcript = write_transcript(messages)
|
transcript = write_transcript(messages)
|
||||||
summary = summarize_history(messages)
|
summary = summarize_history(messages)
|
||||||
tail_start = max(0, len(messages) - 5)
|
tail_start = max(0, len(messages) - 5)
|
||||||
if is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user",
|
return [{"role": "user",
|
||||||
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
"content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||||
|
|||||||
@@ -269,9 +269,10 @@ PERSIST_THRESHOLD = 30000
|
|||||||
def estimate_size(msgs): return len(str(msgs))
|
def estimate_size(msgs): return len(str(msgs))
|
||||||
|
|
||||||
def _block_type(block):
|
def _block_type(block):
|
||||||
return getattr(block, "type", None) if not isinstance(block, dict) else block.get("type")
|
return block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
|
||||||
|
|
||||||
def _has_tool_use(msg):
|
|
||||||
|
def _message_has_tool_use(msg):
|
||||||
if msg.get("role") != "assistant":
|
if msg.get("role") != "assistant":
|
||||||
return False
|
return False
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
@@ -279,13 +280,15 @@ def _has_tool_use(msg):
|
|||||||
return False
|
return False
|
||||||
return any(_block_type(block) == "tool_use" for block in content)
|
return any(_block_type(block) == "tool_use" for block in content)
|
||||||
|
|
||||||
|
|
||||||
def _is_tool_result_message(msg):
|
def _is_tool_result_message(msg):
|
||||||
if msg.get("role") != "user":
|
if msg.get("role") != "user":
|
||||||
return False
|
return False
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
if not isinstance(content, list):
|
if not isinstance(content, list):
|
||||||
return False
|
return False
|
||||||
return any(isinstance(block, dict) and block.get("type") == "tool_result" for block in content)
|
return any(isinstance(block, dict) and block.get("type") == "tool_result"
|
||||||
|
for block in content)
|
||||||
|
|
||||||
|
|
||||||
# L1: snipCompact — trim middle messages
|
# L1: snipCompact — trim middle messages
|
||||||
@@ -293,10 +296,12 @@ def snip_compact(messages, max_messages=50):
|
|||||||
if len(messages) <= max_messages: return messages
|
if len(messages) <= max_messages: return messages
|
||||||
keep_head, keep_tail = 3, max_messages - 3
|
keep_head, keep_tail = 3, max_messages - 3
|
||||||
head_end, tail_start = keep_head, len(messages) - keep_tail
|
head_end, tail_start = keep_head, len(messages) - keep_tail
|
||||||
if head_end > 0 and _has_tool_use(messages[head_end - 1]):
|
if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):
|
||||||
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
while head_end < len(messages) and _is_tool_result_message(messages[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if tail_start > 0 and tail_start < len(messages) and _is_tool_result_message(messages[tail_start]) and _has_tool_use(messages[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(messages)
|
||||||
|
and _is_tool_result_message(messages[tail_start])
|
||||||
|
and _message_has_tool_use(messages[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
if head_end >= tail_start:
|
if head_end >= tail_start:
|
||||||
return messages
|
return messages
|
||||||
@@ -379,7 +384,9 @@ def reactive_compact(messages):
|
|||||||
transcript = write_transcript(messages)
|
transcript = write_transcript(messages)
|
||||||
summary = summarize_history(messages)
|
summary = summarize_history(messages)
|
||||||
tail_start = max(0, len(messages) - 5)
|
tail_start = max(0, len(messages) - 5)
|
||||||
if tail_start > 0 and tail_start < len(messages) and _is_tool_result_message(messages[tail_start]) and _has_tool_use(messages[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(messages)
|
||||||
|
and _is_tool_result_message(messages[tail_start])
|
||||||
|
and _message_has_tool_use(messages[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *messages[tail_start:]]
|
||||||
|
|
||||||
|
|||||||
@@ -450,9 +450,9 @@ CONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000
|
|||||||
def estimate_size(msgs): return len(str(msgs))
|
def estimate_size(msgs): return len(str(msgs))
|
||||||
|
|
||||||
def _block_type(block):
|
def _block_type(block):
|
||||||
return getattr(block, "type", None) if not isinstance(block, dict) else block.get("type")
|
return block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
|
||||||
|
|
||||||
def _has_tool_use(msg):
|
def _message_has_tool_use(msg):
|
||||||
if msg.get("role") != "assistant":
|
if msg.get("role") != "assistant":
|
||||||
return False
|
return False
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
@@ -471,10 +471,12 @@ def _is_tool_result_message(msg):
|
|||||||
def snip_compact(msgs, mx=50):
|
def snip_compact(msgs, mx=50):
|
||||||
if len(msgs) <= mx: return msgs
|
if len(msgs) <= mx: return msgs
|
||||||
head_end, tail_start = 3, len(msgs) - (mx - 3)
|
head_end, tail_start = 3, len(msgs) - (mx - 3)
|
||||||
if head_end > 0 and _has_tool_use(msgs[head_end - 1]):
|
if head_end > 0 and _message_has_tool_use(msgs[head_end - 1]):
|
||||||
while head_end < len(msgs) and _is_tool_result_message(msgs[head_end]):
|
while head_end < len(msgs) and _is_tool_result_message(msgs[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if tail_start > 0 and tail_start < len(msgs) and _is_tool_result_message(msgs[tail_start]) and _has_tool_use(msgs[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(msgs)
|
||||||
|
and _is_tool_result_message(msgs[tail_start])
|
||||||
|
and _message_has_tool_use(msgs[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
if head_end >= tail_start:
|
if head_end >= tail_start:
|
||||||
return msgs
|
return msgs
|
||||||
@@ -540,7 +542,9 @@ def reactive_compact(msgs):
|
|||||||
write_transcript(msgs)
|
write_transcript(msgs)
|
||||||
summary = summarize_history(msgs)
|
summary = summarize_history(msgs)
|
||||||
tail_start = max(0, len(msgs) - 5)
|
tail_start = max(0, len(msgs) - 5)
|
||||||
if tail_start > 0 and tail_start < len(msgs) and _is_tool_result_message(msgs[tail_start]) and _has_tool_use(msgs[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(msgs)
|
||||||
|
and _is_tool_result_message(msgs[tail_start])
|
||||||
|
and _message_has_tool_use(msgs[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *msgs[tail_start:]]
|
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"}, *msgs[tail_start:]]
|
||||||
|
|
||||||
|
|||||||
@@ -1061,9 +1061,10 @@ def estimate_size(messages: list) -> int:
|
|||||||
return len(json.dumps(messages, default=str))
|
return len(json.dumps(messages, default=str))
|
||||||
|
|
||||||
def block_type(block):
|
def block_type(block):
|
||||||
return getattr(block, "type", None) if not isinstance(block, dict) else block.get("type")
|
return block.get("type") if isinstance(block, dict) else getattr(block, "type", None)
|
||||||
|
|
||||||
def has_tool_use(message: dict) -> bool:
|
|
||||||
|
def message_has_tool_use(message: dict) -> bool:
|
||||||
if message.get("role") != "assistant":
|
if message.get("role") != "assistant":
|
||||||
return False
|
return False
|
||||||
content = message.get("content")
|
content = message.get("content")
|
||||||
@@ -1071,6 +1072,7 @@ def has_tool_use(message: dict) -> bool:
|
|||||||
return False
|
return False
|
||||||
return any(block_type(block) == "tool_use" for block in content)
|
return any(block_type(block) == "tool_use" for block in content)
|
||||||
|
|
||||||
|
|
||||||
def is_tool_result_message(message: dict) -> bool:
|
def is_tool_result_message(message: dict) -> bool:
|
||||||
if message.get("role") != "user":
|
if message.get("role") != "user":
|
||||||
return False
|
return False
|
||||||
@@ -1132,10 +1134,12 @@ def snip_compact(messages: list, max_messages: int = 50) -> list:
|
|||||||
if len(messages) <= max_messages:
|
if len(messages) <= max_messages:
|
||||||
return messages
|
return messages
|
||||||
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
head_end, tail_start = 3, len(messages) - (max_messages - 3)
|
||||||
if head_end > 0 and has_tool_use(messages[head_end - 1]):
|
if head_end > 0 and message_has_tool_use(messages[head_end - 1]):
|
||||||
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
while head_end < len(messages) and is_tool_result_message(messages[head_end]):
|
||||||
head_end += 1
|
head_end += 1
|
||||||
if tail_start > 0 and tail_start < len(messages) and is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(messages)
|
||||||
|
and is_tool_result_message(messages[tail_start])
|
||||||
|
and message_has_tool_use(messages[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
if head_end >= tail_start:
|
if head_end >= tail_start:
|
||||||
return messages
|
return messages
|
||||||
@@ -1191,7 +1195,9 @@ def reactive_compact(messages: list) -> list:
|
|||||||
except Exception:
|
except Exception:
|
||||||
summary = "Earlier conversation was trimmed after a prompt-too-long error."
|
summary = "Earlier conversation was trimmed after a prompt-too-long error."
|
||||||
tail_start = max(0, len(messages) - 5)
|
tail_start = max(0, len(messages) - 5)
|
||||||
if tail_start > 0 and tail_start < len(messages) and is_tool_result_message(messages[tail_start]) and has_tool_use(messages[tail_start - 1]):
|
if (tail_start > 0 and tail_start < len(messages)
|
||||||
|
and is_tool_result_message(messages[tail_start])
|
||||||
|
and message_has_tool_use(messages[tail_start - 1])):
|
||||||
tail_start -= 1
|
tail_start -= 1
|
||||||
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"},
|
return [{"role": "user", "content": f"[Reactive compact]\n\n{summary}"},
|
||||||
*messages[tail_start:]]
|
*messages[tail_start:]]
|
||||||
|
|||||||
189
tests/test_compaction_tool_pairs.py
Normal file
189
tests/test_compaction_tool_pairs.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
import importlib.util
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import types
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
MODULES = {
|
||||||
|
"s08": REPO_ROOT / "s08_context_compact" / "code.py",
|
||||||
|
"s09": REPO_ROOT / "s09_memory" / "code.py",
|
||||||
|
"s20": REPO_ROOT / "s20_comprehensive" / "code.py",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def load_module(name: str, path: Path, temp_cwd: Path):
|
||||||
|
fake_anthropic = types.ModuleType("anthropic")
|
||||||
|
|
||||||
|
class FakeAnthropic:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
self.messages = types.SimpleNamespace(create=None)
|
||||||
|
|
||||||
|
fake_dotenv = types.ModuleType("dotenv")
|
||||||
|
setattr(fake_anthropic, "Anthropic", FakeAnthropic)
|
||||||
|
setattr(fake_dotenv, "load_dotenv", lambda override=True: None)
|
||||||
|
|
||||||
|
previous_anthropic = sys.modules.get("anthropic")
|
||||||
|
previous_dotenv = sys.modules.get("dotenv")
|
||||||
|
previous_cwd = Path.cwd()
|
||||||
|
previous_model = os.environ.get("MODEL_ID")
|
||||||
|
previous_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
|
||||||
|
spec = importlib.util.spec_from_file_location(name, path)
|
||||||
|
if spec is None or spec.loader is None:
|
||||||
|
raise RuntimeError(f"Unable to load {path}")
|
||||||
|
module = importlib.util.module_from_spec(spec)
|
||||||
|
|
||||||
|
sys.modules["anthropic"] = fake_anthropic
|
||||||
|
sys.modules["dotenv"] = fake_dotenv
|
||||||
|
os.environ["MODEL_ID"] = "test-model"
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = "test-key"
|
||||||
|
try:
|
||||||
|
os.chdir(temp_cwd)
|
||||||
|
spec.loader.exec_module(module)
|
||||||
|
return module
|
||||||
|
finally:
|
||||||
|
os.chdir(previous_cwd)
|
||||||
|
if previous_anthropic is None:
|
||||||
|
sys.modules.pop("anthropic", None)
|
||||||
|
else:
|
||||||
|
sys.modules["anthropic"] = previous_anthropic
|
||||||
|
if previous_dotenv is None:
|
||||||
|
sys.modules.pop("dotenv", None)
|
||||||
|
else:
|
||||||
|
sys.modules["dotenv"] = previous_dotenv
|
||||||
|
if previous_model is None:
|
||||||
|
os.environ.pop("MODEL_ID", None)
|
||||||
|
else:
|
||||||
|
os.environ["MODEL_ID"] = previous_model
|
||||||
|
if previous_key is None:
|
||||||
|
os.environ.pop("ANTHROPIC_API_KEY", None)
|
||||||
|
else:
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = previous_key
|
||||||
|
|
||||||
|
|
||||||
|
def assistant_text():
|
||||||
|
return {"role": "assistant", "content": [types.SimpleNamespace(type="text", text="ok")]}
|
||||||
|
|
||||||
|
|
||||||
|
def user_text():
|
||||||
|
return {"role": "user", "content": "continue"}
|
||||||
|
|
||||||
|
|
||||||
|
def tool_use_message(tool_id="tool-1"):
|
||||||
|
return {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [types.SimpleNamespace(type="tool_use", id=tool_id, name="bash")],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def tool_result_message(tool_id="tool-1"):
|
||||||
|
return {
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": tool_id, "content": "ok"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def message_has_tool_use(message):
|
||||||
|
content = message.get("content")
|
||||||
|
return (
|
||||||
|
message.get("role") == "assistant"
|
||||||
|
and isinstance(content, list)
|
||||||
|
and any(getattr(block, "type", None) == "tool_use" for block in content)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def assert_no_orphan_tool_results(testcase, messages):
|
||||||
|
for idx, message in enumerate(messages):
|
||||||
|
content = message.get("content")
|
||||||
|
if message.get("role") != "user" or not isinstance(content, list):
|
||||||
|
continue
|
||||||
|
if not any(isinstance(block, dict) and block.get("type") == "tool_result" for block in content):
|
||||||
|
continue
|
||||||
|
testcase.assertGreater(idx, 0)
|
||||||
|
testcase.assertTrue(message_has_tool_use(messages[idx - 1]), messages)
|
||||||
|
|
||||||
|
|
||||||
|
class CompactionToolPairTests(unittest.TestCase):
|
||||||
|
def test_snip_compact_keeps_head_tool_pair(self):
|
||||||
|
messages = [
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
tool_use_message("head-tool"),
|
||||||
|
tool_result_message("head-tool"),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
]
|
||||||
|
|
||||||
|
for name, path in MODULES.items():
|
||||||
|
with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
|
||||||
|
module = load_module(f"{name}_head_under_test", path, Path(tmp))
|
||||||
|
if name == "s09":
|
||||||
|
compacted = module.snip_compact(list(messages), mx=6)
|
||||||
|
else:
|
||||||
|
compacted = module.snip_compact(list(messages), max_messages=6)
|
||||||
|
self.assertEqual(compacted[2], messages[2])
|
||||||
|
self.assertEqual(compacted[3], messages[3])
|
||||||
|
assert_no_orphan_tool_results(self, compacted)
|
||||||
|
|
||||||
|
def test_snip_compact_keeps_tail_tool_pair(self):
|
||||||
|
messages = [
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
tool_use_message("tail-tool"),
|
||||||
|
tool_result_message("tail-tool"),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
]
|
||||||
|
|
||||||
|
for name, path in MODULES.items():
|
||||||
|
with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
|
||||||
|
module = load_module(f"{name}_under_test", path, Path(tmp))
|
||||||
|
if name == "s09":
|
||||||
|
compacted = module.snip_compact(list(messages), mx=6)
|
||||||
|
else:
|
||||||
|
compacted = module.snip_compact(list(messages), max_messages=6)
|
||||||
|
assert_no_orphan_tool_results(self, compacted)
|
||||||
|
|
||||||
|
def test_reactive_compact_keeps_tail_tool_pair(self):
|
||||||
|
messages = [
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
tool_use_message("reactive-tool"),
|
||||||
|
tool_result_message("reactive-tool"),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
assistant_text(),
|
||||||
|
user_text(),
|
||||||
|
]
|
||||||
|
|
||||||
|
for name, path in MODULES.items():
|
||||||
|
with self.subTest(name=name), tempfile.TemporaryDirectory() as tmp:
|
||||||
|
module = load_module(f"{name}_reactive_under_test", path, Path(tmp))
|
||||||
|
module.write_transcript = lambda _messages: Path("transcript.jsonl")
|
||||||
|
module.summarize_history = lambda _messages: "summary"
|
||||||
|
compacted = module.reactive_compact(list(messages))
|
||||||
|
self.assertEqual(compacted[1], messages[3])
|
||||||
|
assert_no_orphan_tool_results(self, compacted)
|
||||||
|
|
||||||
|
def test_s20_has_tool_use_still_accepts_content_blocks(self):
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
module = load_module("s20_has_tool_use_under_test", MODULES["s20"], Path(tmp))
|
||||||
|
self.assertTrue(module.has_tool_use([types.SimpleNamespace(type="tool_use")]))
|
||||||
|
self.assertFalse(module.has_tool_use([types.SimpleNamespace(type="text")]))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user