# File: redmine/tests/semantic_index/test_refresh.py
# Snapshot: 2026-05-04 09:50:03 -04:00, 278 lines, 11 KiB, Python

import io
import json
import tempfile
import unittest
from contextlib import redirect_stdout
from pathlib import Path
from semantic_index.__main__ import main
from semantic_index.models import IndexDocument
from semantic_index.refresh import FileRefreshState, RedmineRefreshService
def issue(updated_on="2026-04-25T12:00:00Z"):
    """Build the canned Redmine issue payload shared by the refresh tests.

    Args:
        updated_on: Value stored under the ``updated_on`` key.

    Returns:
        A new dict for issue 39779 in the ``customer-service`` project.
    """
    project = {"id": 1, "identifier": "customer-service", "name": "Customer Service"}
    payload = {"id": 39779, "subject": "Goods return"}
    payload["description"] = "Please return our goods."
    payload["updated_on"] = updated_on
    payload["project"] = project
    return payload
class FakeRedmineSource:
    """In-memory stand-in for a Redmine source.

    Serves a fixed list of issues and records every
    ``recent_helpdesk_issues`` call as ``(project_identifier, limit)``.
    """

    # Class-level default, read through the instance when a call is recorded.
    # NOTE(review): presumably reassigned by the code under test before it
    # fetches issues - confirm against the refresh service.
    project_identifier = None

    def __init__(self, issues=None):
        """Store the canned issues.

        Args:
            issues: Issues to serve. ``None`` selects a single default
                ``issue()``; any list, including an empty one, is kept as-is.
        """
        # Test for None explicitly: a truthiness check would swap an
        # intentionally empty list for the default issue.
        self.issues = [issue()] if issues is None else issues
        self.calls = []

    def recent_helpdesk_issues(self, limit):
        """Record the request and return at most ``limit`` canned issues."""
        self.calls.append((self.project_identifier, limit))
        return self.issues[:limit]
class SummaryDetailRedmineSource(FakeRedmineSource):
    """Fake source offering a summary listing plus per-issue detail lookup.

    Each kind of request is logged in its own list (``summary_calls`` and
    ``detail_calls``) so tests can check which issues were actually fetched.
    """

    def __init__(self, summaries, details):
        """Keep the canned summaries and the ``issue_id -> detail`` mapping.

        The base initializer is invoked with an empty list.
        """
        super().__init__([])
        self.summary_calls, self.detail_calls = [], []
        self.summaries = summaries
        self.details = details

    def recent_issue_summaries(self, limit):
        """Log the request and return the first ``limit`` summaries."""
        request = (self.project_identifier, limit)
        self.summary_calls.append(request)
        head = self.summaries[:limit]
        return head

    def issue_detail(self, issue_id):
        """Log the lookup and return the detail stored for ``issue_id``.

        Raises:
            KeyError: If no detail was registered for ``issue_id``.
        """
        self.detail_calls.append(issue_id)
        detail = self.details[issue_id]
        return detail
class RecordingEmbedder:
    """Embedder double that logs each batch and returns constant vectors."""

    def __init__(self):
        """Start with an empty call log."""
        self.calls = []

    def embed_documents(self, docs):
        """Record ``docs`` and return one fixed vector per document.

        Args:
            docs: Iterable of documents; it is consumed exactly once.

        Returns:
            A list holding a fresh ``[0.1, 0.2, 0.3]`` list for each document.
        """
        # Materialise once so a one-shot iterable (e.g. a generator) is not
        # exhausted by the recording step before the vectors are produced.
        batch = list(docs)
        self.calls.append(batch)
        return [[0.1, 0.2, 0.3] for _ in batch]
class RefreshStore:
    """In-memory document store double that logs upserts and deletions."""

    def __init__(self, existing=None):
        """Set up the store.

        Args:
            existing: Mapping of document id to stored record. ``None``
                creates a new empty dict; a supplied mapping is used as-is.
        """
        # Test for None explicitly: a truthiness check would replace a
        # caller-supplied empty dict with a different object, so entries the
        # caller adds to its own dict later would never reach the store.
        self.existing = {} if existing is None else existing
        self.upserts = []
        self.deleted_ids = []

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None, issue_id=None):
        """Return up to ``limit`` stored records.

        The filter arguments are accepted but not applied by this double.
        """
        return list(self.existing.values())[:limit]

    def upsert(self, docs, vectors):
        """Log one ``(docs, vectors)`` pair, copying both into lists."""
        self.upserts.append((list(docs), list(vectors)))

    def delete_documents(self, document_ids):
        """Append every id in ``document_ids`` to ``deleted_ids``."""
        self.deleted_ids.extend(document_ids)
class RedmineRefreshServiceTest(unittest.TestCase):
    """Exercise ``RedmineRefreshService`` and ``FileRefreshState`` with in-memory fakes."""

    @staticmethod
    def _service_for(redmine, recorder, **extra):
        # Wire the service to the given fakes and a new, empty RefreshStore.
        # Extra keywords (e.g. ``state``) are forwarded only when supplied.
        return RedmineRefreshService(source=redmine, embedder=recorder, store=RefreshStore(), **extra)

    @staticmethod
    def _as_stored(doc):
        # Record shape the tests seed into ``store.existing`` for a mapped document.
        return {"id": doc.id, "text": doc.text, "payload": dict(doc.payload)}

    @staticmethod
    def _state_marked_at_noon(directory):
        # State file in ``directory`` with a recorded success for customer-service.
        refresh_state = FileRefreshState(Path(directory) / "refresh.json")
        refresh_state.mark_success("customer-service", "2026-04-25T12:00:00Z")
        return refresh_state

    def test_refresh_skips_embeddings_when_source_hash_matches_existing_document(self):
        recorder = RecordingEmbedder()
        service = self._service_for(FakeRedmineSource(), recorder)
        doc = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[doc.id] = self._as_stored(doc)

        report = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, report["unchanged_documents"])
        self.assertEqual(0, report["embedded_documents"])
        self.assertEqual([], recorder.calls)
        self.assertEqual([], service.store.upserts)

    def test_refresh_embeds_only_changed_and_new_documents(self):
        recorder = RecordingEmbedder()
        service = self._service_for(FakeRedmineSource(), recorder)
        doc = service.mapper.issue_to_documents(issue())[0]
        # Stored copy differs from the fresh mapping in both text and hash.
        outdated_payload = dict(doc.payload)
        outdated_payload["source_hash"] = "old-hash"
        service.store.existing[doc.id] = {
            "id": doc.id,
            "text": "Old text",
            "payload": outdated_payload,
        }

        report = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, report["changed_documents"])
        self.assertEqual(1, report["embedded_documents"])
        self.assertEqual([[doc]], recorder.calls)
        upserted_docs = service.store.upserts[0][0]
        self.assertEqual([doc.id], [item.id for item in upserted_docs])

    def test_refresh_deletes_stale_issue_documents_without_embedding(self):
        journal_id = "redmine:issue:39779:journal:1:chunk:0"
        recorder = RecordingEmbedder()
        service = self._service_for(FakeRedmineSource(), recorder)
        doc = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[doc.id] = self._as_stored(doc)
        service.store.existing[journal_id] = {
            "id": journal_id,
            "text": "Deleted note",
            "payload": {"source_hash": "gone", "issue_id": 39779},
        }

        report = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, report["stale_documents"])
        self.assertEqual([journal_id], service.store.deleted_ids)
        self.assertEqual([], recorder.calls)

    def test_dry_run_reports_planned_embeddings_without_embedding_or_mutating(self):
        recorder = RecordingEmbedder()
        service = self._service_for(FakeRedmineSource(), recorder)

        report = service.refresh_redmine_project_limits({"customer-service": 1}, dry_run=True)

        self.assertEqual(1, report["new_documents"])
        self.assertEqual(1, report["would_embed_documents"])
        self.assertEqual(0, report["embedded_documents"])
        self.assertEqual([], recorder.calls)
        self.assertEqual([], service.store.upserts)
        self.assertEqual([], service.store.deleted_ids)

    def test_force_rebuild_embeds_unchanged_documents(self):
        recorder = RecordingEmbedder()
        service = self._service_for(FakeRedmineSource(), recorder)
        doc = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[doc.id] = self._as_stored(doc)

        report = service.refresh_redmine_project_limits({"customer-service": 1}, force_rebuild=True)

        self.assertEqual(1, report["force_rebuilt_documents"])
        self.assertEqual(1, report["embedded_documents"])
        self.assertEqual([[doc]], recorder.calls)

    def test_force_rebuild_ignores_refresh_state_window_for_fetched_candidates(self):
        redmine = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        recorder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as workdir:
            refresh_state = self._state_marked_at_noon(workdir)
            service = self._service_for(redmine, recorder, state=refresh_state)

            report = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                force_rebuild=True,
                overlap_minutes=15,
            )

            self.assertEqual(0, report["skipped_issues"])
            self.assertEqual(1, report["embedded_documents"])

    def test_file_refresh_state_updates_only_when_called(self):
        with tempfile.TemporaryDirectory() as workdir:
            state_file = Path(workdir) / "refresh.json"
            refresh_state = FileRefreshState(state_file)
            self.assertEqual({}, refresh_state.load())

            refresh_state.mark_success("customer-service", "2026-04-25T12:00:00Z")

            expected = {"projects": {"customer-service": {"last_successful_refresh_at": "2026-04-25T12:00:00Z"}}}
            on_disk = json.loads(state_file.read_text(encoding="utf-8"))
            self.assertEqual(expected, on_disk)

    def test_refresh_state_skips_issues_older_than_overlap_window(self):
        redmine = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        recorder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as workdir:
            refresh_state = self._state_marked_at_noon(workdir)
            service = self._service_for(redmine, recorder, state=refresh_state)

            report = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(1, report["issues"])
            self.assertEqual(1, report["skipped_issues"])
            self.assertEqual(0, report["documents"])
            self.assertEqual([], recorder.calls)

    def test_refresh_skips_old_summaries_without_fetching_issue_detail(self):
        old_summary = {"id": 39779, "updated_on": "2026-04-25T10:00:00Z"}
        new_summary = {"id": 39780, "updated_on": "2026-04-25T11:50:00Z"}
        # Only the newer issue has a detail record registered with the fake.
        new_detail = issue("2026-04-25T11:50:00Z")
        new_detail["id"] = 39780
        redmine = SummaryDetailRedmineSource(
            summaries=[old_summary, new_summary],
            details={39780: new_detail},
        )
        recorder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as workdir:
            refresh_state = self._state_marked_at_noon(workdir)
            service = self._service_for(redmine, recorder, state=refresh_state)

            report = service.refresh_redmine_project_limits(
                {"customer-service": 2},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(2, report["scanned_issues"])
            self.assertEqual(1, report["skipped_issues"])
            self.assertEqual(1, report["detail_fetched_issues"])
            self.assertEqual([39780], redmine.detail_calls)
class RefreshCliTest(unittest.TestCase):
    """Drive ``main`` with stubbed refresh services and inspect its stdout."""

    def test_refresh_redmine_projects_cli_parses_project_limits_and_dry_run(self):
        class FakeRefresh:
            def __init__(self):
                self.calls = []

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                # Log the parsed arguments exactly as received.
                received = (project_limits, dry_run, force_rebuild, overlap_minutes)
                self.calls.append(received)
                summary = {"source": "redmine"}
                summary["projects"] = len(project_limits)
                summary["issues"] = sum(project_limits.values())
                return summary

        refresh = FakeRefresh()
        services = {"refresh": refresh}
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=5,hiring=2",
            "--dry-run",
            "--overlap-minutes",
            "30",
        ]

        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, service_builder=lambda: services)

        expected_call = ({"customer-service": 5, "hiring": 2}, True, False, 30)
        self.assertEqual(expected_call, refresh.calls[0])
        self.assertIn("'projects': 2", captured.getvalue())

    def test_refresh_redmine_projects_cli_can_override_state_path(self):
        class FakeRefresh:
            def __init__(self):
                self.state = None

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                # Echo back the path of whatever state object is attached by then.
                attached_path = self.state.path
                return {"state_path": str(attached_path)}

        refresh = FakeRefresh()

        def build_services():
            # A new mapping on every call, wrapping the same fake.
            return {"refresh": refresh}

        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=1",
            "--state-path",
            "/tmp/semantic-refresh-state.json",
        ]

        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, service_builder=build_services)

        self.assertIn("/tmp/semantic-refresh-state.json", captured.getvalue())
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()