import io
import json
import tempfile
import unittest
from contextlib import redirect_stdout
from pathlib import Path

from semantic_index.__main__ import main
from semantic_index.models import IndexDocument
from semantic_index.refresh import FileRefreshState, RedmineRefreshService


def issue(updated_on="2026-04-25T12:00:00Z"):
    """Return a canned Redmine issue dict with a configurable ``updated_on``."""
    project = {
        "id": 1,
        "identifier": "customer-service",
        "name": "Customer Service",
    }
    return {
        "id": 39779,
        "subject": "Goods return",
        "description": "Please return our goods.",
        "updated_on": updated_on,
        "project": project,
    }


def _build_service(source, embedder, **service_kwargs):
    """Create a refresh service wired to the given fakes and a fresh store.

    Extra keyword arguments (for example ``state=``) are forwarded unchanged,
    so omitting them leaves the service's own defaults in effect.
    """
    return RedmineRefreshService(
        source=source,
        embedder=embedder,
        store=RefreshStore(),
        **service_kwargs,
    )


def _as_stored_record(document):
    """Return the stored-record dict for ``document`` with a copied payload."""
    return {
        "id": document.id,
        "text": document.text,
        "payload": dict(document.payload),
    }


def _seeded_state(directory):
    """Create a refresh state file under ``directory`` marked successful at noon."""
    refresh_state = FileRefreshState(Path(directory) / "refresh.json")
    refresh_state.mark_success("customer-service", "2026-04-25T12:00:00Z")
    return refresh_state


class FakeRedmineSource:
    """In-memory issue source that records every fetch request."""

    # Read back when recording calls; presumably assigned by the service
    # before each fetch -- TODO confirm against RedmineRefreshService.
    project_identifier = None

    def __init__(self, issues=None):
        # NOTE(review): any falsy value -- including an explicit empty list --
        # falls back to a single default issue.
        self.issues = issues if issues else [issue()]
        self.calls = []

    def recent_helpdesk_issues(self, limit):
        """Record ``(project_identifier, limit)`` and return the first ``limit`` issues."""
        request = (self.project_identifier, limit)
        self.calls.append(request)
        return self.issues[:limit]


class SummaryDetailRedmineSource(FakeRedmineSource):
    """Fake source exposing separate summary and detail lookups."""

    def __init__(self, summaries, details):
        # NOTE(review): the empty list is falsy, so the parent still installs
        # its single default issue in ``self.issues``.
        super().__init__([])
        self.summaries = summaries
        self.details = details
        self.summary_calls = []
        self.detail_calls = []

    def recent_issue_summaries(self, limit):
        """Record the request and return the first ``limit`` summaries."""
        request = (self.project_identifier, limit)
        self.summary_calls.append(request)
        return self.summaries[:limit]

    def issue_detail(self, issue_id):
        """Record ``issue_id`` and return its detail; raises ``KeyError`` if unknown."""
        self.detail_calls.append(issue_id)
        return self.details[issue_id]


class RecordingEmbedder:
    """Embedder double that logs each batch and returns a constant vector per doc."""

    def __init__(self):
        self.calls = []

    def embed_documents(self, docs):
        """Record a list copy of ``docs`` and return one fixed vector per document."""
        self.calls.append(list(docs))
        return [[0.1, 0.2, 0.3] for _ in docs]


class RefreshStore:
    """Dict-backed store double that records upserts and deletions."""

    def __init__(self, existing=None):
        self.existing = existing if existing else {}
        self.upserts = []
        self.deleted_ids = []

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None, issue_id=None):
        """Return up to ``limit`` stored records; the filter arguments are ignored."""
        records = list(self.existing.values())
        return records[:limit]

    def upsert(self, docs, vectors):
        """Record one ``(docs, vectors)`` pair as list copies."""
        entry = (list(docs), list(vectors))
        self.upserts.append(entry)

    def delete_documents(self, document_ids):
        """Append every id in ``document_ids`` to ``deleted_ids``."""
        self.deleted_ids.extend(document_ids)


class RedmineRefreshServiceTest(unittest.TestCase):
    """Tests for ``RedmineRefreshService.refresh_redmine_project_limits``."""

    def test_refresh_skips_embeddings_when_source_hash_matches_existing_document(self):
        # The store already holds an exact copy of the mapped document.
        embedder = RecordingEmbedder()
        service = _build_service(FakeRedmineSource(), embedder)
        store = service.store
        expected_doc = service.mapper.issue_to_documents(issue())[0]
        store.existing[expected_doc.id] = _as_stored_record(expected_doc)

        result = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, result["unchanged_documents"])
        self.assertEqual(0, result["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], store.upserts)

    def test_refresh_embeds_only_changed_and_new_documents(self):
        # The stored record differs in both text and source hash.
        embedder = RecordingEmbedder()
        service = _build_service(FakeRedmineSource(), embedder)
        store = service.store
        expected_doc = service.mapper.issue_to_documents(issue())[0]
        store.existing[expected_doc.id] = {
            "id": expected_doc.id,
            "text": "Old text",
            "payload": {**expected_doc.payload, "source_hash": "old-hash"},
        }

        result = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, result["changed_documents"])
        self.assertEqual(1, result["embedded_documents"])
        self.assertEqual([[expected_doc]], embedder.calls)
        upserted_docs = store.upserts[0][0]
        self.assertEqual([expected_doc.id], [doc.id for doc in upserted_docs])

    def test_refresh_deletes_stale_issue_documents_without_embedding(self):
        # Besides the current document, the store holds a journal chunk that
        # the mapped issue no longer produces.
        stale_id = "redmine:issue:39779:journal:1:chunk:0"
        embedder = RecordingEmbedder()
        service = _build_service(FakeRedmineSource(), embedder)
        store = service.store
        expected_doc = service.mapper.issue_to_documents(issue())[0]
        store.existing[expected_doc.id] = _as_stored_record(expected_doc)
        store.existing[stale_id] = {
            "id": stale_id,
            "text": "Deleted note",
            "payload": {"source_hash": "gone", "issue_id": 39779},
        }

        result = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, result["stale_documents"])
        self.assertEqual([stale_id], store.deleted_ids)
        self.assertEqual([], embedder.calls)

    def test_dry_run_reports_planned_embeddings_without_embedding_or_mutating(self):
        embedder = RecordingEmbedder()
        service = _build_service(FakeRedmineSource(), embedder)

        result = service.refresh_redmine_project_limits(
            {"customer-service": 1},
            dry_run=True,
        )

        self.assertEqual(1, result["new_documents"])
        self.assertEqual(1, result["would_embed_documents"])
        self.assertEqual(0, result["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)
        self.assertEqual([], service.store.deleted_ids)

    def test_force_rebuild_embeds_unchanged_documents(self):
        # The stored copy matches exactly, yet force_rebuild must re-embed it.
        embedder = RecordingEmbedder()
        service = _build_service(FakeRedmineSource(), embedder)
        expected_doc = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[expected_doc.id] = _as_stored_record(expected_doc)

        result = service.refresh_redmine_project_limits(
            {"customer-service": 1},
            force_rebuild=True,
        )

        self.assertEqual(1, result["force_rebuilt_documents"])
        self.assertEqual(1, result["embedded_documents"])
        self.assertEqual([[expected_doc]], embedder.calls)

    def test_force_rebuild_ignores_refresh_state_window_for_fetched_candidates(self):
        # The issue was updated two hours before the recorded success time.
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp_dir:
            service = _build_service(source, embedder, state=_seeded_state(tmp_dir))

            result = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                force_rebuild=True,
                overlap_minutes=15,
            )

            self.assertEqual(0, result["skipped_issues"])
            self.assertEqual(1, result["embedded_documents"])

    def test_file_refresh_state_updates_only_when_called(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            state_file = Path(tmp_dir) / "refresh.json"
            state = FileRefreshState(state_file)
            self.assertEqual({}, state.load())

            state.mark_success("customer-service", "2026-04-25T12:00:00Z")

            expected_content = {
                "projects": {
                    "customer-service": {
                        "last_successful_refresh_at": "2026-04-25T12:00:00Z",
                    },
                },
            }
            on_disk = json.loads(state_file.read_text(encoding="utf-8"))
            self.assertEqual(expected_content, on_disk)

    def test_refresh_state_skips_issues_older_than_overlap_window(self):
        # The issue was updated two hours before the recorded success time.
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp_dir:
            service = _build_service(source, embedder, state=_seeded_state(tmp_dir))

            result = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(1, result["issues"])
            self.assertEqual(1, result["skipped_issues"])
            self.assertEqual(0, result["documents"])
            self.assertEqual([], embedder.calls)

    def test_refresh_skips_old_summaries_without_fetching_issue_detail(self):
        # Detail exists only for the newer issue, so a lookup of the old one
        # would raise KeyError in the fake source.
        old_summary = {"id": 39779, "updated_on": "2026-04-25T10:00:00Z"}
        new_summary = {"id": 39780, "updated_on": "2026-04-25T11:50:00Z"}
        new_detail = {**issue("2026-04-25T11:50:00Z"), "id": 39780}
        source = SummaryDetailRedmineSource(
            summaries=[old_summary, new_summary],
            details={39780: new_detail},
        )
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp_dir:
            service = _build_service(source, embedder, state=_seeded_state(tmp_dir))

            result = service.refresh_redmine_project_limits(
                {"customer-service": 2},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(2, result["scanned_issues"])
            self.assertEqual(1, result["skipped_issues"])
            self.assertEqual(1, result["detail_fetched_issues"])
            self.assertEqual([39780], source.detail_calls)


class RefreshCliTest(unittest.TestCase):
    """Tests for the ``--refresh-redmine-projects`` command-line entry point."""

    def test_refresh_redmine_projects_cli_parses_project_limits_and_dry_run(self):
        class FakeRefresh:
            """Records the arguments the CLI passes to the refresh call."""

            def __init__(self):
                self.calls = []

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                self.calls.append((project_limits, dry_run, force_rebuild, overlap_minutes))
                return {
                    "source": "redmine",
                    "projects": len(project_limits),
                    "issues": sum(project_limits.values()),
                }

        refresh = FakeRefresh()
        services = {"refresh": refresh}
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=5,hiring=2",
            "--dry-run",
            "--overlap-minutes",
            "30",
        ]
        captured = io.StringIO()

        with redirect_stdout(captured):
            main(argv, service_builder=lambda: services)

        expected_call = ({"customer-service": 5, "hiring": 2}, True, False, 30)
        self.assertEqual(expected_call, refresh.calls[0])
        self.assertIn("'projects': 2", captured.getvalue())

    def test_refresh_redmine_projects_cli_can_override_state_path(self):
        class FakeRefresh:
            """Reports back the path of whatever state object it was given."""

            def __init__(self):
                # Starts as None; the test relies on a state object with a
                # ``path`` attribute being assigned before the refresh call.
                self.state = None

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                return {"state_path": str(self.state.path)}

        refresh = FakeRefresh()
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=1",
            "--state-path",
            "/tmp/semantic-refresh-state.json",
        ]
        captured = io.StringIO()

        with redirect_stdout(captured):
            main(argv, service_builder=lambda: {"refresh": refresh})

        self.assertIn("/tmp/semantic-refresh-state.json", captured.getvalue())


if __name__ == "__main__":
    unittest.main()