import unittest

from semantic_index.ingest import BackfillService
from semantic_index.mcp import SemanticMCP
from semantic_index.models import SearchQuery, SearchResult
from semantic_index.redmine import RedmineMapper


class FakeRedmineSource:
    """Canned Redmine source that serves two fixed helpdesk issues."""

    # Stamped onto every issue returned below. Stays None unless reassigned;
    # the multi-project tests presumably rely on the service setting it per
    # project -- confirm against BackfillService.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the two canned issues, sliced to ``[:limit]``."""
        canned = (
            (1, "First", "First body"),
            (2, "Second", "Second body"),
        )
        issues = [
            {
                "id": issue_id,
                "subject": subject,
                "description": description,
                "project": {"identifier": self.project_identifier},
            }
            for issue_id, subject, description in canned
        ]
        return issues[:limit]


class DuplicateDocumentRedmineSource:
    """Canned Redmine source whose two issues share the same issue id."""

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return two issues that both carry id 1, sliced to ``[:limit]``."""
        # The project is hard-coded on each issue rather than read from
        # ``self.project_identifier``.
        original = {
            "id": 1,
            "subject": "First",
            "description": "First body",
            "project": {"identifier": "customer-service"},
        }
        repeat = {
            "id": 1,
            "subject": "First duplicate",
            "description": "Duplicate body",
            "project": {"identifier": "customer-service"},
        }
        return [original, repeat][:limit]


class FakeEmbedder:
    """Deterministic embedder producing three-component vectors."""

    def embed_documents(self, docs):
        """Return one vector per document: ``[position, 0.0, 0.0]``, 1-based."""
        vectors = []
        for position, _doc in enumerate(docs, start=1):
            vectors.append([float(position), 0.0, 0.0])
        return vectors

    def embed_query(self, text):
        """Return the same fixed query vector regardless of ``text``."""
        return [0.1, 0.0, 0.0]


class FakeStore:
    """Recording store: remembers every rebuild request it receives."""

    def __init__(self):
        # (source, project_identifier) pairs, one per rebuild_source call.
        self.deleted = []
        # (docs, vectors) pairs, one per rebuild_source call.
        self.upserts = []

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record the rebuild scope and the documents/vectors handed over."""
        scope = (source, project_identifier)
        batch = (docs, vectors)
        self.deleted.append(scope)
        self.upserts.append(batch)

    def list_projects(self, source=None, limit=1000):
        """Return two fixed project summaries; both arguments are ignored."""
        document_counts = (
            ("customer-service", 1684),
            ("hiring", 409),
        )
        return [
            {"project_identifier": identifier, "document_count": count}
            for identifier, count in document_counts
        ]


class FakeRefreshService:
    """Recording refresh service that echoes a small summary."""

    def __init__(self):
        # Argument tuples, one per refresh_redmine_project_limits call.
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Record the call arguments and return a summary dict."""
        call = (project_limits, dry_run, force_rebuild, overlap_minutes)
        self.calls.append(call)
        summary = {
            "source": "redmine",
            "projects": len(project_limits),
            "dry_run": dry_run,
        }
        return summary


class FakeSearchService:
    """Recording search service returning a single fixed hit."""

    def __init__(self):
        # Every query object passed to search(), in call order.
        self.queries = []

    def search(self, query):
        """Record ``query`` and return one canned ``SearchResult``."""
        self.queries.append(query)
        hit = SearchResult(
            id="doc1",
            score=0.5,
            text="Snippet",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        return [hit]

    def get_document(self, document_id):
        """Return a minimal document dict echoing ``document_id``."""
        return dict(id=document_id, text="Snippet")


class BackfillAndMCPTest(unittest.TestCase):
    """Exercises BackfillService and SemanticMCP against in-memory fakes."""

    def test_sample_backfill_rebuilds_redmine_source(self):
        # No mapper is supplied, so the rebuild scope carries no project.
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )

        outcome = service.backfill_redmine_sample(limit=2)

        expected_outcome = {"source": "redmine", "issues": 2, "documents": 2}
        self.assertEqual(expected_outcome, outcome)
        # Deliberately read the store back through the service.
        recorder = service.store
        self.assertEqual([("redmine", None)], recorder.deleted)
        stored_docs, stored_vectors = recorder.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"], stored_ids)
        self.assertEqual(2, len(stored_vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        # The mapper's project identifier must become the rebuild scope.
        store = FakeStore()
        scoped_mapper = RedmineMapper(redmine_url="", project_identifier="customer-service")
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
            mapper=scoped_mapper,
        )

        service.backfill_redmine_sample(limit=1)

        self.assertEqual([("redmine", "customer-service")], store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        # One rebuild per requested project, in the order requested.
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        requested = ("customer-service", "hiring")

        outcome = service.backfill_redmine_projects(list(requested), per_project_limit=1)

        expected_outcome = {
            "source": "redmine",
            "projects": 2,
            "issues": 2,
            "documents": 2,
            "project_results": [
                {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
        }
        self.assertEqual(expected_outcome, outcome)
        self.assertEqual([("redmine", "customer-service"), ("redmine", "hiring")], store.deleted)
        # The first document of each batch must be tagged with its project.
        for batch_index, identifier in enumerate(requested):
            first_doc = store.upserts[batch_index][0][0]
            self.assertEqual(identifier, first_doc.payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        # Limits are given per project; totals are summed across projects.
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)

        outcome = service.backfill_redmine_project_limits({"customer-service": 2, "hiring": 1})

        self.assertEqual(3, outcome["issues"])
        expected_per_project = [
            {"project_identifier": "customer-service", "issues": 2, "documents": 2},
            {"project_identifier": "hiring", "issues": 1, "documents": 1},
        ]
        self.assertEqual(expected_per_project, outcome["project_results"])

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        # Two issues with the same id must yield one document and one vector.
        store = FakeStore()
        service = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
        )

        outcome = service.backfill_redmine_sample(limit=2)

        expected_outcome = {"source": "redmine", "issues": 2, "documents": 1}
        self.assertEqual(expected_outcome, outcome)
        stored_docs, stored_vectors = store.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0"], stored_ids)
        self.assertEqual(1, len(stored_vectors))

    def test_mcp_tools_return_json_ready_results(self):
        # Drive all four tools first, then check their responses.
        search = FakeSearchService()
        refresh = FakeRefreshService()
        mcp = SemanticMCP(
            search_service=search,
            backfill_service=None,
            store=FakeStore(),
            refresh_service=refresh,
        )

        search_args = {
            "query": "printer",
            "source": "redmine",
            "project_identifier": "hiring",
            "limit": 3,
        }
        refresh_args = {
            "project_identifier": "customer-service",
            "limit": 5,
            "dry_run": True,
        }
        response = mcp.call_tool("semantic_search", search_args)
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_response = mcp.call_tool("semantic_refresh_redmine", refresh_args)

        # semantic_search: echoed query and filters, plus a citation URL.
        self.assertEqual("printer", response["query"])
        self.assertEqual("hiring", response["filters"]["project_identifier"])
        top_hit = response["results"][0]
        self.assertEqual("doc1", top_hit["id"])
        self.assertEqual("http://redmine/issues/1", top_hit["citation"]["url"])

        # The search service must have been handed a typed SearchQuery.
        issued_query = search.queries[0]
        self.assertIsInstance(issued_query, SearchQuery)
        self.assertEqual("redmine", issued_query.source)
        self.assertEqual("hiring", issued_query.project_identifier)

        # Remaining tools: document lookup, project listing, refresh.
        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        self.assertEqual("customer-service", projects["projects"][0]["project_identifier"])
        self.assertTrue(refresh_response["dry_run"])
        expected_refresh_call = ({"customer-service": 5}, True, False, 15)
        self.assertEqual(expected_refresh_call, refresh.calls[0])


if __name__ == "__main__":
    unittest.main()