Files
redmine/tests/semantic_index/test_backfill_mcp.py
T
2026-05-04 09:50:03 -04:00

183 lines
7.2 KiB
Python

import unittest
from semantic_index.ingest import BackfillService
from semantic_index.mcp import SemanticMCP
from semantic_index.models import SearchQuery, SearchResult
from semantic_index.redmine import RedmineMapper
class FakeRedmineSource:
    """In-memory stand-in for a Redmine issue source with two canned issues.

    ``project_identifier`` is read each time issues are requested, so any
    value assigned to it before the call is stamped on every returned issue.
    """

    # None until something assigns a project; presumably the service under
    # test sets this when it works project by project -- verify in the caller.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the first ``limit`` canned issues (ids 1 and 2, in that order).

        The cut-off is a plain list slice, so ``limit`` follows normal slice
        semantics (``0`` yields an empty list, ``None`` yields both issues).
        """
        issues = [
            {
                "id": 1,
                "subject": "First",
                "description": "First body",
                "project": {"identifier": self.project_identifier},
            },
            {
                "id": 2,
                "subject": "Second",
                "description": "Second body",
                "project": {"identifier": self.project_identifier},
            },
        ]
        return issues[:limit]
class DuplicateDocumentRedmineSource:
    """Fake issue source whose two canned issues deliberately share id 1.

    Both issues carry the literal project identifier "customer-service";
    unlike the class attribute, the per-issue value is hard-coded and is not
    affected by reassigning ``project_identifier``.
    """

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return the first ``limit`` issues; both entries have ``"id": 1``."""
        issues = [
            {
                "id": 1,
                "subject": "First",
                "description": "First body",
                "project": {"identifier": "customer-service"},
            },
            {
                # Same id as above on purpose: this is the duplicate.
                "id": 1,
                "subject": "First duplicate",
                "description": "Duplicate body",
                "project": {"identifier": "customer-service"},
            },
        ]
        return issues[:limit]
class FakeEmbedder:
    """Deterministic embedder double that produces fixed 3-component vectors."""

    def embed_documents(self, docs):
        """Return one vector per document: ``[position, 0.0, 0.0]``.

        ``position`` is the 1-based index of the document as a float. The
        documents themselves are never inspected, and ``docs`` may be any
        iterable because it is only enumerated.
        """
        return [[float(position), 0.0, 0.0] for position, _ in enumerate(docs, start=1)]

    def embed_query(self, text):
        """Return the same fixed query vector regardless of ``text``."""
        return [0.1, 0.0, 0.0]
class FakeStore:
    """Recording store double.

    Each ``rebuild_source`` call is logged in two parallel lists so tests can
    assert on the scope that was rebuilt (``deleted``) and on the documents
    and vectors that were written (``upserts``).
    """

    def __init__(self):
        self.deleted = []  # (source, project_identifier) per rebuild call
        self.upserts = []  # (docs, vectors) per rebuild call

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record one rebuild request; nothing is actually stored."""
        self.deleted.append((source, project_identifier))
        self.upserts.append((docs, vectors))

    def list_projects(self, source=None, limit=1000):
        """Return a fixed two-project listing; ``source`` and ``limit`` are ignored."""
        return [
            {"project_identifier": "customer-service", "document_count": 1684},
            {"project_identifier": "hiring", "document_count": 409},
        ]
class FakeRefreshService:
    """Refresh-service double that records every refresh request it receives."""

    def __init__(self):
        # One (project_limits, dry_run, force_rebuild, overlap_minutes) tuple per call.
        self.calls = []

    def refresh_redmine_project_limits(
        self,
        project_limits,
        dry_run=False,
        force_rebuild=False,
        overlap_minutes=15,
    ):
        """Log the arguments and return a small summary dict.

        The summary reports the source as "redmine", the number of entries in
        ``project_limits``, and echoes ``dry_run``. No refresh is performed.
        """
        self.calls.append((project_limits, dry_run, force_rebuild, overlap_minutes))
        return {
            "source": "redmine",
            "projects": len(project_limits),
            "dry_run": dry_run,
        }
class FakeSearchService:
    """Search-service double that records queries and returns canned data."""

    def __init__(self):
        self.queries = []  # every query object passed to search(), in call order

    def search(self, query):
        """Record ``query`` and return a single fixed ``SearchResult``.

        The result is always document "doc1" with score 0.5, text "Snippet",
        and a payload holding one ``redmine_url`` entry.
        """
        self.queries.append(query)
        return [
            SearchResult(
                id="doc1",
                score=0.5,
                text="Snippet",
                payload={"redmine_url": "http://redmine/issues/1"},
            )
        ]

    def get_document(self, document_id):
        """Return a minimal document dict that echoes ``document_id``."""
        return {"id": document_id, "text": "Snippet"}
class BackfillAndMCPTest(unittest.TestCase):
    """Exercise BackfillService and SemanticMCP against in-memory fakes.

    Every collaborator (issue source, embedder, store, search and refresh
    services) is one of the fake classes defined in this module, so the tests
    only check how the services drive those collaborators and what they
    return. Method-level notes are comments rather than docstrings so that
    unittest's verbose test descriptions stay unchanged.
    """

    def test_sample_backfill_rebuilds_redmine_source(self):
        # With no mapper supplied, the rebuild is expected to be recorded
        # without a project scope (project_identifier None).
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )
        result = service.backfill_redmine_sample(limit=2)
        self.assertEqual({"source": "redmine", "issues": 2, "documents": 2}, result)
        self.assertEqual([("redmine", None)], service.store.deleted)
        docs, vectors = service.store.upserts[0]
        self.assertEqual(["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"], [doc.id for doc in docs])
        self.assertEqual(2, len(vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        # The project identifier comes from the explicit mapper here, and the
        # store must see the rebuild scoped to that project.
        store = FakeStore()
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
            mapper=RedmineMapper(redmine_url="", project_identifier="customer-service"),
        )
        service.backfill_redmine_sample(limit=1)
        self.assertEqual([("redmine", "customer-service")], store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        # One rebuild per requested project, in request order, with each
        # project's identifier carried through to the document payload.
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        result = service.backfill_redmine_projects(["customer-service", "hiring"], per_project_limit=1)
        self.assertEqual(
            {
                "source": "redmine",
                "projects": 2,
                "issues": 2,
                "documents": 2,
                "project_results": [
                    {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                    {"project_identifier": "hiring", "issues": 1, "documents": 1},
                ],
            },
            result,
        )
        self.assertEqual([("redmine", "customer-service"), ("redmine", "hiring")], store.deleted)
        # upserts[n][0] is the docs list of the n-th rebuild; [0] is its first doc.
        self.assertEqual("customer-service", store.upserts[0][0][0].payload["project_identifier"])
        self.assertEqual("hiring", store.upserts[1][0][0].payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        # Limits are given per project as a mapping: 2 + 1 issues in total.
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        result = service.backfill_redmine_project_limits({"customer-service": 2, "hiring": 1})
        self.assertEqual(3, result["issues"])
        self.assertEqual(
            [
                {"project_identifier": "customer-service", "issues": 2, "documents": 2},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
            result["project_results"],
        )

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        # The source yields two issues with the same id; only one document
        # (and therefore one vector) is expected to reach the store.
        store = FakeStore()
        service = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
        )
        result = service.backfill_redmine_sample(limit=2)
        self.assertEqual({"source": "redmine", "issues": 2, "documents": 1}, result)
        docs, vectors = store.upserts[0]
        self.assertEqual(["redmine:issue:1:chunk:0"], [doc.id for doc in docs])
        self.assertEqual(1, len(vectors))

    def test_mcp_tools_return_json_ready_results(self):
        search = FakeSearchService()
        refresh = FakeRefreshService()
        # None of the four tools called below is a backfill tool, so no
        # backfill service is supplied.
        mcp = SemanticMCP(
            search_service=search,
            backfill_service=None,
            store=FakeStore(),
            refresh_service=refresh,
        )
        response = mcp.call_tool(
            "semantic_search",
            {"query": "printer", "source": "redmine", "project_identifier": "hiring", "limit": 3},
        )
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_response = mcp.call_tool(
            "semantic_refresh_redmine",
            {"project_identifier": "customer-service", "limit": 5, "dry_run": True},
        )

        # semantic_search: response echoes the query and filters, and the
        # fake's payload URL surfaces as the result's citation.
        self.assertEqual("printer", response["query"])
        self.assertEqual("hiring", response["filters"]["project_identifier"])
        self.assertEqual("doc1", response["results"][0]["id"])
        self.assertEqual("http://redmine/issues/1", response["results"][0]["citation"]["url"])
        self.assertIsInstance(search.queries[0], SearchQuery)
        self.assertEqual("redmine", search.queries[0].source)
        self.assertEqual("hiring", search.queries[0].project_identifier)

        # semantic_get_document / semantic_list_projects: fake data passes through.
        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        self.assertEqual("customer-service", projects["projects"][0]["project_identifier"])

        # semantic_refresh_redmine: project_identifier + limit arrive at the
        # refresh service as a one-entry mapping; force_rebuild and
        # overlap_minutes are expected to be False and 15.
        self.assertTrue(refresh_response["dry_run"])
        self.assertEqual(({"customer-service": 5}, True, False, 15), refresh.calls[0])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()