183 lines
7.2 KiB
Python
183 lines
7.2 KiB
Python
import unittest
|
|
|
|
from semantic_index.ingest import BackfillService
|
|
from semantic_index.mcp import SemanticMCP
|
|
from semantic_index.models import SearchQuery, SearchResult
|
|
from semantic_index.redmine import RedmineMapper
|
|
|
|
|
|
class FakeRedmineSource:
    """In-memory Redmine source stub that serves two canned helpdesk issues."""

    # Echoed into the "project" entry of every issue this stub returns.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the two canned issues (ids 1 and 2) sliced to ``[:limit]``."""
        canned = ((1, "First"), (2, "Second"))
        issues = [
            {
                "id": issue_id,
                "subject": title,
                "description": f"{title} body",
                # Read at call time, so a value assigned on the instance is honoured.
                "project": {"identifier": self.project_identifier},
            }
            for issue_id, title in canned
        ]
        return issues[:limit]
|
|
|
|
|
|
class DuplicateDocumentRedmineSource:
    """Redmine source stub whose two canned issues both carry id 1."""

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return the two same-id issues sliced to ``[:limit]``."""
        texts = (
            ("First", "First body"),
            ("First duplicate", "Duplicate body"),
        )
        issues = []
        for subject, description in texts:
            issues.append(
                {
                    "id": 1,  # identical on purpose: both issues collide on id
                    "subject": subject,
                    "description": description,
                    # Fixed literal, independent of the class attribute above.
                    "project": {"identifier": "customer-service"},
                }
            )
        return issues[:limit]
|
|
|
|
|
|
class FakeEmbedder:
    """Deterministic embedder stub producing 3-component vectors."""

    def embed_documents(self, docs):
        """Return one vector per document: ``[position, 0.0, 0.0]`` with positions starting at 1."""
        vectors = []
        position = 0
        for _ in docs:
            position += 1
            vectors.append([float(position), 0.0, 0.0])
        return vectors

    def embed_query(self, text):
        """Return the same fixed query vector regardless of ``text``."""
        query_vector = [0.1, 0.0, 0.0]
        return query_vector
|
|
|
|
|
|
class FakeStore:
    """Vector store stub that records rebuild calls and lists two fixed projects."""

    def __init__(self):
        # deleted: (source, project_identifier) per rebuild call.
        # upserts: (docs, vectors) per rebuild call, in the same order.
        self.deleted, self.upserts = [], []

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record the rebuild scope and the documents/vectors handed over."""
        scope = (source, project_identifier)
        batch = (docs, vectors)
        self.deleted.append(scope)
        self.upserts.append(batch)

    def list_projects(self, source=None, limit=1000):
        """Return two fixed project rows; ``source`` and ``limit`` are ignored."""
        counts = (("customer-service", 1684), ("hiring", 409))
        return [
            {"project_identifier": name, "document_count": total}
            for name, total in counts
        ]
|
|
|
|
|
|
class FakeRefreshService:
    """Refresh service stub that logs each call and returns a small summary dict."""

    def __init__(self):
        # One (project_limits, dry_run, force_rebuild, overlap_minutes) tuple per call.
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Record the arguments, then report the project count and the dry-run flag."""
        invocation = (project_limits, dry_run, force_rebuild, overlap_minutes)
        self.calls.append(invocation)

        summary = {"source": "redmine"}
        summary["projects"] = len(project_limits)
        summary["dry_run"] = dry_run
        return summary
|
|
|
|
|
|
class FakeSearchService:
    """Search service stub that logs queries and answers with fixed content."""

    def __init__(self):
        # Every query object passed to search(), in call order.
        self.queries = []

    def search(self, query):
        """Record ``query`` and return a single fixed ``SearchResult``."""
        self.queries.append(query)
        hit = SearchResult(
            id="doc1",
            score=0.5,
            text="Snippet",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        return [hit]

    def get_document(self, document_id):
        """Return a minimal document dict that echoes ``document_id``."""
        document = {"id": document_id, "text": "Snippet"}
        return document
|
|
|
|
|
|
class BackfillAndMCPTest(unittest.TestCase):
    """Drives BackfillService and SemanticMCP using the in-memory fakes of this module."""

    def test_sample_backfill_rebuilds_redmine_source(self):
        # No mapper is supplied here; the rebuild is expected to be recorded
        # with a project scope of None.
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )

        result = service.backfill_redmine_sample(limit=2)

        expected_summary = {"source": "redmine", "issues": 2, "documents": 2}
        self.assertEqual(expected_summary, result)
        self.assertEqual([("redmine", None)], service.store.deleted)

        stored_docs, stored_vectors = service.store.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"], stored_ids)
        self.assertEqual(2, len(stored_vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        # The mapper carries the project identifier that should scope the rebuild.
        store = FakeStore()
        scoped_mapper = RedmineMapper(redmine_url="", project_identifier="customer-service")
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
            mapper=scoped_mapper,
        )

        service.backfill_redmine_sample(limit=1)

        self.assertEqual([("redmine", "customer-service")], store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        project_names = ["customer-service", "hiring"]

        result = service.backfill_redmine_projects(["customer-service", "hiring"], per_project_limit=1)

        expected_summary = {
            "source": "redmine",
            "projects": 2,
            "issues": 2,
            "documents": 2,
            "project_results": [
                {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
        }
        self.assertEqual(expected_summary, result)
        self.assertEqual([("redmine", "customer-service"), ("redmine", "hiring")], store.deleted)

        # The first document of each rebuild must be tagged with that rebuild's project.
        for position, project_name in enumerate(project_names):
            first_doc = store.upserts[position][0][0]
            self.assertEqual(project_name, first_doc.payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        limits_by_project = {"customer-service": 2, "hiring": 1}

        result = service.backfill_redmine_project_limits(limits_by_project)

        self.assertEqual(3, result["issues"])
        expected_per_project = [
            {"project_identifier": "customer-service", "issues": 2, "documents": 2},
            {"project_identifier": "hiring", "issues": 1, "documents": 1},
        ]
        self.assertEqual(expected_per_project, result["project_results"])

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        # The source yields two issues sharing id 1; a single document and a
        # single vector are expected to reach the store.
        store = FakeStore()
        service = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
        )

        result = service.backfill_redmine_sample(limit=2)

        expected_summary = {"source": "redmine", "issues": 2, "documents": 1}
        self.assertEqual(expected_summary, result)

        stored_docs, stored_vectors = store.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0"], stored_ids)
        self.assertEqual(1, len(stored_vectors))

    def test_mcp_tools_return_json_ready_results(self):
        search = FakeSearchService()
        refresh = FakeRefreshService()
        mcp = SemanticMCP(
            search_service=search,
            backfill_service=None,
            store=FakeStore(),
            refresh_service=refresh,
        )
        search_args = {
            "query": "printer",
            "source": "redmine",
            "project_identifier": "hiring",
            "limit": 3,
        }
        refresh_args = {
            "project_identifier": "customer-service",
            "limit": 5,
            "dry_run": True,
        }

        response = mcp.call_tool("semantic_search", search_args)
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_response = mcp.call_tool("semantic_refresh_redmine", refresh_args)

        # semantic_search: echoed query and filters, plus a citation-bearing hit.
        top_hit = response["results"][0]
        self.assertEqual("printer", response["query"])
        self.assertEqual("hiring", response["filters"]["project_identifier"])
        self.assertEqual("doc1", top_hit["id"])
        self.assertEqual("http://redmine/issues/1", top_hit["citation"]["url"])

        # The search service must have received a SearchQuery carrying the filters.
        received_query = search.queries[0]
        self.assertIsInstance(received_query, SearchQuery)
        self.assertEqual("redmine", received_query.source)
        self.assertEqual("hiring", received_query.project_identifier)

        # Remaining tools: document lookup, project listing, refresh delegation.
        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        self.assertEqual("customer-service", projects["projects"][0]["project_identifier"])
        self.assertTrue(refresh_response["dry_run"])
        expected_refresh_call = ({"customer-service": 5}, True, False, 15)
        self.assertEqual(expected_refresh_call, refresh.calls[0])
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()
|