Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,182 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.ingest import BackfillService
|
||||
from semantic_index.mcp import SemanticMCP
|
||||
from semantic_index.models import SearchQuery, SearchResult
|
||||
from semantic_index.redmine import RedmineMapper
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """In-memory issue source that serves two canned helpdesk issues.

    Each returned issue carries a ``project`` entry whose identifier is
    read from ``project_identifier`` at the moment of the call.
    """

    # Class-level default: issues report no project identifier.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the canned issues sliced to the first ``limit`` entries."""
        canned = (
            (1, "First", "First body"),
            (2, "Second", "Second body"),
        )
        issues = [
            {
                "id": issue_id,
                "subject": subject,
                "description": description,
                # Built inside the loop so every issue owns its own dict.
                "project": {"identifier": self.project_identifier},
            }
            for issue_id, subject, description in canned
        ]
        return issues[:limit]
|
||||
|
||||
|
||||
class DuplicateDocumentRedmineSource:
    """In-memory issue source whose two canned issues share the same id."""

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return the canned issues sliced to the first ``limit`` entries."""
        original = {
            "id": 1,
            "subject": "First",
            "description": "First body",
            "project": {"identifier": "customer-service"},
        }
        # Same id as ``original``, but different subject and description.
        repeat = {
            "id": 1,
            "subject": "First duplicate",
            "description": "Duplicate body",
            "project": {"identifier": "customer-service"},
        }
        return [original, repeat][:limit]
|
||||
|
||||
|
||||
class FakeEmbedder:
    """Deterministic embedder double that emits three-component vectors."""

    def embed_documents(self, docs):
        """Return one vector per document: ``[position, 0.0, 0.0]``.

        ``position`` is the document's 1-based index as a float; the
        document content itself is never inspected.
        """
        vectors = []
        for position, _unused in enumerate(docs, start=1):
            vectors.append([float(position), 0.0, 0.0])
        return vectors

    def embed_query(self, text):
        """Return the same fixed vector regardless of ``text``."""
        return [0.1, 0.0, 0.0]
|
||||
|
||||
|
||||
class FakeStore:
    """Recording store double: logs rebuild requests, lists fixed projects."""

    def __init__(self):
        # One (source, project_identifier) scope per rebuild_source call.
        self.deleted = []
        # One (docs, vectors) pair per rebuild_source call.
        self.upserts = []

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record the rebuild scope and the payload it was called with."""
        scope = (source, project_identifier)
        payload = (docs, vectors)
        self.deleted.append(scope)
        self.upserts.append(payload)

    def list_projects(self, source=None, limit=1000):
        """Return a fixed two-project listing; both arguments are ignored."""
        canned_counts = (
            ("customer-service", 1684),
            ("hiring", 409),
        )
        return [
            {"project_identifier": identifier, "document_count": count}
            for identifier, count in canned_counts
        ]
|
||||
|
||||
|
||||
class FakeRefreshService:
    """Refresh-service double that records every call it receives."""

    def __init__(self):
        # One tuple of the four call arguments per invocation, in order.
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Log the arguments and return a small summary of the request.

        The summary reports the number of entries in ``project_limits``
        and echoes ``dry_run`` back unchanged.
        """
        recorded = (project_limits, dry_run, force_rebuild, overlap_minutes)
        self.calls.append(recorded)
        summary = {"source": "redmine"}
        summary["projects"] = len(project_limits)
        summary["dry_run"] = dry_run
        return summary
|
||||
|
||||
|
||||
class FakeSearchService:
    """Search-service double that records queries and returns canned data."""

    def __init__(self):
        # Every query object passed to search(), in call order.
        self.queries = []

    def search(self, query):
        """Record ``query`` and return a single fixed search hit."""
        self.queries.append(query)
        hit = SearchResult(
            id="doc1",
            score=0.5,
            text="Snippet",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        return [hit]

    def get_document(self, document_id):
        """Return a canned document that echoes ``document_id`` as its id."""
        document = {"id": document_id}
        document["text"] = "Snippet"
        return document
|
||||
|
||||
|
||||
class BackfillAndMCPTest(unittest.TestCase):
    """Run BackfillService and SemanticMCP against the in-memory fakes."""

    def test_sample_backfill_rebuilds_redmine_source(self):
        # No mapper is supplied here; the recorded rebuild scope is expected
        # to carry no project identifier.
        backfill = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )

        summary = backfill.backfill_redmine_sample(limit=2)

        expected_summary = {"source": "redmine", "issues": 2, "documents": 2}
        self.assertEqual(expected_summary, summary)
        # The store is read back through the service's ``store`` attribute.
        self.assertEqual([("redmine", None)], backfill.store.deleted)
        stored_docs, stored_vectors = backfill.store.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"], stored_ids)
        self.assertEqual(2, len(stored_vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        # With a project-scoped mapper, the rebuild scope must name that project.
        recording_store = FakeStore()
        scoped_mapper = RedmineMapper(redmine_url="", project_identifier="customer-service")
        backfill = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=recording_store,
            mapper=scoped_mapper,
        )

        backfill.backfill_redmine_sample(limit=1)

        expected_scopes = [("redmine", "customer-service")]
        self.assertEqual(expected_scopes, recording_store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        # Two projects, one issue each: expect one rebuild per project, in order.
        recording_store = FakeStore()
        backfill = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=recording_store,
        )

        summary = backfill.backfill_redmine_projects(["customer-service", "hiring"], per_project_limit=1)

        expected_summary = {
            "source": "redmine",
            "projects": 2,
            "issues": 2,
            "documents": 2,
            "project_results": [
                {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
        }
        self.assertEqual(expected_summary, summary)
        self.assertEqual(
            [("redmine", "customer-service"), ("redmine", "hiring")],
            recording_store.deleted,
        )
        # The first document of each upsert batch must be tagged with its project.
        first_batch_doc = recording_store.upserts[0][0][0]
        self.assertEqual("customer-service", first_batch_doc.payload["project_identifier"])
        second_batch_doc = recording_store.upserts[1][0][0]
        self.assertEqual("hiring", second_batch_doc.payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        # Limits are given per project; totals and the per-project breakdown
        # must both reflect them.
        recording_store = FakeStore()
        backfill = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=recording_store,
        )

        summary = backfill.backfill_redmine_project_limits({"customer-service": 2, "hiring": 1})

        self.assertEqual(3, summary["issues"])
        expected_breakdown = [
            {"project_identifier": "customer-service", "issues": 2, "documents": 2},
            {"project_identifier": "hiring", "issues": 1, "documents": 1},
        ]
        self.assertEqual(expected_breakdown, summary["project_results"])

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        # The source yields two issues with the same id: both are counted as
        # issues, but only one document and one vector may reach the store.
        recording_store = FakeStore()
        backfill = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=recording_store,
        )

        summary = backfill.backfill_redmine_sample(limit=2)

        expected_summary = {"source": "redmine", "issues": 2, "documents": 1}
        self.assertEqual(expected_summary, summary)
        stored_docs, stored_vectors = recording_store.upserts[0]
        stored_ids = [doc.id for doc in stored_docs]
        self.assertEqual(["redmine:issue:1:chunk:0"], stored_ids)
        self.assertEqual(1, len(stored_vectors))

    def test_mcp_tools_return_json_ready_results(self):
        # Drive four MCP tools through call_tool and check both the returned
        # payloads and what reached the underlying fakes.
        search_double = FakeSearchService()
        refresh_double = FakeRefreshService()
        mcp = SemanticMCP(
            search_service=search_double,
            backfill_service=None,
            store=FakeStore(),
            refresh_service=refresh_double,
        )

        search_args = {"query": "printer", "source": "redmine", "project_identifier": "hiring", "limit": 3}
        search_response = mcp.call_tool("semantic_search", search_args)
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_args = {"project_identifier": "customer-service", "limit": 5, "dry_run": True}
        refresh_response = mcp.call_tool("semantic_refresh_redmine", refresh_args)

        # Search payload: echoed query, filters, and the first hit with citation.
        self.assertEqual("printer", search_response["query"])
        self.assertEqual("hiring", search_response["filters"]["project_identifier"])
        top_hit = search_response["results"][0]
        self.assertEqual("doc1", top_hit["id"])
        self.assertEqual("http://redmine/issues/1", top_hit["citation"]["url"])

        # The search fake must have received a SearchQuery carrying the filters.
        recorded_query = search_double.queries[0]
        self.assertIsInstance(recorded_query, SearchQuery)
        self.assertEqual("redmine", recorded_query.source)
        self.assertEqual("hiring", recorded_query.project_identifier)

        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        first_project = projects["projects"][0]
        self.assertEqual("customer-service", first_project["project_identifier"])

        # Refresh: dry_run is echoed, and the fake recorded a one-entry limits
        # mapping plus (dry_run, force_rebuild, overlap_minutes).
        self.assertTrue(refresh_response["dry_run"])
        expected_refresh_call = ({"customer-service": 5}, True, False, 15)
        self.assertEqual(expected_refresh_call, refresh_double.calls[0])
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user