Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+85
View File
@@ -0,0 +1,85 @@
import unittest
from semantic_index.models import IndexDocument, SearchQuery, SearchResult
from semantic_index.qdrant_store import build_filter, point_id_for_document
from semantic_index.search import HybridSearchService, keyword_boost
class FakeEmbedder:
    """Test double for an embedder: yields one fixed vector for any query."""

    def embed_query(self, text):
        """Return a constant three-component vector; ``text`` is ignored."""
        fixed_vector = [0.1, 0.2, 0.3]
        return fixed_vector
class FakeStore:
    """Test double for the vector store.

    Remembers the most recent query it was asked to search for and always
    answers with the same two canned hits, truncated to ``limit``.
    """

    def __init__(self):
        # Holds the query passed to the latest search() call; None until then.
        self.query = None

    def search(self, vector, query, limit):
        """Record ``query`` and return at most ``limit`` canned results.

        ``vector`` is accepted but not used. The "weak" hit carries the
        higher raw score (0.7) and is listed before the "strong" hit (0.6).
        """
        self.query = query
        weak_hit = SearchResult(
            id="weak",
            score=0.7,
            text="general support text",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        strong_hit = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={"redmine_url": "http://redmine/issues/2"},
        )
        canned_hits = [weak_hit, strong_hit]
        return canned_hits[:limit]
class SearchTest(unittest.TestCase):
    """Tests for point-id generation, filter building, and hybrid ranking."""

    def test_qdrant_point_id_is_deterministic_uuid_for_stable_document_id(self):
        """The same document id must always yield the same UUID-shaped id."""
        document_id = "redmine:issue:42:journal:5:chunk:0"
        first = point_id_for_document(document_id)
        second = point_id_for_document(document_id)
        self.assertEqual(first, second)
        # Pin the canonical 8-4-4-4-12 layout. A bare 36-character class of
        # hex digits and dashes would also accept strings such as 36 dashes.
        self.assertRegex(
            first,
            r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
        )

    def test_filter_maps_supported_metadata(self):
        """Every populated query field must appear as a ``must`` condition."""
        query = SearchQuery(
            text="printer",
            source="redmine",
            project_identifier="fud-helpdesk",
            doc_type="message",
            issue_id=42,
            contact_email="ada@example.com",
            date_from="2026-04-01T00:00:00Z",
            date_to="2026-04-30T23:59:59Z",
        )
        qfilter = build_filter(query)
        # date_from/date_to are expected to collapse into a single range
        # condition on the "created_on" key.
        expected_conditions = [
            {"key": "source", "match": {"value": "redmine"}},
            {"key": "project_identifier", "match": {"value": "fud-helpdesk"}},
            {"key": "doc_type", "match": {"value": "message"}},
            {"key": "issue_id", "match": {"value": 42}},
            {"key": "contact_email", "match": {"value": "ada@example.com"}},
            {
                "key": "created_on",
                "range": {
                    "gte": "2026-04-01T00:00:00Z",
                    "lte": "2026-04-30T23:59:59Z",
                },
            },
        ]
        self.assertEqual(expected_conditions, qfilter["must"])

    def test_keyword_boost_prioritizes_exact_email_and_order_matches(self):
        """A result containing the email and order id must get a larger boost."""
        query_text = 'ada@example.com "ORD-12345"'
        weak = SearchResult(
            id="weak",
            score=0.7,
            text="general support text",
            payload={},
        )
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={},
        )
        self.assertGreater(
            keyword_boost(query_text, strong),
            keyword_boost(query_text, weak),
        )

    def test_hybrid_search_ranks_keyword_match_first_and_cites_redmine_url(self):
        """The service must put the keyword-matching hit first and cite its URL.

        FakeStore lists the "weak" hit first with the higher raw score, so
        "strong" can only lead the results if the service reorders them.
        """
        service = HybridSearchService(embedder=FakeEmbedder(), store=FakeStore())
        results = service.search(
            SearchQuery(text='ada@example.com "ORD-12345"', limit=2)
        )
        self.assertEqual("strong", results[0].id)
        # Expected to match the redmine_url FakeStore puts in the payload.
        self.assertEqual("http://redmine/issues/2", results[0].citation["url"])
# Run the test suite only when this file is executed directly as a script,
# not when it is imported by a test runner.
if __name__ == "__main__":
    unittest.main()