Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.models import IndexDocument, SearchQuery, SearchResult
|
||||
from semantic_index.qdrant_store import build_filter, point_id_for_document
|
||||
from semantic_index.search import HybridSearchService, keyword_boost
|
||||
|
||||
|
||||
class FakeEmbedder:
    """Embedder test double that returns a fixed vector for any query text."""

    def embed_query(self, text):
        """Return the same three-element vector regardless of ``text``."""
        return [0.1, 0.2, 0.3]
|
||||
|
||||
|
||||
class FakeStore:
    """Vector-store test double that returns two canned search results.

    The last query passed to :meth:`search` is kept on ``self.query`` so a
    test can inspect what the service handed to the store.
    """

    def __init__(self):
        # No search has been issued yet.
        self.query = None

    def search(self, vector, query, limit):
        """Record ``query`` and return at most ``limit`` canned results.

        ``vector`` is accepted but not used. The results are listed with the
        higher-scored "weak" hit (generic text) first and the lower-scored
        "strong" hit (text containing an email address and an order id)
        second.
        """
        self.query = query
        return [
            SearchResult(
                id="weak",
                score=0.7,
                text="general support text",
                payload={"redmine_url": "http://redmine/issues/1"},
            ),
            SearchResult(
                id="strong",
                score=0.6,
                text="Customer ada@example.com asked about ORD-12345",
                payload={"redmine_url": "http://redmine/issues/2"},
            ),
        ][:limit]
|
||||
|
||||
|
||||
class SearchTest(unittest.TestCase):
    """Tests for point-id generation, filter building and hybrid search."""

    def test_qdrant_point_id_is_deterministic_uuid_for_stable_document_id(self):
        """The same document id yields the same 36-character point id."""
        first = point_id_for_document("redmine:issue:42:journal:5:chunk:0")
        second = point_id_for_document("redmine:issue:42:journal:5:chunk:0")

        self.assertEqual(first, second)
        # 36 characters drawn from lowercase hex digits and hyphens.
        self.assertRegex(first, r"^[0-9a-f-]{36}$")

    def test_filter_maps_supported_metadata(self):
        """Every populated query field becomes one entry in ``must``."""
        query = SearchQuery(
            text="printer",
            source="redmine",
            project_identifier="fud-helpdesk",
            doc_type="message",
            issue_id=42,
            contact_email="ada@example.com",
            date_from="2026-04-01T00:00:00Z",
            date_to="2026-04-30T23:59:59Z",
        )

        qfilter = build_filter(query)

        self.assertEqual(
            [
                {"key": "source", "match": {"value": "redmine"}},
                {"key": "project_identifier", "match": {"value": "fud-helpdesk"}},
                {"key": "doc_type", "match": {"value": "message"}},
                {"key": "issue_id", "match": {"value": 42}},
                {"key": "contact_email", "match": {"value": "ada@example.com"}},
                # date_from / date_to are expected as a single range condition.
                {
                    "key": "created_on",
                    "range": {
                        "gte": "2026-04-01T00:00:00Z",
                        "lte": "2026-04-30T23:59:59Z",
                    },
                },
            ],
            qfilter["must"],
        )

    def test_keyword_boost_prioritizes_exact_email_and_order_matches(self):
        """A hit containing the queried email and order id outranks a generic one."""
        weak = SearchResult(id="weak", score=0.7, text="general support text", payload={})
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={},
        )

        self.assertGreater(
            keyword_boost('ada@example.com "ORD-12345"', strong),
            keyword_boost('ada@example.com "ORD-12345"', weak),
        )

        # End to end: the store returns "weak" first (higher raw score), so
        # "strong" can only come out on top if the service re-ranks.
        service = HybridSearchService(embedder=FakeEmbedder(), store=FakeStore())
        results = service.search(SearchQuery(text='ada@example.com "ORD-12345"', limit=2))
        self.assertEqual("strong", results[0].id)
        self.assertEqual("http://redmine/issues/2", results[0].citation["url"])
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user