"""Unit tests for semantic_index: Qdrant point ids, filter building, and keyword-boosted search."""
import unittest
|
|
|
|
from semantic_index.models import IndexDocument, SearchQuery, SearchResult
|
|
from semantic_index.qdrant_store import build_filter, point_id_for_document
|
|
from semantic_index.search import HybridSearchService, keyword_boost
|
|
|
|
|
|
class FakeEmbedder:
    """Embedder test double that returns one fixed vector for any query."""

    def embed_query(self, text):
        """Return a constant three-element vector; ``text`` is ignored."""
        return [0.1, 0.2, 0.3]
|
|
|
|
|
|
class FakeStore:
    """Vector-store test double that returns two canned search results.

    The hit with id ``"weak"`` carries the higher raw score (0.7) but its text
    contains neither the email address nor the order number; the hit with id
    ``"strong"`` carries the lower raw score (0.6) and contains both.
    """

    def __init__(self):
        # Last query object passed to search(); None until search() is called.
        self.query = None

    def search(self, vector, query, limit):
        """Record ``query`` and return at most ``limit`` canned results.

        ``vector`` is accepted for interface compatibility and ignored.
        """
        self.query = query
        return [
            SearchResult(
                id="weak",
                score=0.7,
                text="general support text",
                payload={"redmine_url": "http://redmine/issues/1"},
            ),
            SearchResult(
                id="strong",
                score=0.6,
                text="Customer ada@example.com asked about ORD-12345",
                payload={"redmine_url": "http://redmine/issues/2"},
            ),
        ][:limit]
|
|
|
|
|
|
class SearchTest(unittest.TestCase):
    """Tests for point-id generation, filter building, and keyword boosting."""

    def test_qdrant_point_id_is_deterministic_uuid_for_stable_document_id(self):
        """The same document id must always map to the same UUID-shaped id."""
        first = point_id_for_document("redmine:issue:42:journal:5:chunk:0")
        second = point_id_for_document("redmine:issue:42:journal:5:chunk:0")

        self.assertEqual(first, second)
        # 36 characters drawn from lowercase hex digits and hyphens.
        self.assertRegex(first, r"^[0-9a-f-]{36}$")

    def test_filter_maps_supported_metadata(self):
        """Each populated query field becomes one ``must`` condition.

        ``date_from`` and ``date_to`` are expected to be merged into a single
        ``created_on`` range condition.
        """
        query = SearchQuery(
            text="printer",
            source="redmine",
            project_identifier="fud-helpdesk",
            doc_type="message",
            issue_id=42,
            contact_email="ada@example.com",
            date_from="2026-04-01T00:00:00Z",
            date_to="2026-04-30T23:59:59Z",
        )

        qfilter = build_filter(query)

        self.assertEqual(
            [
                {"key": "source", "match": {"value": "redmine"}},
                {"key": "project_identifier", "match": {"value": "fud-helpdesk"}},
                {"key": "doc_type", "match": {"value": "message"}},
                {"key": "issue_id", "match": {"value": 42}},
                {"key": "contact_email", "match": {"value": "ada@example.com"}},
                {
                    "key": "created_on",
                    "range": {
                        "gte": "2026-04-01T00:00:00Z",
                        "lte": "2026-04-30T23:59:59Z",
                    },
                },
            ],
            qfilter["must"],
        )

    def test_keyword_boost_prioritizes_exact_email_and_order_matches(self):
        """Exact email/order-number matches must outrank a higher raw score."""
        weak = SearchResult(
            id="weak",
            score=0.7,
            text="general support text",
            payload={},
        )
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={},
        )

        # The boost alone must favour the result containing the keywords.
        self.assertGreater(
            keyword_boost('ada@example.com "ORD-12345"', strong),
            keyword_boost('ada@example.com "ORD-12345"', weak),
        )

        # End to end: the store returns "weak" first (raw score 0.7 vs 0.6),
        # so "strong" coming out on top shows the service re-ranked the hits.
        service = HybridSearchService(embedder=FakeEmbedder(), store=FakeStore())
        results = service.search(SearchQuery(text='ada@example.com "ORD-12345"', limit=2))
        self.assertEqual("strong", results[0].id)
        self.assertEqual("http://redmine/issues/2", results[0].citation["url"])
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests when the file is executed directly.
    unittest.main()
|