Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_index.app import create_app
|
||||
from semantic_index.config import Settings
|
||||
from semantic_index.models import SearchResult
|
||||
|
||||
|
||||
class FakeSearchService:
    """Search-service stand-in that serves one canned Redmine hit."""

    def search(self, query):
        """Return a single fixed ``SearchResult``; ``query`` is not inspected."""
        citation_payload = {
            "source": "redmine",
            "project_identifier": "customer-service",
            "doc_type": "issue",
            "issue_id": 1,
            "redmine_url": "http://redmine/issues/1",
            "source_record_id": "issue:1",
        }
        hit = SearchResult(
            id="redmine:issue:1:chunk:0",
            score=0.8,
            text="Snippet text",
            payload=citation_payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Echo ``document_id`` back inside a minimal document dict."""
        return dict(id=document_id, text="Full text", payload={})
|
||||
|
||||
|
||||
class FakeStore:
    """Store stand-in that exposes a single indexed project."""

    def list_projects(self, source=None, limit=1000):
        """Return one fixed project row; both arguments are ignored."""
        customer_service = {"project_identifier": "customer-service", "document_count": 10}
        return [customer_service]
|
||||
|
||||
|
||||
class FakeRefreshService:
    """Refresh-service stand-in that records every call it receives."""

    def __init__(self):
        # One (project_limits, dry_run, force_rebuild, overlap_minutes) tuple per call.
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Record the arguments and return a small summary dict."""
        recorded = (project_limits, dry_run, force_rebuild, overlap_minutes)
        self.calls.append(recorded)
        summary = {"source": "redmine"}
        summary["projects"] = len(project_limits)
        summary["dry_run"] = dry_run
        return summary
|
||||
|
||||
|
||||
def fake_services():
    """Build the service mapping handed to ``create_app`` by these tests.

    Returns a dict with ``settings``, ``search``, ``store`` and ``refresh``
    entries, all backed by the in-memory fakes defined in this module.
    """
    refresh_service = FakeRefreshService()
    settings = Settings(
        openai_api_key="",
        qdrant_url="http://qdrant",
        qdrant_api_key=None,
        qdrant_collection="semantic",
        redmine_url="http://redmine",
        redmine_api_key="",
        redmine_project_identifier=None,
        sample_limit=50,
        bind_host="127.0.0.1",
        bind_port=8787,
        service_api_key=None,
        refresh_state_path=Path(".cache/semantic_index/refresh_state.json"),
    )
    return dict(
        settings=settings,
        search=FakeSearchService(),
        store=FakeStore(),
        refresh=refresh_service,
    )
|
||||
|
||||
|
||||
class SemanticIndexAppTest(unittest.TestCase):
    """Calls the app's route endpoints directly, without going through HTTP."""

    @staticmethod
    def _endpoints(service_builder):
        """Create the app and index its endpoint callables by route path."""
        app = create_app(service_builder=service_builder)
        return {route.path: route.endpoint for route in app.routes}

    def test_health_does_not_build_live_services(self):
        def broken_builder():
            raise AssertionError("health should not build live clients")

        health = self._endpoints(broken_builder)["/health"]

        self.assertEqual({"status": "ok"}, health())

    def test_search_endpoint_returns_normalized_agent_response(self):
        search = self._endpoints(fake_services)["/search"]
        request = {"query": "printer", "project_identifier": "customer-service", "limit": 3}

        response = search(request)

        self.assertEqual("printer", response["query"])
        self.assertEqual("customer-service", response["filters"]["project_identifier"])
        self.assertEqual("customer-service", response["results"][0]["citation"]["project_identifier"])

    def test_projects_endpoint_lists_indexed_projects(self):
        list_projects = self._endpoints(fake_services)["/projects"]

        response = list_projects()

        self.assertEqual("customer-service", response["projects"][0]["project_identifier"])

    def test_refresh_endpoint_passes_project_limits_and_cost_flags(self):
        # Keep a handle on the services so the recorded refresh call can be inspected.
        services = fake_services()
        refresh = self._endpoints(lambda: services)["/sources/redmine/refresh"]
        request = {
            "project_limits": {"customer-service": 5},
            "dry_run": True,
            "force_rebuild": False,
            "overlap_minutes": 30,
        }

        response = refresh(request)

        self.assertTrue(response["dry_run"])
        self.assertEqual(({"customer-service": 5}, True, False, 30), services["refresh"].calls[0])
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,182 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.ingest import BackfillService
|
||||
from semantic_index.mcp import SemanticMCP
|
||||
from semantic_index.models import SearchQuery, SearchResult
|
||||
from semantic_index.redmine import RedmineMapper
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """Redmine source stand-in that yields two fixed issues."""

    # Copied into each returned issue's ``project.identifier`` at call time.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the first ``limit`` of the two canned issues."""
        rows = ((1, "First"), (2, "Second"))
        issues = [
            {
                "id": issue_id,
                "subject": subject,
                "description": f"{subject} body",
                "project": {"identifier": self.project_identifier},
            }
            for issue_id, subject in rows
        ]
        return issues[:limit]
|
||||
|
||||
|
||||
class DuplicateDocumentRedmineSource:
    """Redmine source stand-in whose two issues share the same id."""

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return the first ``limit`` of two issues that both carry id 1."""
        original = {
            "id": 1,
            "subject": "First",
            "description": "First body",
            "project": {"identifier": "customer-service"},
        }
        duplicate = {
            "id": 1,
            "subject": "First duplicate",
            "description": "Duplicate body",
            "project": {"identifier": "customer-service"},
        }
        return [original, duplicate][:limit]
|
||||
|
||||
|
||||
class FakeEmbedder:
    """Embedder stand-in that produces small deterministic vectors."""

    def embed_documents(self, docs):
        """Return one 3-element vector per doc; the first element is its 1-based position."""
        vectors = []
        for position, _doc in enumerate(docs, start=1):
            vectors.append([float(position), 0.0, 0.0])
        return vectors

    def embed_query(self, text):
        """Return the same fixed query vector regardless of ``text``."""
        return [0.1, 0.0, 0.0]
|
||||
|
||||
|
||||
class FakeStore:
    """Store stand-in that records rebuild calls and lists two fixed projects."""

    def __init__(self):
        self.deleted = []  # (source, project_identifier) per rebuild_source call
        self.upserts = []  # (docs, vectors) per rebuild_source call

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record the scope that was rebuilt and the documents/vectors supplied."""
        scope = (source, project_identifier)
        batch = (docs, vectors)
        self.deleted.append(scope)
        self.upserts.append(batch)

    def list_projects(self, source=None, limit=1000):
        """Return two fixed project rows; both arguments are ignored."""
        document_counts = {"customer-service": 1684, "hiring": 409}
        return [
            {"project_identifier": identifier, "document_count": count}
            for identifier, count in document_counts.items()
        ]
|
||||
|
||||
|
||||
class FakeRefreshService:
    """Refresh-service stand-in that logs the arguments of each refresh."""

    def __init__(self):
        # Tuples of (project_limits, dry_run, force_rebuild, overlap_minutes).
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Log the call and report how many projects were requested."""
        self.calls += [(project_limits, dry_run, force_rebuild, overlap_minutes)]
        project_count = len(project_limits)
        return dict(source="redmine", projects=project_count, dry_run=dry_run)
|
||||
|
||||
|
||||
class FakeSearchService:
    """Search stand-in that logs queries and returns one fixed hit."""

    def __init__(self):
        self.queries = []  # every query object passed to search(), in call order

    def search(self, query):
        """Record ``query`` and return a single canned ``SearchResult``."""
        self.queries.append(query)
        hit = SearchResult(
            id="doc1",
            score=0.5,
            text="Snippet",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        return [hit]

    def get_document(self, document_id):
        """Return a minimal document dict carrying ``document_id``."""
        return dict(id=document_id, text="Snippet")
|
||||
|
||||
|
||||
class BackfillAndMCPTest(unittest.TestCase):
    """Covers BackfillService rebuild scoping and SemanticMCP tool calls using in-memory fakes."""

    def test_sample_backfill_rebuilds_redmine_source(self):
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )

        summary = service.backfill_redmine_sample(limit=2)

        self.assertEqual({"source": "redmine", "issues": 2, "documents": 2}, summary)
        self.assertEqual([("redmine", None)], service.store.deleted)
        docs, vectors = service.store.upserts[0]
        self.assertEqual(["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"], [doc.id for doc in docs])
        self.assertEqual(2, len(vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        store = FakeStore()
        scoped_mapper = RedmineMapper(redmine_url="", project_identifier="customer-service")
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
            mapper=scoped_mapper,
        )

        service.backfill_redmine_sample(limit=1)

        self.assertEqual([("redmine", "customer-service")], store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        expected_summary = {
            "source": "redmine",
            "projects": 2,
            "issues": 2,
            "documents": 2,
            "project_results": [
                {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
        }

        summary = service.backfill_redmine_projects(["customer-service", "hiring"], per_project_limit=1)

        self.assertEqual(expected_summary, summary)
        self.assertEqual([("redmine", "customer-service"), ("redmine", "hiring")], store.deleted)
        # upserts[n][0][0] is the first document of the n-th rebuild.
        self.assertEqual("customer-service", store.upserts[0][0][0].payload["project_identifier"])
        self.assertEqual("hiring", store.upserts[1][0][0].payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)
        expected_per_project = [
            {"project_identifier": "customer-service", "issues": 2, "documents": 2},
            {"project_identifier": "hiring", "issues": 1, "documents": 1},
        ]

        summary = service.backfill_redmine_project_limits({"customer-service": 2, "hiring": 1})

        self.assertEqual(3, summary["issues"])
        self.assertEqual(expected_per_project, summary["project_results"])

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        store = FakeStore()
        service = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
        )

        summary = service.backfill_redmine_sample(limit=2)

        # Two issues are read but they map to one document id, so one document is kept.
        self.assertEqual({"source": "redmine", "issues": 2, "documents": 1}, summary)
        docs, vectors = store.upserts[0]
        self.assertEqual(["redmine:issue:1:chunk:0"], [doc.id for doc in docs])
        self.assertEqual(1, len(vectors))

    def test_mcp_tools_return_json_ready_results(self):
        search = FakeSearchService()
        refresh = FakeRefreshService()
        mcp = SemanticMCP(search_service=search, backfill_service=None, store=FakeStore(), refresh_service=refresh)
        search_args = {"query": "printer", "source": "redmine", "project_identifier": "hiring", "limit": 3}
        refresh_args = {"project_identifier": "customer-service", "limit": 5, "dry_run": True}

        response = mcp.call_tool("semantic_search", search_args)
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_response = mcp.call_tool("semantic_refresh_redmine", refresh_args)

        self.assertEqual("printer", response["query"])
        self.assertEqual("hiring", response["filters"]["project_identifier"])
        self.assertEqual("doc1", response["results"][0]["id"])
        self.assertEqual("http://redmine/issues/1", response["results"][0]["citation"]["url"])
        self.assertIsInstance(search.queries[0], SearchQuery)
        self.assertEqual("redmine", search.queries[0].source)
        self.assertEqual("hiring", search.queries[0].project_identifier)
        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        self.assertEqual("customer-service", projects["projects"][0]["project_identifier"])
        self.assertTrue(refresh_response["dry_run"])
        self.assertEqual(({"customer-service": 5}, True, False, 15), refresh.calls[0])
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,37 @@
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
import unittest
|
||||
|
||||
from semantic_index.config import load_settings
|
||||
|
||||
|
||||
class SemanticIndexCliTest(unittest.TestCase):
    """Checks the ``python -m semantic_index`` entry point and settings loading."""

    def test_help_does_not_require_http_runtime_dependencies(self):
        command = [sys.executable, "-m", "semantic_index", "--help"]

        # capture_output=True pipes both stdout and stderr.
        completed = subprocess.run(command, check=False, capture_output=True, text=True)

        self.assertEqual("", completed.stderr)
        self.assertEqual(0, completed.returncode)
        self.assertIn("--mcp-stdio", completed.stdout)

    def test_settings_load_from_package_env_when_root_env_missing(self):
        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            package_dir = root / "semantic_index"
            package_dir.mkdir()
            package_env = package_dir / ".env"
            package_env.write_text(
                "QDRANT_URL=http://qdrant.example:6333\nREDMINE_SAMPLE_LIMIT=7\n",
                encoding="utf-8",
            )

            # The root-level .env is never created; only the package-level file exists.
            settings = load_settings(root / ".env")

        self.assertEqual("http://qdrant.example:6333", settings.qdrant_url)
        self.assertEqual(7, settings.sample_limit)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,87 @@
|
||||
import json
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from semantic_index.client import SemanticIndexClient
|
||||
from semantic_index.models import SearchResult
|
||||
|
||||
|
||||
class FakeSearchService:
    """Search stand-in that logs queries and returns one hit from the ``hiring`` project."""

    def __init__(self):
        self.queries = []  # every query object passed to search(), in call order

    def search(self, query):
        """Record ``query`` and return a single canned ``SearchResult``."""
        self.queries.append(query)
        hit_payload = {
            "source": "redmine",
            "project_identifier": "hiring",
            "doc_type": "issue",
            "issue_id": 1,
            "redmine_url": "http://redmine/issues/1",
            "source_record_id": "issue:1",
        }
        hit = SearchResult(
            id="redmine:issue:1:chunk:0",
            score=0.7,
            text="Candidate follow up",
            payload=hit_payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed document dict carrying ``document_id``."""
        payload = {"project_identifier": "hiring"}
        return {"id": document_id, "text": "Full text", "payload": payload}
|
||||
|
||||
|
||||
class SemanticIndexClientTest(unittest.TestCase):
    """Covers SemanticIndexClient in both in-process and HTTP modes."""

    def test_in_process_client_returns_normalized_search_response(self):
        search = FakeSearchService()
        client = SemanticIndexClient(search_service=search)

        response = client.search("candidate follow up", project_identifier="hiring", limit=3)

        first_result = response["results"][0]
        self.assertEqual("candidate follow up", response["query"])
        self.assertEqual({"project_identifier": "hiring", "limit": 3}, response["filters"])
        self.assertEqual("redmine:issue:1:chunk:0", first_result["id"])
        self.assertEqual("hiring", first_result["citation"]["project_identifier"])
        self.assertEqual("hiring", search.queries[0].project_identifier)

    def test_in_process_client_get_document(self):
        client = SemanticIndexClient(search_service=FakeSearchService())

        document = client.get_document("redmine:issue:1:chunk:0")

        self.assertEqual("Full text", document["text"])

    def test_http_client_sends_auth_header_and_parses_search_response(self):
        canned_reply = {"query": "printer", "filters": {}, "results": []}
        body = json.dumps(canned_reply).encode()

        class FakeResponse:
            """Context-manager response whose ``read()`` yields the canned body."""

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def read(self):
                return body

        captured = {}

        def fake_urlopen(request, timeout):
            # Capture what the client sent so it can be asserted on below.
            captured["url"] = request.full_url
            captured["authorization"] = request.headers.get("Authorization")
            captured["body"] = json.loads(request.data.decode())
            return FakeResponse()

        with patch("urllib.request.urlopen", fake_urlopen):
            client = SemanticIndexClient(base_url="http://semantic.local", api_key="secret")
            response = client.search("printer", project_identifier="customer-service")

        self.assertEqual("http://semantic.local/search", captured["url"])
        self.assertEqual("Bearer secret", captured["authorization"])
        self.assertEqual("customer-service", captured["body"]["project_identifier"])
        self.assertEqual("printer", response["query"])
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,138 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.models import IndexDocument
|
||||
from semantic_index.redmine import RedmineMapper
|
||||
|
||||
|
||||
class RedmineMapperTest(unittest.TestCase):
    """Checks how RedmineMapper turns issue dicts into index documents."""

    @staticmethod
    def _documents_for(issue, **mapper_options):
        """Map ``issue`` with a fresh RedmineMapper bound to the test Redmine URL."""
        mapper = RedmineMapper(redmine_url="http://redmine.local", **mapper_options)
        return mapper.issue_to_documents(issue)

    @staticmethod
    def _first_of_type(docs, doc_type):
        """Return the first document whose payload ``doc_type`` matches."""
        return next(doc for doc in docs if doc.payload["doc_type"] == doc_type)

    def test_issue_chunks_have_stable_ids_and_metadata(self):
        issue = {
            "id": 42,
            "subject": "Widget order ORD-12345 cannot ship",
            "description": "Customer reports that widget order ORD-12345 is blocked.",
            "project": {"id": 7, "identifier": "fud-helpdesk"},
            "contact": {"id": 9, "email": "ada@example.com", "name": "Ada Lovelace"},
            "created_on": "2026-04-01T10:00:00Z",
            "updated_on": "2026-04-02T10:00:00Z",
            "url": "http://redmine.local/issues/42",
        }

        # Map the same issue twice with independent mappers to compare ids.
        first = self._documents_for(issue)
        second = self._documents_for(issue)

        self.assertEqual([doc.id for doc in first], [doc.id for doc in second])
        head = first[0]
        self.assertEqual("redmine:issue:42:chunk:0", head.id)
        self.assertEqual("issue", head.payload["doc_type"])
        self.assertEqual(42, head.payload["issue_id"])
        self.assertEqual("fud-helpdesk", head.payload["project_identifier"])
        self.assertIsNone(head.payload["project_name"])
        self.assertFalse(head.payload["has_helpdesk_ticket"])
        self.assertEqual("ada@example.com", head.payload["contact_email"])
        self.assertEqual("Ada Lovelace", head.payload["contact_name"])
        self.assertEqual("http://redmine.local/issues/42", head.payload["redmine_url"])
        self.assertIn("source_hash", head.payload)

    def test_helpdesk_ticket_contact_is_mapped_to_all_issue_chunks(self):
        ticket_contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "company": "SafeTag Tracking",
            "email": "callum@safetagtracking.com",
        }
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please arrange to return these goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": {
                "id": 35159,
                "contact_id": 1890,
                "from_address": "callum@safetagtracking.com",
                "contact": ticket_contact,
            },
            "journals": [
                {"id": 71570, "notes": "Hello, yes we can arrange this today.", "created_on": "2026-04-14T14:29:49Z"}
            ],
        }

        docs = self._documents_for(issue)
        issue_doc = self._first_of_type(docs, "issue")
        journal_doc = self._first_of_type(docs, "journal")
        contact_doc = self._first_of_type(docs, "contact")

        for doc in (issue_doc, journal_doc, contact_doc):
            payload = doc.payload
            self.assertEqual(35159, payload["helpdesk_ticket_id"])
            self.assertTrue(payload["has_helpdesk_ticket"])
            self.assertEqual(1890, payload["contact_id"])
            self.assertEqual("Callum Mackeonis", payload["contact_name"])
            self.assertEqual("SafeTag Tracking", payload["contact_company"])
            self.assertEqual("callum@safetagtracking.com", payload["contact_email"])
        self.assertIn("Callum Mackeonis", issue_doc.text)
        self.assertIn("callum@safetagtracking.com", contact_doc.text)

    def test_configured_project_identifier_is_used_when_issue_payload_omits_identifier(self):
        # The project dict deliberately has no "identifier" key.
        issue = {
            "id": 42,
            "subject": "Widget order",
            "description": "Body",
            "project": {"id": 1, "name": "Customer Service"},
        }

        docs = self._documents_for(issue, project_identifier="customer-service")

        self.assertEqual("customer-service", docs[0].payload["project_identifier"])
        self.assertEqual("Customer Service", docs[0].payload["project_name"])

    def test_internal_non_helpdesk_issue_keeps_project_metadata_without_contact(self):
        issue = {
            "id": 55,
            "subject": "Internal hiring task",
            "description": "Follow up with candidate.",
            "project": {"id": 68, "identifier": "hiring", "name": "Hiring"},
        }

        docs = self._documents_for(issue)

        self.assertEqual(1, len(docs))
        only_payload = docs[0].payload
        self.assertEqual("hiring", only_payload["project_identifier"])
        self.assertEqual("Hiring", only_payload["project_name"])
        self.assertFalse(only_payload["has_helpdesk_ticket"])
        self.assertIsNone(only_payload["contact_id"])

    def test_issue_journals_messages_and_contact_are_mapped(self):
        issue = {
            "id": 42,
            "subject": "Widget order",
            "description": "Ticket envelope",
            "project": {"id": 7, "identifier": "fud-helpdesk"},
            "contact": {"id": 9, "email": "ada@example.com", "name": "Ada Lovelace"},
            "journals": [
                {"id": 5, "notes": "Private escalation note", "private_notes": True, "created_on": "2026-04-03T10:00:00Z"}
            ],
            "messages": [
                {"id": 6, "body": "Customer reply body", "direction": "incoming", "created_on": "2026-04-03T11:00:00Z"}
            ],
        }

        docs = self._documents_for(issue)
        ids = {doc.id for doc in docs}
        types = {doc.payload["doc_type"] for doc in docs}

        self.assertIn("redmine:issue:42:journal:5:chunk:0", ids)
        self.assertIn("redmine:issue:42:message:6:chunk:0", ids)
        self.assertIn("redmine:contact:9:issue:42:chunk:0", ids)
        self.assertEqual({"issue", "journal", "message", "contact"}, types)
        journal = self._first_of_type(docs, "journal")
        message = self._first_of_type(docs, "message")
        self.assertEqual("private", journal.payload["visibility"])
        self.assertEqual("incoming", message.payload["direction"])

    def test_empty_documents_are_rejected(self):
        # Whitespace-only text must be refused at construction time.
        with self.assertRaises(ValueError):
            IndexDocument(id="x", text=" ", payload={})
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,46 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.embeddings import OpenAIEmbedder
|
||||
from semantic_index.models import IndexDocument
|
||||
|
||||
|
||||
class FakeOpenAIClient:
    """Embedding-client stand-in that logs requests and returns deterministic vectors."""

    def __init__(self):
        self.calls = []  # one {"model", "inputs", "dimensions"} dict per request

    def create_embeddings(self, model, inputs, dimensions=None):
        """Log the request and return one 3-element vector per input.

        Each vector repeats the input's 1-based position within this call.
        """
        request = {"model": model, "inputs": list(inputs), "dimensions": dimensions}
        self.calls.append(request)
        vectors = []
        for position, _text in enumerate(inputs, start=1):
            vectors.append([float(position)] * 3)
        return vectors
|
||||
|
||||
|
||||
class OpenAIEmbedderTest(unittest.TestCase):
    """Checks OpenAIEmbedder batching and input validation against a fake client."""

    def test_batches_embedding_requests(self):
        client = FakeOpenAIClient()
        embedder = OpenAIEmbedder(client=client, batch_size=2, dimensions=1536)
        labelled_texts = (("a", "alpha"), ("b", "bravo"), ("c", "charlie"))
        docs = [IndexDocument(id=doc_id, text=text, payload={}) for doc_id, text in labelled_texts]

        vectors = embedder.embed_documents(docs)

        # The fake numbers vectors per call, so the third doc (alone in batch two) gets 1.0 again.
        self.assertEqual([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [1.0, 1.0, 1.0]], vectors)
        self.assertEqual(2, len(client.calls))
        first_call = client.calls[0]
        self.assertEqual(["alpha", "bravo"], first_call["inputs"])
        self.assertEqual("text-embedding-3-small", first_call["model"])
        self.assertEqual(1536, first_call["dimensions"])

    def test_rejects_empty_or_oversized_chunks_before_api_call(self):
        client = FakeOpenAIClient()
        embedder = OpenAIEmbedder(client=client, max_chars=5)

        # First batch contains a blank text; second exceeds max_chars.
        for rejected_batch in (["ok", " "], ["toolong"]):
            with self.assertRaises(ValueError):
                embedder.embed_texts(rejected_batch)
        self.assertEqual([], client.calls)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,394 @@
|
||||
import io
|
||||
import json
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_index.__main__ import main
|
||||
from semantic_index.config import Settings
|
||||
from semantic_index.models import SearchResult
|
||||
|
||||
|
||||
class FakeSearchService:
    """Search stand-in whose single hit depends on the query text."""

    def __init__(self):
        self.queries = []  # every query object passed to search(), in call order

    def search(self, query):
        """Record ``query`` and return zero or one canned ``SearchResult``.

        A query containing ``missing@example.test`` yields no hits. Otherwise
        one hit is returned: a contact document when the text contains
        ``callum``, an issue document if not.
        """
        self.queries.append(query)
        if "missing@example.test" in query.text:
            return []

        if "callum" in query.text:
            hit_id = "redmine:contact:1890:issue:39779:chunk:0"
        else:
            hit_id = "redmine:issue:39779:chunk:0"
        hit_payload = {
            "source": "redmine",
            "doc_type": "contact" if "callum" in query.text else "issue",
            "issue_id": 39779,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "contact_company": "SafeTag Tracking",
            "redmine_url": "http://redmine/issues/39779",
        }
        hit = SearchResult(
            id=hit_id,
            score=0.58,
            text="Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            payload=hit_payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed journal document carrying ``document_id``."""
        journal_payload = {
            "source": "redmine",
            "doc_type": "journal",
            "issue_id": 39778,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "redmine_url": "http://redmine/issues/39778",
        }
        return {"id": document_id, "text": "Full indexed text", "payload": journal_payload}
|
||||
|
||||
|
||||
class FakeStore:
    """Store stand-in with a fixed document count and four canned documents."""

    def __init__(self):
        self.list_limits = []  # the ``limit`` value of each list_documents call

    def count_documents(self, source=None, project_identifier=None, doc_type=None):
        """Return a constant count; all filters are ignored."""
        return 12

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None):
        """Record ``limit`` and return the same four documents every time.

        The filters are ignored and ``limit`` does not truncate the result.
        """
        self.list_limits.append(limit)

        def helpdesk_payload(doc_type_value, source_hash):
            # Payload shared by the three documents derived from issue 39779.
            return {
                "source": "redmine",
                "doc_type": doc_type_value,
                "issue_id": 39779,
                "project_identifier": "customer-service",
                "project_name": "Customer Service",
                "has_helpdesk_ticket": True,
                "contact_id": 1890,
                "contact_name": "Callum Mackeonis",
                "contact_email": "callum@safetagtracking.com",
                "contact_company": "SafeTag Tracking",
                "source_hash": source_hash,
                "redmine_url": "http://redmine/issues/39779",
            }

        issue_doc = {
            "id": "redmine:issue:39779:chunk:0",
            "text": "Issue #39779: Goods return\nPlease return our goods.",
            "payload": helpdesk_payload("issue", "issue-hash"),
        }
        journal_doc = {
            "id": "redmine:issue:39779:journal:71570:chunk:0",
            "text": "Hello, we can arrange this today.",
            "payload": helpdesk_payload("journal", "journal-hash"),
        }
        contact_doc = {
            "id": "redmine:contact:1890:issue:39779:chunk:0",
            "text": "Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            "payload": helpdesk_payload("contact", "contact-hash"),
        }
        ordinary_doc = {
            "id": "redmine:issue:39800:chunk:0",
            "text": "Ordinary issue with no helpdesk contact.",
            "payload": {
                "source": "redmine",
                "doc_type": "issue",
                "issue_id": 39800,
                "project_identifier": "hiring",
                "project_name": "Hiring",
                "has_helpdesk_ticket": False,
                "source_hash": "ordinary-hash",
                "redmine_url": "http://redmine/issues/39800",
            },
        }
        return [issue_doc, journal_doc, contact_doc, ordinary_doc]
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """Redmine source stand-in that yields one helpdesk issue with a contact."""

    def recent_helpdesk_issues(self, limit):
        """Return the single canned issue, sliced to ``limit`` entries."""
        contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "email": "callum@safetagtracking.com",
            "company": "SafeTag Tracking",
        }
        ticket = {"id": 35159, "contact_id": 1890, "contact": contact}
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please return our goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": ticket,
        }
        return [issue][:limit]
|
||||
|
||||
|
||||
def fake_services(store=None, search=None):
    """Build the service mapping passed to ``main`` as ``service_builder``.

    Args:
        store: Optional store fake to inject; a new ``FakeStore`` is created
            when omitted.
        search: Optional search fake to inject; a new ``FakeSearchService``
            is created when omitted.

    Returns:
        A dict with ``settings``, ``search``, ``store``, ``redmine_source``
        and ``backfill`` entries.
    """
    settings = Settings(
        openai_api_key="",
        qdrant_url="http://qdrant",
        qdrant_api_key=None,
        qdrant_collection="semantic",
        redmine_url="http://redmine",
        redmine_api_key="",
        redmine_project_identifier="customer-service",
        sample_limit=50,
        bind_host="127.0.0.1",
        bind_port=8787,
        service_api_key=None,
        refresh_state_path=Path(".cache/semantic_index/refresh_state.json"),
    )
    # Compare against None rather than relying on truthiness, so an injected
    # fake that happens to be falsy is not silently replaced by a default.
    return {
        "settings": settings,
        "search": FakeSearchService() if search is None else search,
        "store": FakeStore() if store is None else store,
        "redmine_source": FakeRedmineSource(),
        "backfill": FakeBackfillService(),
    }
|
||||
|
||||
|
||||
class FakeBackfillService:
    """Backfill stand-in that logs calls and echoes the requested sizes back."""

    def __init__(self):
        self.calls = []  # one tuple per call, tagged with the kind of backfill

    def backfill_redmine_sample(self, limit):
        """Log a sample backfill and report ``limit`` issues and documents."""
        self.calls.append(("sample", limit))
        return dict(source="redmine", issues=limit, documents=limit)

    def backfill_redmine_projects(self, projects, per_project_limit):
        """Log a multi-project backfill using the same limit for every project."""
        self.calls.append(("projects", projects, per_project_limit))
        project_count = len(projects)
        per_project = [
            {"project_identifier": project, "issues": per_project_limit, "documents": per_project_limit}
            for project in projects
        ]
        return {
            "source": "redmine",
            "projects": project_count,
            "issues": project_count * per_project_limit,
            "documents": project_count * per_project_limit,
            "project_results": per_project,
        }

    def backfill_redmine_project_limits(self, project_limits):
        """Log a multi-project backfill that has an individual limit per project."""
        self.calls.append(("project_limits", project_limits))
        per_project = []
        for project, limit in project_limits.items():
            per_project.append({"project_identifier": project, "issues": limit, "documents": limit})
        return {
            "source": "redmine",
            "projects": len(project_limits),
            "issues": sum(project_limits.values()),
            "documents": sum(project_limits.values()),
            "project_results": per_project,
        }
|
||||
|
||||
|
||||
class InspectCliTest(unittest.TestCase):
    """CLI tests for the ``inspect`` subcommands and the backfill flags.

    Each test drives ``main`` with fake services and checks either the text
    (or JSON) written to stdout or the calls recorded on the fakes.
    """

    def _capture(self, argv, **main_kwargs):
        """Call ``main`` with stdout redirected and return what was printed."""
        sink = io.StringIO()
        with redirect_stdout(sink):
            main(argv, **main_kwargs)
        return sink.getvalue()

    def run_cli(self, args):
        """Run the CLI against the default ``fake_services`` builder."""
        return self._capture(args, service_builder=fake_services)

    def test_no_args_prints_help_without_building_services(self):
        def broken_services():
            raise AssertionError("help should not build live services")

        printed = self._capture([], service_builder=broken_services)

        self.assertIn("inspect", printed)

    def test_count_lists_matching_document_count(self):
        argv = ["inspect", "count", "--source", "redmine", "--project", "customer-service"]

        self.assertIn("12", self.run_cli(argv))

    def test_list_shows_snippet_and_metadata_by_default(self):
        argv = ["inspect", "list", "--limit", "5", "--source", "redmine", "--project", "customer-service"]
        printed = self.run_cli(argv)

        self.assertIn("redmine:issue:39779:chunk:0", printed)
        # The issue label is matched case-insensitively.
        self.assertIn("issue #39779", printed.lower())
        for fragment in (
            "customer-service",
            "contact=#1890",
            "Callum Mackeonis",
            "callum@safetagtracking.com",
        ):
            self.assertIn(fragment, printed)
        self.assertNotIn("Full indexed text", printed)

    def test_search_runs_query_and_prints_citation(self):
        argv = ["inspect", "search", "order status", "--limit", "3", "--project", "customer-service"]
        printed = self.run_cli(argv)

        for fragment in ("score=0.5800", "http://redmine/issues/39779"):
            self.assertIn(fragment, printed)

    def test_show_prints_full_document_text(self):
        printed = self.run_cli(["inspect", "show", "redmine:issue:39778:chunk:0"])

        for fragment in ("Full indexed text", "doc_type=journal"):
            self.assertIn(fragment, printed)

    def test_preview_redmine_maps_documents_without_writing(self):
        argv = ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"]
        printed = self.run_cli(argv)

        for fragment in (
            "redmine:issue:39779:chunk:0",
            "project=customer-service",
            "Please return our goods",
        ):
            self.assertIn(fragment, printed)

    def test_preview_redmine_uses_minimal_service_builder(self):
        seen_projects = []

        def minimal_builder(settings):
            seen_projects.append(settings.redmine_project_identifier)
            return {"settings": settings, "redmine_source": FakeRedmineSource()}

        def forbidden_builder():
            raise AssertionError("full services should not be built")

        printed = self._capture(
            ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"],
            service_builder=forbidden_builder,
            preview_service_builder=minimal_builder,
            settings_loader=lambda: fake_services()["settings"],
        )

        self.assertEqual(["customer-service"], seen_projects)
        self.assertIn("redmine:issue:39779:chunk:0", printed)

    def test_audit_prints_doc_type_counts_contact_coverage_and_attachment_check(self):
        argv = ["inspect", "audit", "--limit", "10", "--source", "redmine", "--project", "customer-service"]
        printed = self.run_cli(argv)

        for fragment in (
            "documents=4",
            "doc_type issue=2",
            "doc_type journal=1",
            "doc_type contact=1",
            "contact_metadata 3/4",
            "helpdesk_contact_metadata 3/3",
            "project customer-service=3",
            "project hiring=1",
            "attachments=0",
        ):
            self.assertIn(fragment, printed)
        self.assertNotIn("missing_contact redmine:issue:39800:chunk:0", printed)

    def test_audit_json_returns_machine_readable_summary(self):
        argv = ["inspect", "audit", "--limit", "10", "--project", "customer-service", "--json"]
        summary = json.loads(self.run_cli(argv))

        self.assertEqual(4, summary["total_documents"])
        self.assertEqual(2, summary["doc_type_counts"]["issue"])
        self.assertEqual(3, summary["project_counts"]["customer-service"])
        self.assertEqual(1, summary["project_counts"]["hiring"])
        self.assertEqual([], summary["missing_helpdesk_contact_metadata"])

    def test_compare_redmine_reports_missing_stale_and_contact_mismatches(self):
        argv = ["inspect", "compare-redmine", "--limit", "1", "--project", "customer-service"]
        printed = self.run_cli(argv)

        for fragment in (
            "preview_documents=2",
            "indexed_documents=4",
            "stale",
            "redmine:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, printed)

    def test_compare_redmine_fetches_a_large_index_window_to_avoid_false_missing_results(self):
        store = FakeStore()

        self._capture(
            ["inspect", "compare-redmine", "--limit", "3", "--project", "customer-service"],
            service_builder=lambda: fake_services(store=store),
        )

        self.assertEqual(5000, store.list_limits[0])

    def test_smoke_search_prints_pass_fail_for_known_queries(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "callum@safetagtracking.com",
            "--issue-id",
            "39779",
        ]
        printed = self.run_cli(argv)

        for fragment in (
            "PASS email callum@safetagtracking.com",
            "PASS issue 39779",
            "redmine:contact:1890:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, printed)

    def test_smoke_search_uses_issue_id_filter_for_issue_checks(self):
        search = FakeSearchService()

        self._capture(
            ["inspect", "smoke-search", "--project", "customer-service", "--issue-id", "39779"],
            service_builder=lambda: fake_services(search=search),
        )

        matching = [recorded for recorded in search.queries if recorded.text == "39779"]
        self.assertEqual(39779, matching[0].issue_id)

    def test_smoke_search_json_returns_check_results(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "missing@example.test",
            "--json",
        ]
        report = json.loads(self.run_cli(argv))

        first_check = report["checks"][0]
        self.assertFalse(first_check["passed"])
        self.assertEqual("email", report["checks"][0]["kind"])

    def test_backfill_redmine_projects_cli_parses_comma_separated_projects(self):
        backfill = FakeBackfillService()
        services = fake_services()
        # Swap in a recorder this test can inspect afterwards.
        services["backfill"] = backfill

        printed = self._capture(
            [
                "--backfill-redmine-projects",
                "--projects",
                "customer-service,hiring",
                "--per-project-limit",
                "25",
            ],
            service_builder=lambda: services,
        )

        self.assertEqual(("projects", ["customer-service", "hiring"], 25), backfill.calls[0])
        self.assertIn("'projects': 2", printed)

    def test_backfill_redmine_projects_cli_parses_project_specific_limits(self):
        backfill = FakeBackfillService()
        services = fake_services()
        services["backfill"] = backfill

        printed = self._capture(
            [
                "--backfill-redmine-projects",
                "--project-limits",
                "customer-service=500,hiring=200",
            ],
            service_builder=lambda: services,
        )

        self.assertEqual(("project_limits", {"customer-service": 500, "hiring": 200}), backfill.calls[0])
        self.assertIn("'issues': 700", printed)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,58 @@
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory two levels above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[2]
# Installer script under test, addressed relative to ROOT.
INSTALLER = ROOT.joinpath("deploy", "semantic-index", "install.sh")
|
||||
|
||||
|
||||
class SemanticIndexInstallerTest(unittest.TestCase):
    """Runs the installer script as a subprocess and checks its output."""

    def run_installer(self, *args, env=None):
        """Execute the installer from ``ROOT`` and return the completed process.

        stdout and stderr are captured as text; a non-zero exit status does
        not raise, so tests can assert on ``returncode`` themselves.
        """
        command = [str(INSTALLER)]
        command.extend(args)
        return subprocess.run(
            command,
            cwd=ROOT,
            env=env,
            text=True,
            capture_output=True,
            check=False,
        )

    def test_default_mode_is_dry_run(self):
        completed = self.run_installer()

        self.assertEqual(0, completed.returncode, completed.stderr)
        for fragment in (
            "mode=dry-run",
            "would run: sudo mkdir -p /opt/semantic-index",
            "would run: sudo rsync",
        ):
            self.assertIn(fragment, completed.stdout)
        self.assertNotIn("Semantic Index installed, but deployment is not complete.", completed.stdout)

    def test_apply_prints_manual_next_step_warning(self):
        with tempfile.TemporaryDirectory() as tmp:
            sandbox = Path(tmp)
            # Point every installer location at the throwaway sandbox.
            locations = {
                "SEMANTIC_INDEX_INSTALL_DIR": sandbox / "opt" / "semantic-index",
                "SEMANTIC_INDEX_ENV_FILE": sandbox / "etc" / "semantic-index.env",
                "SEMANTIC_INDEX_STATE_DIR": sandbox / "var" / "lib" / "semantic-index",
                "SEMANTIC_INDEX_LOG_DIR": sandbox / "var" / "log" / "semantic-index",
                "SEMANTIC_INDEX_SYSTEMD_DIR": sandbox / "etc" / "systemd" / "system",
            }
            env = {"PATH": "/usr/bin:/bin"}
            env.update((name, str(path)) for name, path in locations.items())

            completed = self.run_installer("--apply", "--no-system", "--skip-deps", env=env)

            self.assertEqual(0, completed.returncode, completed.stderr)
            for fragment in (
                "Semantic Index installed, but deployment is not complete.",
                "The refresh timer was NOT enabled automatically.",
                "Do not use --force-rebuild",
            ):
                self.assertIn(fragment, completed.stdout)

    def test_invalid_argument_fails_with_usage(self):
        completed = self.run_installer("--force-rebuild")

        self.assertEqual(2, completed.returncode)
        self.assertIn("Usage:", completed.stderr)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,187 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.models import IndexDocument
|
||||
from semantic_index.qdrant_store import QdrantStore
|
||||
|
||||
|
||||
class FakeMatchValue:
    """Minimal match object that only remembers the value it was given."""

    def __init__(self, value):
        # Stored verbatim so tests can read back what the store matched on.
        self.value = value
|
||||
|
||||
|
||||
class FakeFieldCondition:
    """Minimal field condition holding a key plus optional match and range."""

    def __init__(self, key, match=None, range=None):
        # ``range`` shadows the builtin on purpose: callers pass it by keyword.
        self.key, self.match, self.range = key, match, range
|
||||
|
||||
|
||||
class FakeFilter:
    """Minimal filter object exposing the ``must`` conditions it was built with."""

    def __init__(self, must):
        # Kept by reference; tests inspect it through ``.must``.
        self.must = must
|
||||
|
||||
|
||||
class FakeFilterSelector:
    """Minimal selector wrapping a filter, readable back via ``.filter``."""

    def __init__(self, filter):
        # ``filter`` shadows the builtin on purpose: callers pass it by keyword.
        self.filter = filter
|
||||
|
||||
|
||||
class FakePointIdsList:
    """Minimal selector carrying an explicit collection of point ids."""

    def __init__(self, points):
        # Kept by reference; tests inspect it through ``.points``.
        self.points = points
|
||||
|
||||
|
||||
class FakeQModels:
    """Attribute bundle exposing the fake model classes under short names.

    Provides ``MatchValue``, ``FieldCondition``, ``Filter``,
    ``FilterSelector`` and ``PointIdsList`` as aliases of the fakes defined
    in this module, plus a nested ``PointStruct``.
    """

    MatchValue = FakeMatchValue
    FieldCondition = FakeFieldCondition
    Filter = FakeFilter
    FilterSelector = FakeFilterSelector
    PointIdsList = FakePointIdsList

    class PointStruct:
        """Plain record of a point's id, vector and payload."""

        def __init__(self, id, vector, payload):
            # ``id`` shadows the builtin on purpose: callers pass it by keyword.
            self.id, self.vector, self.payload = id, vector, payload
|
||||
|
||||
|
||||
class FakeCountResult:
    """Canned count response; every instance reports the same total."""

    # Fixed total read by callers through ``.count``.
    count = 7
|
||||
|
||||
|
||||
class FakeRecord:
    """Canned stored record with a fixed point id and a fresh payload dict."""

    def __init__(self):
        self.id = "point-id"
        # Built per instance so tests may replace or mutate it freely.
        self.payload = dict(
            document_id="redmine:issue:1:chunk:0",
            text="Indexed text",
            source="redmine",
            project_identifier="customer-service",
        )
|
||||
|
||||
|
||||
class FakeClient:
    """Client test double that remembers the arguments of the latest calls.

    ``count`` and ``scroll`` store the filter they received and answer with
    canned data; ``delete`` stores the selector (and its ``filter`` attribute
    when present); ``upsert`` collects every batch of points in order.
    """

    def __init__(self):
        self.upsert_batches = []
        self.delete_selector = None
        self.delete_filter = None
        self.scroll_filter = None
        self.count_filter = None

    def get_collections(self):
        """Return an object listing a single collection named ``semantic``."""
        collection_cls = type("Collection", (), {"name": "semantic"})
        listing_cls = type("Collections", (), {"collections": [collection_cls()]})
        return listing_cls()

    def count(self, collection_name, count_filter, exact):
        """Remember ``count_filter`` and return the canned count result."""
        self.count_filter = count_filter
        return FakeCountResult()

    def scroll(self, collection_name, scroll_filter, limit, with_payload, with_vectors, offset=None):
        """Remember ``scroll_filter`` and return one record with no next offset."""
        self.scroll_filter = scroll_filter
        page = [FakeRecord()]
        return page, None

    def delete(self, collection_name, points_selector):
        """Remember the selector; ``delete_filter`` is None when it has no filter."""
        self.delete_selector = points_selector
        self.delete_filter = getattr(points_selector, "filter", None)

    def upsert(self, collection_name, points):
        """Append the given batch of points to ``upsert_batches``."""
        self.upsert_batches.append(points)
|
||||
|
||||
|
||||
class QdrantStoreReadTest(unittest.TestCase):
    """Checks how ``QdrantStore`` talks to its client, using ``FakeClient``."""

    def make_store(self):
        """Build a ``QdrantStore`` without running ``__init__``, wired to fakes."""
        store = object.__new__(QdrantStore)
        wiring = {
            "client": FakeClient(),
            "collection": "semantic",
            "vector_size": 1536,
            "qmodels": FakeQModels,
            "upsert_batch_size": 2,
        }
        for attribute, value in wiring.items():
            setattr(store, attribute, value)
        return store

    def test_count_documents_builds_metadata_filter(self):
        store = self.make_store()

        total = store.count_documents(source="redmine", project_identifier="customer-service", doc_type="issue")

        self.assertEqual(7, total)
        must = store.client.count_filter.must
        keys = [item.key for item in must]
        self.assertEqual(["source", "project_identifier", "doc_type"], keys)
        self.assertEqual("customer-service", must[1].match.value)

    def test_list_documents_strips_internal_payload_fields(self):
        store = self.make_store()

        listed = store.list_documents(limit=5, source="redmine", project_identifier="customer-service")

        first = listed[0]
        self.assertEqual("redmine:issue:1:chunk:0", first["id"])
        self.assertEqual("Indexed text", first["text"])
        for internal_key in ("document_id", "text"):
            self.assertNotIn(internal_key, listed[0]["payload"])

    def test_delete_by_source_can_be_limited_to_project_scope(self):
        store = self.make_store()

        store.delete_by_source("redmine", project_identifier="customer-service")

        must = store.client.delete_filter.must
        keys = [item.key for item in must]
        self.assertEqual(["source", "project_identifier"], keys)
        self.assertEqual("redmine", must[0].match.value)
        self.assertEqual("customer-service", must[1].match.value)

    def test_list_documents_can_be_limited_to_issue_scope(self):
        store = self.make_store()

        store.list_documents(limit=5, source="redmine", project_identifier="customer-service", issue_id=39779)

        must = store.client.scroll_filter.must
        keys = [item.key for item in must]
        self.assertEqual(["source", "project_identifier", "issue_id"], keys)
        self.assertEqual(39779, must[2].match.value)

    def test_delete_documents_deletes_stable_document_point_ids(self):
        store = self.make_store()
        document_id = "redmine:issue:39779:chunk:0"

        store.delete_documents([document_id])

        self.assertEqual(1, len(store.client.delete_selector.points))
        # The id sent to the client must not be the raw document id.
        self.assertNotEqual(document_id, store.client.delete_selector.points[0])

    def test_upsert_sends_points_in_batches(self):
        store = self.make_store()
        documents = []
        for issue_id in range(5):
            documents.append(
                IndexDocument(
                    id=f"redmine:issue:{issue_id}:chunk:0",
                    text=f"Issue {issue_id}",
                    payload={"source": "redmine"},
                )
            )
        vectors = [[0.1, 0.2, 0.3] for _ in documents]

        store.upsert(documents, vectors)

        batch_sizes = [len(batch) for batch in store.client.upsert_batches]
        self.assertEqual([2, 2, 1], batch_sizes)
        self.assertEqual("Issue 0", store.client.upsert_batches[0][0].payload["text"])

    def test_list_documents_paginates_qdrant_scroll_until_requested_limit(self):
        class PagedClient(FakeClient):
            """Serves two records per page and exactly two pages."""

            def __init__(self):
                super().__init__()
                self.offsets = []

            def scroll(self, collection_name, scroll_filter, limit, with_payload, with_vectors, offset=None):
                self.offsets.append(offset)
                # Pages are numbered from 1 in call order.
                page_number = len(self.offsets)
                records = []
                for suffix in ("a", "b"):
                    record = FakeRecord()
                    record.payload = {**record.payload, "document_id": f"doc:{page_number}{suffix}"}
                    records.append(record)
                next_offset = "next" if offset is None else None
                return records, next_offset

        store = self.make_store()
        store.client = PagedClient()

        listed = store.list_documents(limit=3, source="redmine")

        self.assertEqual(["doc:1a", "doc:1b", "doc:2a"], [entry["id"] for entry in listed])
        self.assertEqual([None, "next"], store.client.offsets)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,102 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.redmine import RedmineApiSource
|
||||
|
||||
|
||||
class RecordingRedmineSource(RedmineApiSource):
    """Redmine source whose HTTP layer is replaced by canned JSON.

    Every requested URL is appended to ``urls``. Issue-list URLs return a
    single summary for issue 39779; any other URL returns that issue's detail.
    """

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )
        self.urls = []

    def _get_json(self, url):
        self.urls.append(url)
        is_listing = url.startswith("http://redmine.local/issues.json")
        if not is_listing:
            return {"issue": {"id": 39779, "subject": "Goods return"}}
        return {"issues": [{"id": 39779}]}
|
||||
|
||||
|
||||
class PagedRedmineSource(RedmineApiSource):
    """Redmine source that synthesizes issue pages from ``offset``/``limit``.

    Issue-list URLs return ``limit`` summaries whose ids start at
    ``offset + 1``; detail URLs echo the id found in the path. Every
    requested URL is appended to ``urls``.
    """

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )
        self.urls = []

    def _get_json(self, url):
        self.urls.append(url)
        if not url.startswith("http://redmine.local/issues.json"):
            # Detail URL: the id sits between "/issues/" and the first ".".
            tail = url.split("/issues/", 1)[1]
            issue_id = int(tail.split(".", 1)[0])
            return {"issue": {"id": issue_id, "subject": f"Issue {issue_id}"}}

        query = url.split("?", 1)[1]
        params = {}
        for part in query.split("&"):
            name, value = part.split("=", 1)
            params[name] = value
        offset = int(params.get("offset", "0"))
        limit = int(params.get("limit", "0"))
        first_id = offset + 1
        page = [{"id": issue_id} for issue_id in range(first_id, first_id + limit)]
        return {"issues": page}
|
||||
|
||||
|
||||
class DuplicatePagedRedmineSource(RedmineApiSource):
    """Redmine source whose second page repeats the last id of the first.

    Offset 0 yields ids 1 and 2, offset 2 yields ids 2 and 3, and any other
    offset yields an empty page. Detail URLs echo the id found in the path.
    """

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )

    def _get_json(self, url):
        if not url.startswith("http://redmine.local/issues.json"):
            tail = url.split("/issues/", 1)[1]
            issue_id = int(tail.split(".", 1)[0])
            return {"issue": {"id": issue_id, "subject": f"Issue {issue_id}"}}

        query = url.split("?", 1)[1]
        params = {}
        for part in query.split("&"):
            name, value = part.split("=", 1)
            params[name] = value
        offset = int(params.get("offset", "0"))
        # Issue 2 appears on both pages on purpose.
        pages = {
            0: [{"id": 1}, {"id": 2}],
            2: [{"id": 2}, {"id": 3}],
        }
        return {"issues": pages.get(offset, [])}
|
||||
|
||||
|
||||
class RedmineApiSourceTest(unittest.TestCase):
    """Checks which URLs ``RedmineApiSource`` requests and what it yields."""

    LIST_PREFIX = "http://redmine.local/issues.json"

    def test_recent_issue_summaries_do_not_fetch_issue_details(self):
        source = RecordingRedmineSource()

        found = list(source.recent_issue_summaries(limit=1))

        self.assertEqual(39779, found[0]["id"])
        self.assertEqual(1, len(source.urls))
        self.assertTrue(source.urls[0].startswith("http://redmine.local/issues.json"))

    def test_issue_detail_fetches_journals_and_helpdesk(self):
        source = RecordingRedmineSource()

        fetched = source.issue_detail(39779)

        self.assertEqual(39779, fetched["id"])
        self.assertIn("include=journals%2Chelpdesk", source.urls[0])

    def test_recent_helpdesk_issues_requests_helpdesk_include_with_journals(self):
        source = RecordingRedmineSource()

        found = list(source.recent_helpdesk_issues(limit=1))

        self.assertEqual(39779, found[0]["id"])
        # urls[0] is the list request, urls[1] the detail request.
        self.assertIn("include=journals%2Chelpdesk", source.urls[1])
        self.assertIn("subproject_id=%21%2A", source.urls[0])

    def test_recent_helpdesk_issues_paginates_past_redmine_page_limit(self):
        source = PagedRedmineSource()

        found = list(source.recent_helpdesk_issues(limit=250))

        self.assertEqual(250, len(found))
        list_urls = [requested for requested in source.urls if requested.startswith(self.LIST_PREFIX)]
        self.assertEqual(3, len(list_urls))
        self.assertIn("limit=100", list_urls[0])
        for page_index, requested in enumerate(list_urls):
            self.assertIn(f"offset={page_index * 100}", requested)

    def test_recent_helpdesk_issues_skips_duplicate_issue_ids_across_pages(self):
        source = DuplicatePagedRedmineSource()

        found = list(source.recent_helpdesk_issues(limit=3))

        self.assertEqual([1, 2, 3], [entry["id"] for entry in found])
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,277 @@
|
||||
import io
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_index.__main__ import main
|
||||
from semantic_index.models import IndexDocument
|
||||
from semantic_index.refresh import FileRefreshState, RedmineRefreshService
|
||||
|
||||
|
||||
def issue(updated_on="2026-04-25T12:00:00Z"):
    """Return a fresh Redmine-style issue dict for issue 39779.

    Only ``updated_on`` varies; every call builds new outer and nested
    dicts, so callers may mutate the result freely.
    """
    project = {"id": 1, "identifier": "customer-service", "name": "Customer Service"}
    record = {
        "id": 39779,
        "subject": "Goods return",
        "description": "Please return our goods.",
    }
    record["updated_on"] = updated_on
    record["project"] = project
    return record
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """Redmine source test double serving a fixed list of issues.

    ``calls`` records ``(project_identifier, limit)`` for every request.
    """

    # Read at call time, so assigning it on an instance shows up in ``calls``.
    project_identifier = None

    def __init__(self, issues=None):
        # Any falsy value (None or an empty list) falls back to one default issue.
        self.issues = issues if issues else [issue()]
        self.calls = []

    def recent_helpdesk_issues(self, limit):
        """Record the request and return at most ``limit`` issues."""
        request = (self.project_identifier, limit)
        self.calls.append(request)
        return self.issues[:limit]
|
||||
|
||||
|
||||
class SummaryDetailRedmineSource(FakeRedmineSource):
    """Source double that serves summaries and per-issue details separately.

    ``summary_calls`` records ``(project_identifier, limit)`` per summary
    request; ``detail_calls`` records each issue id whose detail was fetched.
    """

    def __init__(self, summaries, details):
        super().__init__([])
        self.summaries, self.details = summaries, details
        self.summary_calls = []
        self.detail_calls = []

    def recent_issue_summaries(self, limit):
        """Record the request and return at most ``limit`` summaries."""
        request = (self.project_identifier, limit)
        self.summary_calls.append(request)
        return self.summaries[:limit]

    def issue_detail(self, issue_id):
        """Record the lookup and return the detail stored under ``issue_id``."""
        self.detail_calls.append(issue_id)
        detail = self.details[issue_id]
        return detail
|
||||
|
||||
|
||||
class RecordingEmbedder:
    """Embedder test double that records each batch and returns fixed vectors."""

    def __init__(self):
        # One entry per ``embed_documents`` call: a list snapshot of the batch.
        self.calls = []

    def embed_documents(self, docs):
        """Record ``docs`` and return one ``[0.1, 0.2, 0.3]`` vector per document.

        The batch is materialized once and the vectors are built from that
        snapshot. Iterating ``docs`` a second time would yield nothing for a
        one-shot iterable such as a generator, producing fewer vectors than
        documents.
        """
        batch = list(docs)
        self.calls.append(batch)
        # A fresh list per document so callers can mutate vectors independently.
        return [[0.1, 0.2, 0.3] for _ in batch]
|
||||
|
||||
|
||||
class RefreshStore:
    """In-memory store test double for the refresh service tests.

    ``existing`` maps document ids to stored documents; ``upserts`` and
    ``deleted_ids`` record what the code under test wrote or removed.
    """

    def __init__(self, existing=None):
        # Any falsy value (None or an empty mapping) is replaced by a new dict.
        self.existing = existing if existing else {}
        self.upserts = []
        self.deleted_ids = []

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None, issue_id=None):
        """Return up to ``limit`` stored documents; the filter arguments are ignored."""
        documents = list(self.existing.values())
        return documents[:limit]

    def upsert(self, docs, vectors):
        """Record one ``(documents, vectors)`` pair, each copied into a list."""
        recorded = (list(docs), list(vectors))
        self.upserts.append(recorded)

    def delete_documents(self, document_ids):
        """Append every given id to ``deleted_ids``."""
        self.deleted_ids += document_ids
|
||||
|
||||
|
||||
class RedmineRefreshServiceTest(unittest.TestCase):
    """Behavioural tests for ``RedmineRefreshService`` and ``FileRefreshState``.

    The service is always wired to in-memory fakes; tests that need refresh
    state keep it in a temporary directory.
    """

    PROJECT = "customer-service"
    LAST_SUCCESS = "2026-04-25T12:00:00Z"

    @staticmethod
    def _make_service(source, embedder, **extra):
        """Build the service around an empty ``RefreshStore`` plus ``extra`` kwargs."""
        return RedmineRefreshService(source=source, embedder=embedder, store=RefreshStore(), **extra)

    @staticmethod
    def _indexed_copy(candidate):
        """Return the stored form of ``candidate`` with an independent payload dict."""
        return {"id": candidate.id, "text": candidate.text, "payload": dict(candidate.payload)}

    def _seeded_state(self, directory):
        """Create refresh state under ``directory`` with a recorded success."""
        state = FileRefreshState(Path(directory) / "refresh.json")
        state.mark_success(self.PROJECT, self.LAST_SUCCESS)
        return state

    def test_refresh_skips_embeddings_when_source_hash_matches_existing_document(self):
        source = FakeRedmineSource()
        embedder = RecordingEmbedder()
        service = self._make_service(source, embedder)
        candidate = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[candidate.id] = self._indexed_copy(candidate)

        outcome = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, outcome["unchanged_documents"])
        self.assertEqual(0, outcome["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)

    def test_refresh_embeds_only_changed_and_new_documents(self):
        source = FakeRedmineSource()
        embedder = RecordingEmbedder()
        service = self._make_service(source, embedder)
        candidate = service.mapper.issue_to_documents(issue())[0]
        # Same id as the candidate, but with different text and hash.
        outdated = {
            "id": candidate.id,
            "text": "Old text",
            "payload": {**candidate.payload, "source_hash": "old-hash"},
        }
        service.store.existing[candidate.id] = outdated

        outcome = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, outcome["changed_documents"])
        self.assertEqual(1, outcome["embedded_documents"])
        self.assertEqual([[candidate]], embedder.calls)
        first_upsert_documents = service.store.upserts[0][0]
        self.assertEqual([candidate.id], [doc.id for doc in first_upsert_documents])

    def test_refresh_deletes_stale_issue_documents_without_embedding(self):
        source = FakeRedmineSource()
        embedder = RecordingEmbedder()
        service = self._make_service(source, embedder)
        candidate = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[candidate.id] = self._indexed_copy(candidate)
        stale_id = "redmine:issue:39779:journal:1:chunk:0"
        service.store.existing[stale_id] = {
            "id": stale_id,
            "text": "Deleted note",
            "payload": {"source_hash": "gone", "issue_id": 39779},
        }

        outcome = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, outcome["stale_documents"])
        self.assertEqual(["redmine:issue:39779:journal:1:chunk:0"], service.store.deleted_ids)
        self.assertEqual([], embedder.calls)

    def test_dry_run_reports_planned_embeddings_without_embedding_or_mutating(self):
        source = FakeRedmineSource()
        embedder = RecordingEmbedder()
        service = self._make_service(source, embedder)

        outcome = service.refresh_redmine_project_limits({"customer-service": 1}, dry_run=True)

        self.assertEqual(1, outcome["new_documents"])
        self.assertEqual(1, outcome["would_embed_documents"])
        self.assertEqual(0, outcome["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)
        self.assertEqual([], service.store.deleted_ids)

    def test_force_rebuild_embeds_unchanged_documents(self):
        source = FakeRedmineSource()
        embedder = RecordingEmbedder()
        service = self._make_service(source, embedder)
        candidate = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[candidate.id] = self._indexed_copy(candidate)

        outcome = service.refresh_redmine_project_limits({"customer-service": 1}, force_rebuild=True)

        self.assertEqual(1, outcome["force_rebuilt_documents"])
        self.assertEqual(1, outcome["embedded_documents"])
        self.assertEqual([[candidate]], embedder.calls)

    def test_force_rebuild_ignores_refresh_state_window_for_fetched_candidates(self):
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = self._seeded_state(tmp)
            service = self._make_service(source, embedder, state=state)

            outcome = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                force_rebuild=True,
                overlap_minutes=15,
            )

            self.assertEqual(0, outcome["skipped_issues"])
            self.assertEqual(1, outcome["embedded_documents"])

    def test_file_refresh_state_updates_only_when_called(self):
        with tempfile.TemporaryDirectory() as tmp:
            state_file = Path(tmp) / "refresh.json"
            state = FileRefreshState(state_file)
            self.assertEqual({}, state.load())

            state.mark_success("customer-service", "2026-04-25T12:00:00Z")

            expected = {
                "projects": {
                    "customer-service": {"last_successful_refresh_at": "2026-04-25T12:00:00Z"},
                },
            }
            on_disk = json.loads(state_file.read_text(encoding="utf-8"))
            self.assertEqual(expected, on_disk)

    def test_refresh_state_skips_issues_older_than_overlap_window(self):
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = self._seeded_state(tmp)
            service = self._make_service(source, embedder, state=state)

            outcome = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(1, outcome["issues"])
            self.assertEqual(1, outcome["skipped_issues"])
            self.assertEqual(0, outcome["documents"])
            self.assertEqual([], embedder.calls)

    def test_refresh_skips_old_summaries_without_fetching_issue_detail(self):
        # Relative to the 12:00 success with a 15-minute overlap, 10:00 is
        # outside the window and 11:50 is inside it.
        old_summary = {"id": 39779, "updated_on": "2026-04-25T10:00:00Z"}
        new_summary = {"id": 39780, "updated_on": "2026-04-25T11:50:00Z"}
        new_detail = {**issue("2026-04-25T11:50:00Z"), "id": 39780}
        source = SummaryDetailRedmineSource(
            summaries=[old_summary, new_summary],
            details={39780: new_detail},
        )
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = self._seeded_state(tmp)
            service = self._make_service(source, embedder, state=state)

            outcome = service.refresh_redmine_project_limits(
                {"customer-service": 2},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(2, outcome["scanned_issues"])
            self.assertEqual(1, outcome["skipped_issues"])
            self.assertEqual(1, outcome["detail_fetched_issues"])
            self.assertEqual([39780], source.detail_calls)
|
||||
|
||||
|
||||
class RefreshCliTest(unittest.TestCase):
    """Checks how ``--refresh-redmine-projects`` arguments reach the refresh service."""

    @staticmethod
    def _run_main(argv, service_builder):
        """Call ``main`` with stdout captured and return the printed text."""
        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, service_builder=service_builder)
        return captured.getvalue()

    def test_refresh_redmine_projects_cli_parses_project_limits_and_dry_run(self):
        class FakeRefresh:
            """Records the arguments of each refresh call."""

            def __init__(self):
                self.calls = []

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                received = (project_limits, dry_run, force_rebuild, overlap_minutes)
                self.calls.append(received)
                return {
                    "source": "redmine",
                    "projects": len(project_limits),
                    "issues": sum(project_limits.values()),
                }

        refresh = FakeRefresh()
        services = {"refresh": refresh}
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=5,hiring=2",
            "--dry-run",
            "--overlap-minutes",
            "30",
        ]

        printed = self._run_main(argv, lambda: services)

        self.assertEqual(({"customer-service": 5, "hiring": 2}, True, False, 30), refresh.calls[0])
        self.assertIn("'projects': 2", printed)

    def test_refresh_redmine_projects_cli_can_override_state_path(self):
        class FakeRefresh:
            """Reports the path of whatever ``state`` it holds when called."""

            def __init__(self):
                self.state = None

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                return {"state_path": str(self.state.path)}

        refresh = FakeRefresh()
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=1",
            "--state-path",
            "/tmp/semantic-refresh-state.json",
        ]

        printed = self._run_main(argv, lambda: {"refresh": refresh})

        self.assertIn("/tmp/semantic-refresh-state.json", printed)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,85 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.models import IndexDocument, SearchQuery, SearchResult
|
||||
from semantic_index.qdrant_store import build_filter, point_id_for_document
|
||||
from semantic_index.search import HybridSearchService, keyword_boost
|
||||
|
||||
|
||||
class FakeEmbedder:
    """Embedder double that returns one fixed vector for every query."""

    def embed_query(self, text):
        """Return the constant vector ``[0.1, 0.2, 0.3]``; ``text`` is ignored."""
        return [0.1, 0.2, 0.3]
|
||||
|
||||
|
||||
class FakeStore:
    """Vector-store double that remembers the last query and returns canned hits."""

    def __init__(self):
        # Last query object passed to search(); None until search() is called.
        self.query = None

    def search(self, vector, query, limit):
        """Record ``query`` and return at most ``limit`` canned results.

        ``vector`` is ignored. The hit with the higher raw score ("weak",
        0.7) is listed before the one whose text contains the email address
        and order number ("strong", 0.6).
        """
        self.query = query
        canned = [
            SearchResult(
                id="weak",
                score=0.7,
                text="general support text",
                payload={"redmine_url": "http://redmine/issues/1"},
            ),
            SearchResult(
                id="strong",
                score=0.6,
                text="Customer ada@example.com asked about ORD-12345",
                payload={"redmine_url": "http://redmine/issues/2"},
            ),
        ]
        return canned[:limit]
|
||||
|
||||
|
||||
class SearchTest(unittest.TestCase):
    """Tests for point-id generation, filter building and keyword re-ranking."""

    def test_qdrant_point_id_is_deterministic_uuid_for_stable_document_id(self):
        """The same document id always maps to the same 36-character id."""
        document_id = "redmine:issue:42:journal:5:chunk:0"
        first = point_id_for_document(document_id)
        second = point_id_for_document(document_id)

        self.assertEqual(first, second)
        # Shape check only: 36 characters drawn from lowercase hex and "-".
        self.assertRegex(first, r"^[0-9a-f-]{36}$")

    def test_filter_maps_supported_metadata(self):
        """Each populated query field becomes one clause in ``filter["must"]``.

        The scalar fields are expected as ``match`` clauses in declaration
        order, and ``date_from`` / ``date_to`` are expected to merge into a
        single ``created_on`` range clause.
        """
        query = SearchQuery(
            text="printer",
            source="redmine",
            project_identifier="fud-helpdesk",
            doc_type="message",
            issue_id=42,
            contact_email="ada@example.com",
            date_from="2026-04-01T00:00:00Z",
            date_to="2026-04-30T23:59:59Z",
        )

        qfilter = build_filter(query)

        expected_must = [
            {"key": "source", "match": {"value": "redmine"}},
            {"key": "project_identifier", "match": {"value": "fud-helpdesk"}},
            {"key": "doc_type", "match": {"value": "message"}},
            {"key": "issue_id", "match": {"value": 42}},
            {"key": "contact_email", "match": {"value": "ada@example.com"}},
            {
                "key": "created_on",
                "range": {
                    "gte": "2026-04-01T00:00:00Z",
                    "lte": "2026-04-30T23:59:59Z",
                },
            },
        ]
        self.assertEqual(expected_must, qfilter["must"])

    def test_keyword_boost_prioritizes_exact_email_and_order_matches(self):
        """Exact email/order-number hits outrank a higher raw vector score.

        First checks ``keyword_boost`` directly, then checks that
        ``HybridSearchService.search`` puts the boosted result first and
        exposes its Redmine URL through ``citation["url"]``.
        """
        query_text = 'ada@example.com "ORD-12345"'
        weak = SearchResult(
            id="weak", score=0.7, text="general support text", payload={}
        )
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={},
        )

        self.assertGreater(
            keyword_boost(query_text, strong),
            keyword_boost(query_text, weak),
        )

        service = HybridSearchService(embedder=FakeEmbedder(), store=FakeStore())
        results = service.search(SearchQuery(text=query_text, limit=2))
        top = results[0]
        self.assertEqual("strong", top.id)
        self.assertEqual("http://redmine/issues/2", top.citation["url"])
|
||||
|
||||
|
||||
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -0,0 +1,41 @@
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory two levels above the one containing this module; presumably the
# repository root -- confirm against the project layout.
ROOT = Path(__file__).resolve().parents[2]
# Shell wrapper script exercised by the test case in this module.
REFRESH = ROOT / "semantic_index" / "refresh.sh"
|
||||
|
||||
|
||||
class SemanticIndexShellWrapperTest(unittest.TestCase):
    """Tests for the ``refresh.sh`` shell wrapper."""

    def test_refresh_wrapper_is_self_locating_when_called_from_another_directory(self):
        """The wrapper succeeds when launched from an unrelated working directory.

        ``PYTHON`` is set to ``/bin/echo``; presumably the wrapper runs
        ``$PYTHON`` as its interpreter, so the command line it would execute
        is printed to stdout instead of being run (confirm against
        ``refresh.sh``). Log and state locations are pointed into the
        temporary directory through the ``SEMANTIC_INDEX_*`` variables.
        """
        with tempfile.TemporaryDirectory() as tmp:
            tmp_path = Path(tmp)
            overrides = {
                "PYTHON": "/bin/echo",
                "SEMANTIC_INDEX_PROJECT_LIMITS": "customer-service=5",
                "SEMANTIC_INDEX_LOG_DIR": str(tmp_path / "logs"),
                "SEMANTIC_INDEX_STATE_PATH": str(
                    tmp_path / "state" / "refresh_state.json"
                ),
            }
            # Inherit the caller's environment, then apply the overrides.
            env = {**os.environ, **overrides}

            result = subprocess.run(
                [str(REFRESH)],
                cwd=tmp,
                env=env,
                text=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=False,
            )

            # stderr is passed as the failure message to aid debugging.
            self.assertEqual(0, result.returncode, result.stderr)
            stdout = result.stdout
            self.assertIn("-m semantic_index --refresh-redmine-projects", stdout)
            self.assertIn("--project-limits customer-service=5", stdout)
            self.assertIn("log_file=", stdout)
|
||||
|
||||
|
||||
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user