Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+394
View File
@@ -0,0 +1,394 @@
import io
import json
import unittest
from contextlib import redirect_stdout
from pathlib import Path
from semantic_index.__main__ import main
from semantic_index.config import Settings
from semantic_index.models import SearchResult
class FakeSearchService:
    """In-memory stand-in for the search service used by the CLI tests.

    Every query handed to ``search`` is appended to ``queries`` so a test can
    inspect what the CLI asked for afterwards.
    """

    def __init__(self):
        self.queries = []

    def search(self, query):
        """Record *query* and return a canned list of results.

        A query whose text contains ``missing@example.test`` yields no hits.
        Any other query yields exactly one result, shaped as a contact
        document when the text contains ``callum`` and as an issue document
        otherwise.
        """
        self.queries.append(query)
        text = query.text
        if "missing@example.test" in text:
            return []

        # The same substring check drives both the result id and its doc_type.
        if "callum" in text:
            result_id = "redmine:contact:1890:issue:39779:chunk:0"
            kind = "contact"
        else:
            result_id = "redmine:issue:39779:chunk:0"
            kind = "issue"

        payload = {
            "source": "redmine",
            "doc_type": kind,
            "issue_id": 39779,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "contact_company": "SafeTag Tracking",
            "redmine_url": "http://redmine/issues/39779",
        }
        hit = SearchResult(
            id=result_id,
            score=0.58,
            text="Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            payload=payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed journal document echoing back *document_id*."""
        payload = {
            "source": "redmine",
            "doc_type": "journal",
            "issue_id": 39778,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "redmine_url": "http://redmine/issues/39778",
        }
        document = {"id": document_id, "text": "Full indexed text"}
        document["payload"] = payload
        return document
class FakeStore:
    """In-memory stand-in for the document store used by the CLI tests.

    The filter arguments are accepted but ignored: the same canned data is
    returned for every call. Each ``limit`` passed to ``list_documents`` is
    remembered in ``list_limits``.
    """

    def __init__(self):
        self.list_limits = []

    def count_documents(self, source=None, project_identifier=None, doc_type=None):
        """Return a fixed document count of 12 regardless of the filters."""
        return 12

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None):
        """Record *limit* and return four canned documents.

        Three documents (issue, journal, contact) belong to helpdesk issue
        39779 in ``customer-service`` and carry contact metadata; the fourth
        is a plain ``hiring`` issue without a helpdesk ticket or contact.
        """
        self.list_limits.append(limit)

        def helpdesk_document(document_id, text, kind, source_hash):
            # The three helpdesk documents differ only in id, text, doc_type
            # and source_hash; everything else in the payload is shared.
            return {
                "id": document_id,
                "text": text,
                "payload": {
                    "source": "redmine",
                    "doc_type": kind,
                    "issue_id": 39779,
                    "project_identifier": "customer-service",
                    "project_name": "Customer Service",
                    "has_helpdesk_ticket": True,
                    "contact_id": 1890,
                    "contact_name": "Callum Mackeonis",
                    "contact_email": "callum@safetagtracking.com",
                    "contact_company": "SafeTag Tracking",
                    "source_hash": source_hash,
                    "redmine_url": "http://redmine/issues/39779",
                },
            }

        documents = [
            helpdesk_document(
                "redmine:issue:39779:chunk:0",
                "Issue #39779: Goods return\nPlease return our goods.",
                "issue",
                "issue-hash",
            ),
            helpdesk_document(
                "redmine:issue:39779:journal:71570:chunk:0",
                "Hello, we can arrange this today.",
                "journal",
                "journal-hash",
            ),
            helpdesk_document(
                "redmine:contact:1890:issue:39779:chunk:0",
                "Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
                "contact",
                "contact-hash",
            ),
        ]
        documents.append(
            {
                "id": "redmine:issue:39800:chunk:0",
                "text": "Ordinary issue with no helpdesk contact.",
                "payload": {
                    "source": "redmine",
                    "doc_type": "issue",
                    "issue_id": 39800,
                    "project_identifier": "hiring",
                    "project_name": "Hiring",
                    "has_helpdesk_ticket": False,
                    "source_hash": "ordinary-hash",
                    "redmine_url": "http://redmine/issues/39800",
                },
            }
        )
        return documents
class FakeRedmineSource:
    """In-memory stand-in for the Redmine source used by the CLI tests."""

    def recent_helpdesk_issues(self, limit):
        """Return the single canned helpdesk issue, sliced to *limit* items."""
        contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "email": "callum@safetagtracking.com",
            "company": "SafeTag Tracking",
        }
        ticket = {
            "id": 35159,
            "contact_id": 1890,
            "contact": contact,
        }
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please return our goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": ticket,
        }
        issues = [issue]
        return issues[:limit]
def fake_services(store=None, search=None):
    """Build the service mapping handed to ``main`` in these tests.

    A fresh ``Settings`` object with fixed test values is always created.
    *store* and *search* replace the default fakes when they are truthy; the
    Redmine source and backfill service are always new fake instances.
    """
    config = {
        "openai_api_key": "",
        "qdrant_url": "http://qdrant",
        "qdrant_api_key": None,
        "qdrant_collection": "semantic",
        "redmine_url": "http://redmine",
        "redmine_api_key": "",
        "redmine_project_identifier": "customer-service",
        "sample_limit": 50,
        "bind_host": "127.0.0.1",
        "bind_port": 8787,
        "service_api_key": None,
        "refresh_state_path": Path(".cache/semantic_index/refresh_state.json"),
    }
    services = {"settings": Settings(**config)}
    # Fall back to a default fake only when the caller supplied nothing usable.
    services["search"] = search if search else FakeSearchService()
    services["store"] = store if store else FakeStore()
    services["redmine_source"] = FakeRedmineSource()
    services["backfill"] = FakeBackfillService()
    return services
class FakeBackfillService:
    """In-memory stand-in for the backfill service used by the CLI tests.

    Each method appends a tuple describing the call to ``calls`` and returns
    a summary dict computed purely from its arguments.
    """

    def __init__(self):
        self.calls = []

    def backfill_redmine_sample(self, limit):
        """Record a sample backfill and report *limit* issues and documents."""
        self.calls.append(("sample", limit))
        summary = {"source": "redmine"}
        summary["issues"] = limit
        summary["documents"] = limit
        return summary

    def backfill_redmine_projects(self, projects, per_project_limit):
        """Record a multi-project backfill with one shared per-project limit."""
        self.calls.append(("projects", projects, per_project_limit))
        total = len(projects) * per_project_limit
        per_project = []
        for project in projects:
            per_project.append(
                {
                    "project_identifier": project,
                    "issues": per_project_limit,
                    "documents": per_project_limit,
                }
            )
        return {
            "source": "redmine",
            "projects": len(projects),
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }

    def backfill_redmine_project_limits(self, project_limits):
        """Record a multi-project backfill driven by a project -> limit mapping."""
        self.calls.append(("project_limits", project_limits))
        total = sum(project_limits.values())
        per_project = []
        for project, limit in project_limits.items():
            per_project.append(
                {"project_identifier": project, "issues": limit, "documents": limit}
            )
        return {
            "source": "redmine",
            "projects": len(project_limits),
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }
class InspectCliTest(unittest.TestCase):
    """Exercise the command-line entry point ``main`` against in-memory fakes."""

    def _capture(self, args, **main_kwargs):
        """Call ``main(args, **main_kwargs)`` and return what it wrote to stdout."""
        buffer = io.StringIO()
        with redirect_stdout(buffer):
            main(args, **main_kwargs)
        return buffer.getvalue()

    def run_cli(self, args):
        """Run the CLI with the default fake services and return its stdout."""
        return self._capture(args, service_builder=fake_services)

    def test_no_args_prints_help_without_building_services(self):
        # The builder blows up if invoked, proving help is printed without it.
        def broken_services():
            raise AssertionError("help should not build live services")

        output = self._capture([], service_builder=broken_services)
        self.assertIn("inspect", output)

    def test_count_lists_matching_document_count(self):
        argv = ["inspect", "count", "--source", "redmine", "--project", "customer-service"]
        self.assertIn("12", self.run_cli(argv))

    def test_list_shows_snippet_and_metadata_by_default(self):
        argv = ["inspect", "list", "--limit", "5", "--source", "redmine", "--project", "customer-service"]
        output = self.run_cli(argv)
        self.assertIn("redmine:issue:39779:chunk:0", output)
        self.assertIn("issue #39779", output.lower())
        for fragment in (
            "customer-service",
            "contact=#1890",
            "Callum Mackeonis",
            "callum@safetagtracking.com",
        ):
            self.assertIn(fragment, output)
        self.assertNotIn("Full indexed text", output)

    def test_search_runs_query_and_prints_citation(self):
        argv = ["inspect", "search", "order status", "--limit", "3", "--project", "customer-service"]
        output = self.run_cli(argv)
        for fragment in ("score=0.5800", "http://redmine/issues/39779"):
            self.assertIn(fragment, output)

    def test_show_prints_full_document_text(self):
        output = self.run_cli(["inspect", "show", "redmine:issue:39778:chunk:0"])
        for fragment in ("Full indexed text", "doc_type=journal"):
            self.assertIn(fragment, output)

    def test_preview_redmine_maps_documents_without_writing(self):
        argv = ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"]
        output = self.run_cli(argv)
        for fragment in (
            "redmine:issue:39779:chunk:0",
            "project=customer-service",
            "Please return our goods",
        ):
            self.assertIn(fragment, output)

    def test_preview_redmine_uses_minimal_service_builder(self):
        seen_projects = []

        def minimal_builder(settings):
            seen_projects.append(settings.redmine_project_identifier)
            return {"settings": settings, "redmine_source": FakeRedmineSource()}

        def forbidden_full_builder():
            # Preview must go through the minimal builder only.
            raise AssertionError("full services should not be built")

        output = self._capture(
            ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"],
            service_builder=forbidden_full_builder,
            preview_service_builder=minimal_builder,
            settings_loader=lambda: fake_services()["settings"],
        )
        self.assertEqual(["customer-service"], seen_projects)
        self.assertIn("redmine:issue:39779:chunk:0", output)

    def test_audit_prints_doc_type_counts_contact_coverage_and_attachment_check(self):
        argv = ["inspect", "audit", "--limit", "10", "--source", "redmine", "--project", "customer-service"]
        output = self.run_cli(argv)
        for fragment in (
            "documents=4",
            "doc_type issue=2",
            "doc_type journal=1",
            "doc_type contact=1",
            "contact_metadata 3/4",
            "helpdesk_contact_metadata 3/3",
            "project customer-service=3",
            "project hiring=1",
            "attachments=0",
        ):
            self.assertIn(fragment, output)
        self.assertNotIn("missing_contact redmine:issue:39800:chunk:0", output)

    def test_audit_json_returns_machine_readable_summary(self):
        argv = ["inspect", "audit", "--limit", "10", "--project", "customer-service", "--json"]
        summary = json.loads(self.run_cli(argv))
        self.assertEqual(4, summary["total_documents"])
        self.assertEqual(2, summary["doc_type_counts"]["issue"])
        project_counts = summary["project_counts"]
        self.assertEqual(3, project_counts["customer-service"])
        self.assertEqual(1, project_counts["hiring"])
        self.assertEqual([], summary["missing_helpdesk_contact_metadata"])

    def test_compare_redmine_reports_missing_stale_and_contact_mismatches(self):
        argv = ["inspect", "compare-redmine", "--limit", "1", "--project", "customer-service"]
        output = self.run_cli(argv)
        for fragment in (
            "preview_documents=2",
            "indexed_documents=4",
            "stale",
            "redmine:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, output)

    def test_compare_redmine_fetches_a_large_index_window_to_avoid_false_missing_results(self):
        store = FakeStore()
        # Output is irrelevant here; only the limit passed to the store matters.
        self._capture(
            ["inspect", "compare-redmine", "--limit", "3", "--project", "customer-service"],
            service_builder=lambda: fake_services(store=store),
        )
        self.assertEqual(5000, store.list_limits[0])

    def test_smoke_search_prints_pass_fail_for_known_queries(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "callum@safetagtracking.com",
            "--issue-id",
            "39779",
        ]
        output = self.run_cli(argv)
        for fragment in (
            "PASS email callum@safetagtracking.com",
            "PASS issue 39779",
            "redmine:contact:1890:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, output)

    def test_smoke_search_uses_issue_id_filter_for_issue_checks(self):
        search = FakeSearchService()
        self._capture(
            ["inspect", "smoke-search", "--project", "customer-service", "--issue-id", "39779"],
            service_builder=lambda: fake_services(search=search),
        )
        issue_queries = [recorded for recorded in search.queries if recorded.text == "39779"]
        self.assertEqual(39779, issue_queries[0].issue_id)

    def test_smoke_search_json_returns_check_results(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "missing@example.test",
            "--json",
        ]
        report = json.loads(self.run_cli(argv))
        first_check = report["checks"][0]
        self.assertFalse(first_check["passed"])
        self.assertEqual("email", first_check["kind"])

    def test_backfill_redmine_projects_cli_parses_comma_separated_projects(self):
        backfill = FakeBackfillService()
        services = fake_services()
        services["backfill"] = backfill
        argv = [
            "--backfill-redmine-projects",
            "--projects",
            "customer-service,hiring",
            "--per-project-limit",
            "25",
        ]
        output = self._capture(argv, service_builder=lambda: services)
        self.assertEqual(("projects", ["customer-service", "hiring"], 25), backfill.calls[0])
        self.assertIn("'projects': 2", output)

    def test_backfill_redmine_projects_cli_parses_project_specific_limits(self):
        backfill = FakeBackfillService()
        services = fake_services()
        services["backfill"] = backfill
        argv = [
            "--backfill-redmine-projects",
            "--project-limits",
            "customer-service=500,hiring=200",
        ]
        output = self._capture(argv, service_builder=lambda: services)
        self.assertEqual(("project_limits", {"customer-service": 500, "hiring": 200}), backfill.calls[0])
        self.assertIn("'issues': 700", output)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()