Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,394 @@
|
||||
import io
|
||||
import json
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_index.__main__ import main
|
||||
from semantic_index.config import Settings
|
||||
from semantic_index.models import SearchResult
|
||||
|
||||
|
||||
class FakeSearchService:
    """Canned search backend for the CLI tests.

    Each query handed to ``search`` is appended to ``queries`` so a test can
    inspect what the CLI asked for afterwards.
    """

    def __init__(self):
        self.queries = []

    def search(self, query):
        """Record *query* and return a fixed result list.

        A query whose text contains ``missing@example.test`` yields no hits.
        Any other query yields exactly one hit, shaped as a contact document
        when the text contains ``callum`` and as an issue document otherwise.
        """
        self.queries.append(query)
        if "missing@example.test" in query.text:
            return []

        if "callum" in query.text:
            result_id = "redmine:contact:1890:issue:39779:chunk:0"
            doc_type = "contact"
        else:
            result_id = "redmine:issue:39779:chunk:0"
            doc_type = "issue"

        payload = {
            "source": "redmine",
            "doc_type": doc_type,
            "issue_id": 39779,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "contact_company": "SafeTag Tracking",
            "redmine_url": "http://redmine/issues/39779",
        }
        hit = SearchResult(
            id=result_id,
            score=0.58,
            text="Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            payload=payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed journal document whose ``id`` echoes *document_id*."""
        payload = {
            "source": "redmine",
            "doc_type": "journal",
            "issue_id": 39778,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "redmine_url": "http://redmine/issues/39778",
        }
        return {"id": document_id, "text": "Full indexed text", "payload": payload}
|
||||
|
||||
|
||||
class FakeStore:
    """Canned document store for the CLI tests.

    ``list_limits`` collects the ``limit`` passed to every ``list_documents``
    call so a test can assert on the window size that was requested.
    """

    def __init__(self):
        self.list_limits = []

    def count_documents(self, source=None, project_identifier=None, doc_type=None):
        """Return a fixed count of 12; the filter arguments are ignored."""
        return 12

    @staticmethod
    def _helpdesk_document(document_id, text, doc_type, source_hash):
        """Build one document tied to helpdesk issue 39779 and contact 1890."""
        return {
            "id": document_id,
            "text": text,
            "payload": {
                "source": "redmine",
                "doc_type": doc_type,
                "issue_id": 39779,
                "project_identifier": "customer-service",
                "project_name": "Customer Service",
                "has_helpdesk_ticket": True,
                "contact_id": 1890,
                "contact_name": "Callum Mackeonis",
                "contact_email": "callum@safetagtracking.com",
                "contact_company": "SafeTag Tracking",
                "source_hash": source_hash,
                "redmine_url": "http://redmine/issues/39779",
            },
        }

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None):
        """Record *limit* and return the same four documents every time.

        The list is neither filtered nor truncated: three documents belong to
        helpdesk issue 39779 (issue, journal, contact) and the fourth is an
        ordinary issue in the ``hiring`` project without contact metadata.
        """
        self.list_limits.append(limit)

        issue_doc = self._helpdesk_document(
            "redmine:issue:39779:chunk:0",
            "Issue #39779: Goods return\nPlease return our goods.",
            "issue",
            "issue-hash",
        )
        journal_doc = self._helpdesk_document(
            "redmine:issue:39779:journal:71570:chunk:0",
            "Hello, we can arrange this today.",
            "journal",
            "journal-hash",
        )
        contact_doc = self._helpdesk_document(
            "redmine:contact:1890:issue:39779:chunk:0",
            "Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            "contact",
            "contact-hash",
        )
        ordinary_doc = {
            "id": "redmine:issue:39800:chunk:0",
            "text": "Ordinary issue with no helpdesk contact.",
            "payload": {
                "source": "redmine",
                "doc_type": "issue",
                "issue_id": 39800,
                "project_identifier": "hiring",
                "project_name": "Hiring",
                "has_helpdesk_ticket": False,
                "source_hash": "ordinary-hash",
                "redmine_url": "http://redmine/issues/39800",
            },
        }
        return [issue_doc, journal_doc, contact_doc, ordinary_doc]
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """Canned Redmine source that knows a single helpdesk issue."""

    def recent_helpdesk_issues(self, limit):
        """Return the one canned helpdesk issue, sliced to at most *limit* items."""
        contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "email": "callum@safetagtracking.com",
            "company": "SafeTag Tracking",
        }
        ticket = {"id": 35159, "contact_id": 1890, "contact": contact}
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please return our goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": ticket,
        }
        issues = [issue]
        return issues[:limit]
|
||||
|
||||
|
||||
def fake_services(store=None, search=None):
    """Build the service mapping handed to ``main`` in these tests.

    A caller may inject its own *store* or *search* fake to inspect it after
    the run; a falsy argument is replaced by a fresh default fake. The
    Redmine source and backfill service are always new fakes.
    """
    settings = Settings(
        openai_api_key="",
        qdrant_url="http://qdrant",
        qdrant_api_key=None,
        qdrant_collection="semantic",
        redmine_url="http://redmine",
        redmine_api_key="",
        redmine_project_identifier="customer-service",
        sample_limit=50,
        bind_host="127.0.0.1",
        bind_port=8787,
        service_api_key=None,
        refresh_state_path=Path(".cache/semantic_index/refresh_state.json"),
    )

    services = {"settings": settings}
    services["search"] = search or FakeSearchService()
    services["store"] = store or FakeStore()
    services["redmine_source"] = FakeRedmineSource()
    services["backfill"] = FakeBackfillService()
    return services
|
||||
|
||||
|
||||
class FakeBackfillService:
    """Canned backfill service that logs each call and echoes plausible totals.

    Every method appends a tuple describing the call to ``calls`` and returns
    a summary dict computed purely from its arguments.
    """

    def __init__(self):
        self.calls = []

    def backfill_redmine_sample(self, limit):
        """Log a ``sample`` call and report *limit* issues and documents."""
        self.calls.append(("sample", limit))
        return {"source": "redmine", "issues": limit, "documents": limit}

    def backfill_redmine_projects(self, projects, per_project_limit):
        """Log a ``projects`` call and report *per_project_limit* per project."""
        self.calls.append(("projects", projects, per_project_limit))

        project_count = len(projects)
        total = project_count * per_project_limit
        per_project = []
        for name in projects:
            per_project.append(
                {
                    "project_identifier": name,
                    "issues": per_project_limit,
                    "documents": per_project_limit,
                }
            )
        return {
            "source": "redmine",
            "projects": project_count,
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }

    def backfill_redmine_project_limits(self, project_limits):
        """Log a ``project_limits`` call and report each project's own limit."""
        self.calls.append(("project_limits", project_limits))

        total = sum(project_limits.values())
        per_project = []
        for name, quota in project_limits.items():
            per_project.append({"project_identifier": name, "issues": quota, "documents": quota})
        return {
            "source": "redmine",
            "projects": len(project_limits),
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }
|
||||
|
||||
|
||||
class InspectCliTest(unittest.TestCase):
    """Runs ``main`` against the in-memory fakes and checks what it prints."""

    def _invoke(self, argv, **main_kwargs):
        """Call ``main(argv, **main_kwargs)`` and return the captured stdout."""
        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, **main_kwargs)
        return captured.getvalue()

    def run_cli(self, args):
        """Run the CLI with the default ``fake_services`` builder; return stdout."""
        return self._invoke(args, service_builder=fake_services)

    def test_no_args_prints_help_without_building_services(self):
        def broken_services():
            raise AssertionError("help should not build live services")

        printed = self._invoke([], service_builder=broken_services)

        self.assertIn("inspect", printed)

    def test_count_lists_matching_document_count(self):
        argv = ["inspect", "count", "--source", "redmine", "--project", "customer-service"]

        printed = self.run_cli(argv)

        self.assertIn("12", printed)

    def test_list_shows_snippet_and_metadata_by_default(self):
        argv = ["inspect", "list", "--limit", "5", "--source", "redmine", "--project", "customer-service"]

        printed = self.run_cli(argv)

        self.assertIn("redmine:issue:39779:chunk:0", printed)
        # The snippet check is case-insensitive.
        self.assertIn("issue #39779", printed.lower())
        for fragment in (
            "customer-service",
            "contact=#1890",
            "Callum Mackeonis",
            "callum@safetagtracking.com",
        ):
            self.assertIn(fragment, printed)
        # "Full indexed text" only comes from get_document, which list must not print.
        self.assertNotIn("Full indexed text", printed)

    def test_search_runs_query_and_prints_citation(self):
        argv = ["inspect", "search", "order status", "--limit", "3", "--project", "customer-service"]

        printed = self.run_cli(argv)

        self.assertIn("score=0.5800", printed)
        self.assertIn("http://redmine/issues/39779", printed)

    def test_show_prints_full_document_text(self):
        printed = self.run_cli(["inspect", "show", "redmine:issue:39778:chunk:0"])

        self.assertIn("Full indexed text", printed)
        self.assertIn("doc_type=journal", printed)

    def test_preview_redmine_maps_documents_without_writing(self):
        argv = ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"]

        printed = self.run_cli(argv)

        for fragment in (
            "redmine:issue:39779:chunk:0",
            "project=customer-service",
            "Please return our goods",
        ):
            self.assertIn(fragment, printed)

    def test_preview_redmine_uses_minimal_service_builder(self):
        seen_projects = []

        def minimal_builder(settings):
            seen_projects.append(settings.redmine_project_identifier)
            return {"settings": settings, "redmine_source": FakeRedmineSource()}

        def forbidden_builder():
            raise AssertionError("full services should not be built")

        printed = self._invoke(
            ["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"],
            service_builder=forbidden_builder,
            preview_service_builder=minimal_builder,
            settings_loader=lambda: fake_services()["settings"],
        )

        self.assertEqual(["customer-service"], seen_projects)
        self.assertIn("redmine:issue:39779:chunk:0", printed)

    def test_audit_prints_doc_type_counts_contact_coverage_and_attachment_check(self):
        argv = ["inspect", "audit", "--limit", "10", "--source", "redmine", "--project", "customer-service"]

        printed = self.run_cli(argv)

        for fragment in (
            "documents=4",
            "doc_type issue=2",
            "doc_type journal=1",
            "doc_type contact=1",
            "contact_metadata 3/4",
            "helpdesk_contact_metadata 3/3",
            "project customer-service=3",
            "project hiring=1",
            "attachments=0",
        ):
            self.assertIn(fragment, printed)
        # Issue 39800 has no helpdesk ticket, so it must not be flagged.
        self.assertNotIn("missing_contact redmine:issue:39800:chunk:0", printed)

    def test_audit_json_returns_machine_readable_summary(self):
        argv = ["inspect", "audit", "--limit", "10", "--project", "customer-service", "--json"]

        summary = json.loads(self.run_cli(argv))

        self.assertEqual(4, summary["total_documents"])
        self.assertEqual(2, summary["doc_type_counts"]["issue"])
        self.assertEqual(3, summary["project_counts"]["customer-service"])
        self.assertEqual(1, summary["project_counts"]["hiring"])
        self.assertEqual([], summary["missing_helpdesk_contact_metadata"])

    def test_compare_redmine_reports_missing_stale_and_contact_mismatches(self):
        argv = ["inspect", "compare-redmine", "--limit", "1", "--project", "customer-service"]

        printed = self.run_cli(argv)

        for fragment in (
            "preview_documents=2",
            "indexed_documents=4",
            "stale",
            "redmine:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, printed)

    def test_compare_redmine_fetches_a_large_index_window_to_avoid_false_missing_results(self):
        store = FakeStore()

        self._invoke(
            ["inspect", "compare-redmine", "--limit", "3", "--project", "customer-service"],
            service_builder=lambda: fake_services(store=store),
        )

        self.assertEqual(5000, store.list_limits[0])

    def test_smoke_search_prints_pass_fail_for_known_queries(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "callum@safetagtracking.com",
            "--issue-id",
            "39779",
        ]

        printed = self.run_cli(argv)

        for fragment in (
            "PASS email callum@safetagtracking.com",
            "PASS issue 39779",
            "redmine:contact:1890:issue:39779:chunk:0",
        ):
            self.assertIn(fragment, printed)

    def test_smoke_search_uses_issue_id_filter_for_issue_checks(self):
        search = FakeSearchService()

        self._invoke(
            ["inspect", "smoke-search", "--project", "customer-service", "--issue-id", "39779"],
            service_builder=lambda: fake_services(search=search),
        )

        issue_queries = [recorded for recorded in search.queries if recorded.text == "39779"]
        self.assertEqual(39779, issue_queries[0].issue_id)

    def test_smoke_search_json_returns_check_results(self):
        argv = [
            "inspect",
            "smoke-search",
            "--project",
            "customer-service",
            "--email",
            "missing@example.test",
            "--json",
        ]

        report = json.loads(self.run_cli(argv))

        first_check = report["checks"][0]
        self.assertFalse(first_check["passed"])
        self.assertEqual("email", report["checks"][0]["kind"])

    def test_backfill_redmine_projects_cli_parses_comma_separated_projects(self):
        backfill = FakeBackfillService()
        services = fake_services()
        # Swap in a backfill fake this test holds a reference to.
        services["backfill"] = backfill

        printed = self._invoke(
            [
                "--backfill-redmine-projects",
                "--projects",
                "customer-service,hiring",
                "--per-project-limit",
                "25",
            ],
            service_builder=lambda: services,
        )

        self.assertEqual(("projects", ["customer-service", "hiring"], 25), backfill.calls[0])
        self.assertIn("'projects': 2", printed)

    def test_backfill_redmine_projects_cli_parses_project_specific_limits(self):
        backfill = FakeBackfillService()
        services = fake_services()
        # Swap in a backfill fake this test holds a reference to.
        services["backfill"] = backfill

        printed = self._invoke(
            [
                "--backfill-redmine-projects",
                "--project-limits",
                "customer-service=500,hiring=200",
            ],
            service_builder=lambda: services,
        )

        self.assertEqual(("project_limits", {"customer-service": 500, "hiring": 200}), backfill.calls[0])
        self.assertIn("'issues': 700", printed)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Let the module be executed directly as a script to run its tests.
    unittest.main()
|
||||
Reference in New Issue
Block a user