Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+115
View File
@@ -0,0 +1,115 @@
import unittest
from pathlib import Path
from semantic_index.app import create_app
from semantic_index.config import Settings
from semantic_index.models import SearchResult
class FakeSearchService:
    """Search-service stand-in that always answers with one canned Redmine hit."""

    def search(self, query):
        """Return a single fixed ``SearchResult``; ``query`` is not inspected."""
        hit_payload = {
            "source": "redmine",
            "project_identifier": "customer-service",
            "doc_type": "issue",
            "issue_id": 1,
            "redmine_url": "http://redmine/issues/1",
            "source_record_id": "issue:1",
        }
        hit = SearchResult(
            id="redmine:issue:1:chunk:0",
            score=0.8,
            text="Snippet text",
            payload=hit_payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Echo ``document_id`` back inside a minimal document dict."""
        document = {"id": document_id, "text": "Full text", "payload": {}}
        return document
class FakeStore:
    """Store stand-in that reports exactly one indexed project."""

    def list_projects(self, source=None, limit=1000):
        """Return one fixed project row; ``source`` and ``limit`` are ignored."""
        project_row = {"project_identifier": "customer-service", "document_count": 10}
        return [project_row]
class FakeRefreshService:
    """Refresh-service stand-in that records the arguments of every call."""

    def __init__(self):
        # One tuple per call: (project_limits, dry_run, force_rebuild, overlap_minutes).
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Record the call and return a small summary dict echoing ``dry_run``."""
        recorded = (project_limits, dry_run, force_rebuild, overlap_minutes)
        self.calls.append(recorded)
        summary = {"source": "redmine"}
        summary["projects"] = len(project_limits)
        summary["dry_run"] = dry_run
        return summary
def fake_services():
    """Build the service mapping that these tests hand to ``create_app``.

    Returns a dict with ``settings``, ``search``, ``store`` and ``refresh``
    entries, each backed by a fresh fake or a fixed ``Settings`` value.
    """
    settings = Settings(
        openai_api_key="",
        qdrant_url="http://qdrant",
        qdrant_api_key=None,
        qdrant_collection="semantic",
        redmine_url="http://redmine",
        redmine_api_key="",
        redmine_project_identifier=None,
        sample_limit=50,
        bind_host="127.0.0.1",
        bind_port=8787,
        service_api_key=None,
        refresh_state_path=Path(".cache/semantic_index/refresh_state.json"),
    )
    services = {
        "settings": settings,
        "search": FakeSearchService(),
        "store": FakeStore(),
        "refresh": FakeRefreshService(),
    }
    return services
class SemanticIndexAppTest(unittest.TestCase):
    """Call the app's route endpoints directly and check their responses."""

    @staticmethod
    def _endpoints(service_builder):
        """Create the app and index its endpoint callables by route path."""
        app = create_app(service_builder=service_builder)
        return {route.path: route.endpoint for route in app.routes}

    def test_health_does_not_build_live_services(self):
        # The builder raises if it is ever invoked, so a passing health check
        # shows the endpoint answered without requesting services.
        def broken_builder():
            raise AssertionError("health should not build live clients")

        health = self._endpoints(broken_builder)["/health"]

        self.assertEqual({"status": "ok"}, health())

    def test_search_endpoint_returns_normalized_agent_response(self):
        search = self._endpoints(fake_services)["/search"]
        request = {"query": "printer", "project_identifier": "customer-service", "limit": 3}

        response = search(request)

        self.assertEqual("printer", response["query"])
        self.assertEqual("customer-service", response["filters"]["project_identifier"])
        self.assertEqual("customer-service", response["results"][0]["citation"]["project_identifier"])

    def test_projects_endpoint_lists_indexed_projects(self):
        list_projects = self._endpoints(fake_services)["/projects"]

        response = list_projects()

        self.assertEqual("customer-service", response["projects"][0]["project_identifier"])

    def test_refresh_endpoint_passes_project_limits_and_cost_flags(self):
        # Keep a handle on the services so the recorded refresh call can be inspected.
        services = fake_services()
        refresh = self._endpoints(lambda: services)["/sources/redmine/refresh"]
        request = {
            "project_limits": {"customer-service": 5},
            "dry_run": True,
            "force_rebuild": False,
            "overlap_minutes": 30,
        }

        response = refresh(request)

        self.assertTrue(response["dry_run"])
        self.assertEqual(({"customer-service": 5}, True, False, 30), services["refresh"].calls[0])
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+182
View File
@@ -0,0 +1,182 @@
import unittest
from semantic_index.ingest import BackfillService
from semantic_index.mcp import SemanticMCP
from semantic_index.models import SearchQuery, SearchResult
from semantic_index.redmine import RedmineMapper
class FakeRedmineSource:
    """Redmine source stand-in serving two canned issues."""

    # Stamped onto every issue's ``project.identifier``; read at call time,
    # so assigning it on an instance changes what later calls return.
    project_identifier = None

    def recent_helpdesk_issues(self, limit):
        """Return the first ``limit`` canned issues for the current project."""
        canned = (
            (1, "First", "First body"),
            (2, "Second", "Second body"),
        )
        issues = []
        for issue_id, subject, description in canned:
            issues.append(
                {
                    "id": issue_id,
                    "subject": subject,
                    "description": description,
                    "project": {"identifier": self.project_identifier},
                }
            )
        return issues[:limit]
class DuplicateDocumentRedmineSource:
    """Redmine source stand-in whose two issues share the same issue id."""

    project_identifier = "customer-service"

    def recent_helpdesk_issues(self, limit):
        """Return up to ``limit`` issues; both entries carry ``id`` 1."""
        original = {
            "id": 1,
            "subject": "First",
            "description": "First body",
            "project": {"identifier": "customer-service"},
        }
        duplicate = {
            "id": 1,
            "subject": "First duplicate",
            "description": "Duplicate body",
            "project": {"identifier": "customer-service"},
        }
        return [original, duplicate][:limit]
class FakeEmbedder:
    """Embedder stand-in producing deterministic three-element vectors."""

    def embed_documents(self, docs):
        """Return one vector per document, led by its 1-based position as a float."""
        vectors = []
        for position, _doc in enumerate(docs, start=1):
            vectors.append([float(position), 0.0, 0.0])
        return vectors

    def embed_query(self, text):
        """Return the same fixed query vector regardless of ``text``."""
        return [0.1, 0.0, 0.0]
class FakeStore:
    """Vector-store stand-in that records rebuild requests."""

    def __init__(self):
        # (source, project_identifier) for each rebuild, in call order.
        self.deleted = []
        # (docs, vectors) for each rebuild, in call order.
        self.upserts = []

    def rebuild_source(self, source, docs, vectors, project_identifier=None):
        """Record the rebuild scope and the document/vector batch it received."""
        scope = (source, project_identifier)
        batch = (docs, vectors)
        self.deleted.append(scope)
        self.upserts.append(batch)

    def list_projects(self, source=None, limit=1000):
        """Return two fixed project rows; ``source`` and ``limit`` are ignored."""
        document_counts = (("customer-service", 1684), ("hiring", 409))
        return [
            {"project_identifier": identifier, "document_count": count}
            for identifier, count in document_counts
        ]
class FakeRefreshService:
    """Refresh-service stand-in that remembers how it was invoked."""

    def __init__(self):
        # Argument tuples in call order:
        # (project_limits, dry_run, force_rebuild, overlap_minutes).
        self.calls = []

    def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
        """Log the arguments and report the number of projects requested."""
        self.calls.append((project_limits, dry_run, force_rebuild, overlap_minutes))
        project_count = len(project_limits)
        return dict(source="redmine", projects=project_count, dry_run=dry_run)
class FakeSearchService:
    """Search-service stand-in that records queries and returns one fixed hit."""

    def __init__(self):
        # Every query object passed to ``search``, in call order.
        self.queries = []

    def search(self, query):
        """Remember ``query`` and return a single canned ``SearchResult``."""
        self.queries.append(query)
        hit = SearchResult(
            id="doc1",
            score=0.5,
            text="Snippet",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        return [hit]

    def get_document(self, document_id):
        """Return a minimal document dict echoing ``document_id``."""
        return dict(id=document_id, text="Snippet")
class BackfillAndMCPTest(unittest.TestCase):
    """Cover ``BackfillService`` rebuild scoping and ``SemanticMCP`` tool calls using fakes."""

    def test_sample_backfill_rebuilds_redmine_source(self):
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=FakeStore(),
        )

        summary = service.backfill_redmine_sample(limit=2)

        self.assertEqual({"source": "redmine", "issues": 2, "documents": 2}, summary)
        # No project configured, so the rebuild scope carries ``None``.
        self.assertEqual([("redmine", None)], service.store.deleted)
        docs, vectors = service.store.upserts[0]
        expected_ids = ["redmine:issue:1:chunk:0", "redmine:issue:2:chunk:0"]
        self.assertEqual(expected_ids, [doc.id for doc in docs])
        self.assertEqual(2, len(vectors))

    def test_sample_backfill_rebuilds_only_the_configured_project_scope(self):
        store = FakeStore()
        scoped_mapper = RedmineMapper(redmine_url="", project_identifier="customer-service")
        service = BackfillService(
            source=FakeRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
            mapper=scoped_mapper,
        )

        service.backfill_redmine_sample(limit=1)

        self.assertEqual([("redmine", "customer-service")], store.deleted)

    def test_multi_project_backfill_rebuilds_each_project_scope(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)

        summary = service.backfill_redmine_projects(["customer-service", "hiring"], per_project_limit=1)

        expected_summary = {
            "source": "redmine",
            "projects": 2,
            "issues": 2,
            "documents": 2,
            "project_results": [
                {"project_identifier": "customer-service", "issues": 1, "documents": 1},
                {"project_identifier": "hiring", "issues": 1, "documents": 1},
            ],
        }
        self.assertEqual(expected_summary, summary)
        self.assertEqual([("redmine", "customer-service"), ("redmine", "hiring")], store.deleted)
        # First document of each rebuild batch must carry that batch's project.
        self.assertEqual("customer-service", store.upserts[0][0][0].payload["project_identifier"])
        self.assertEqual("hiring", store.upserts[1][0][0].payload["project_identifier"])

    def test_multi_project_backfill_accepts_per_project_limits(self):
        store = FakeStore()
        service = BackfillService(source=FakeRedmineSource(), embedder=FakeEmbedder(), store=store)

        summary = service.backfill_redmine_project_limits({"customer-service": 2, "hiring": 1})

        self.assertEqual(3, summary["issues"])
        expected_per_project = [
            {"project_identifier": "customer-service", "issues": 2, "documents": 2},
            {"project_identifier": "hiring", "issues": 1, "documents": 1},
        ]
        self.assertEqual(expected_per_project, summary["project_results"])

    def test_backfill_deduplicates_documents_by_stable_id_before_embedding(self):
        store = FakeStore()
        service = BackfillService(
            source=DuplicateDocumentRedmineSource(),
            embedder=FakeEmbedder(),
            store=store,
        )

        summary = service.backfill_redmine_sample(limit=2)

        # Two issues were fetched but they map to the same document id.
        self.assertEqual({"source": "redmine", "issues": 2, "documents": 1}, summary)
        docs, vectors = store.upserts[0]
        self.assertEqual(["redmine:issue:1:chunk:0"], [doc.id for doc in docs])
        self.assertEqual(1, len(vectors))

    def test_mcp_tools_return_json_ready_results(self):
        search = FakeSearchService()
        refresh = FakeRefreshService()
        mcp = SemanticMCP(
            search_service=search,
            backfill_service=None,
            store=FakeStore(),
            refresh_service=refresh,
        )

        search_args = {"query": "printer", "source": "redmine", "project_identifier": "hiring", "limit": 3}
        response = mcp.call_tool("semantic_search", search_args)
        document = mcp.call_tool("semantic_get_document", {"id": "doc1"})
        projects = mcp.call_tool("semantic_list_projects", {"source": "redmine"})
        refresh_args = {"project_identifier": "customer-service", "limit": 5, "dry_run": True}
        refresh_response = mcp.call_tool("semantic_refresh_redmine", refresh_args)

        # Search response shape.
        self.assertEqual("printer", response["query"])
        self.assertEqual("hiring", response["filters"]["project_identifier"])
        self.assertEqual("doc1", response["results"][0]["id"])
        self.assertEqual("http://redmine/issues/1", response["results"][0]["citation"]["url"])
        # Query object handed to the search service.
        self.assertIsInstance(search.queries[0], SearchQuery)
        self.assertEqual("redmine", search.queries[0].source)
        self.assertEqual("hiring", search.queries[0].project_identifier)
        # Remaining tools.
        self.assertEqual({"id": "doc1", "text": "Snippet"}, document)
        self.assertEqual("customer-service", projects["projects"][0]["project_identifier"])
        self.assertTrue(refresh_response["dry_run"])
        self.assertEqual(({"customer-service": 5}, True, False, 15), refresh.calls[0])
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+37
View File
@@ -0,0 +1,37 @@
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
import unittest
from semantic_index.config import load_settings
class SemanticIndexCliTest(unittest.TestCase):
    """Smoke-test the module entry point and ``.env`` discovery in ``load_settings``."""

    def test_help_does_not_require_http_runtime_dependencies(self):
        # Run in a fresh interpreter so import-time failures surface on stderr.
        command = [sys.executable, "-m", "semantic_index", "--help"]
        completed = subprocess.run(command, check=False, capture_output=True, text=True)

        self.assertEqual("", completed.stderr)
        self.assertEqual(0, completed.returncode)
        self.assertIn("--mcp-stdio", completed.stdout)

    def test_settings_load_from_package_env_when_root_env_missing(self):
        with TemporaryDirectory() as tmp:
            root = Path(tmp)
            # Only the package-level .env exists; the root-level path passed
            # to load_settings below is never created.
            package_env = root / "semantic_index" / ".env"
            package_env.parent.mkdir()
            package_env.write_text(
                "QDRANT_URL=http://qdrant.example:6333\nREDMINE_SAMPLE_LIMIT=7\n",
                encoding="utf-8",
            )

            settings = load_settings(root / ".env")

            self.assertEqual("http://qdrant.example:6333", settings.qdrant_url)
            self.assertEqual(7, settings.sample_limit)
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+87
View File
@@ -0,0 +1,87 @@
import json
import unittest
from unittest.mock import patch
from semantic_index.client import SemanticIndexClient
from semantic_index.models import SearchResult
class FakeSearchService:
    """Search-service stand-in that records queries and returns one hiring-project hit."""

    def __init__(self):
        # Every query object passed to ``search``, in call order.
        self.queries = []

    def search(self, query):
        """Remember ``query`` and return a single canned ``SearchResult``."""
        self.queries.append(query)
        hit_payload = {
            "source": "redmine",
            "project_identifier": "hiring",
            "doc_type": "issue",
            "issue_id": 1,
            "redmine_url": "http://redmine/issues/1",
            "source_record_id": "issue:1",
        }
        hit = SearchResult(
            id="redmine:issue:1:chunk:0",
            score=0.7,
            text="Candidate follow up",
            payload=hit_payload,
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed document dict echoing ``document_id``."""
        payload = {"project_identifier": "hiring"}
        return {"id": document_id, "text": "Full text", "payload": payload}
class SemanticIndexClientTest(unittest.TestCase):
    """Exercise ``SemanticIndexClient`` in its in-process and HTTP configurations."""

    def test_in_process_client_returns_normalized_search_response(self):
        search = FakeSearchService()
        client = SemanticIndexClient(search_service=search)

        response = client.search("candidate follow up", project_identifier="hiring", limit=3)

        self.assertEqual("candidate follow up", response["query"])
        self.assertEqual({"project_identifier": "hiring", "limit": 3}, response["filters"])
        self.assertEqual("redmine:issue:1:chunk:0", response["results"][0]["id"])
        self.assertEqual("hiring", response["results"][0]["citation"]["project_identifier"])
        # The filter must also reach the underlying search service.
        self.assertEqual("hiring", search.queries[0].project_identifier)

    def test_in_process_client_get_document(self):
        client = SemanticIndexClient(search_service=FakeSearchService())

        document = client.get_document("redmine:issue:1:chunk:0")

        self.assertEqual("Full text", document["text"])

    def test_http_client_sends_auth_header_and_parses_search_response(self):
        encoded_body = json.dumps({"query": "printer", "filters": {}, "results": []}).encode()
        # Filled in by fake_urlopen with what the client actually sent.
        seen = {}

        class FakeResponse:
            """Context-manager response object whose ``read`` yields the canned body."""

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def read(self):
                return encoded_body

        def fake_urlopen(request, timeout):
            seen["url"] = request.full_url
            seen["authorization"] = request.headers.get("Authorization")
            seen["body"] = json.loads(request.data.decode())
            return FakeResponse()

        with patch("urllib.request.urlopen", fake_urlopen):
            client = SemanticIndexClient(base_url="http://semantic.local", api_key="secret")
            response = client.search("printer", project_identifier="customer-service")

        self.assertEqual("http://semantic.local/search", seen["url"])
        self.assertEqual("Bearer secret", seen["authorization"])
        self.assertEqual("customer-service", seen["body"]["project_identifier"])
        self.assertEqual("printer", response["query"])
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+138
View File
@@ -0,0 +1,138 @@
import unittest
from semantic_index.models import IndexDocument
from semantic_index.redmine import RedmineMapper
class RedmineMapperTest(unittest.TestCase):
    """Check the documents ``RedmineMapper.issue_to_documents`` builds from issue dicts."""

    @staticmethod
    def _map(issue, **mapper_options):
        """Map ``issue`` with a fresh mapper pointed at the test Redmine URL."""
        mapper = RedmineMapper(redmine_url="http://redmine.local", **mapper_options)
        return mapper.issue_to_documents(issue)

    @staticmethod
    def _first_of_type(docs, doc_type):
        """Return the first document whose payload ``doc_type`` matches."""
        return next(doc for doc in docs if doc.payload["doc_type"] == doc_type)

    def test_issue_chunks_have_stable_ids_and_metadata(self):
        issue = {
            "id": 42,
            "subject": "Widget order ORD-12345 cannot ship",
            "description": "Customer reports that widget order ORD-12345 is blocked.",
            "project": {"id": 7, "identifier": "fud-helpdesk"},
            "contact": {"id": 9, "email": "ada@example.com", "name": "Ada Lovelace"},
            "created_on": "2026-04-01T10:00:00Z",
            "updated_on": "2026-04-02T10:00:00Z",
            "url": "http://redmine.local/issues/42",
        }

        # Two independent mapper instances must agree on the generated ids.
        first = self._map(issue)
        second = self._map(issue)

        self.assertEqual([doc.id for doc in first], [doc.id for doc in second])
        lead = first[0]
        self.assertEqual("redmine:issue:42:chunk:0", lead.id)
        self.assertEqual("issue", lead.payload["doc_type"])
        self.assertEqual(42, lead.payload["issue_id"])
        self.assertEqual("fud-helpdesk", lead.payload["project_identifier"])
        self.assertIsNone(lead.payload["project_name"])
        self.assertFalse(lead.payload["has_helpdesk_ticket"])
        self.assertEqual("ada@example.com", lead.payload["contact_email"])
        self.assertEqual("Ada Lovelace", lead.payload["contact_name"])
        self.assertEqual("http://redmine.local/issues/42", lead.payload["redmine_url"])
        self.assertIn("source_hash", lead.payload)

    def test_helpdesk_ticket_contact_is_mapped_to_all_issue_chunks(self):
        helpdesk_contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "company": "SafeTag Tracking",
            "email": "callum@safetagtracking.com",
        }
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please arrange to return these goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": {
                "id": 35159,
                "contact_id": 1890,
                "from_address": "callum@safetagtracking.com",
                "contact": helpdesk_contact,
            },
            "journals": [
                {"id": 71570, "notes": "Hello, yes we can arrange this today.", "created_on": "2026-04-14T14:29:49Z"}
            ],
        }

        docs = self._map(issue)

        issue_doc = self._first_of_type(docs, "issue")
        journal_doc = self._first_of_type(docs, "journal")
        contact_doc = self._first_of_type(docs, "contact")
        # Every document type derived from the issue carries the same contact metadata.
        for doc in (issue_doc, journal_doc, contact_doc):
            payload = doc.payload
            self.assertEqual(35159, payload["helpdesk_ticket_id"])
            self.assertTrue(payload["has_helpdesk_ticket"])
            self.assertEqual(1890, payload["contact_id"])
            self.assertEqual("Callum Mackeonis", payload["contact_name"])
            self.assertEqual("SafeTag Tracking", payload["contact_company"])
            self.assertEqual("callum@safetagtracking.com", payload["contact_email"])
        self.assertIn("Callum Mackeonis", issue_doc.text)
        self.assertIn("callum@safetagtracking.com", contact_doc.text)

    def test_configured_project_identifier_is_used_when_issue_payload_omits_identifier(self):
        # The project dict has a name but no ``identifier`` key.
        issue = {
            "id": 42,
            "subject": "Widget order",
            "description": "Body",
            "project": {"id": 1, "name": "Customer Service"},
        }

        docs = self._map(issue, project_identifier="customer-service")

        lead_payload = docs[0].payload
        self.assertEqual("customer-service", lead_payload["project_identifier"])
        self.assertEqual("Customer Service", lead_payload["project_name"])

    def test_internal_non_helpdesk_issue_keeps_project_metadata_without_contact(self):
        issue = {
            "id": 55,
            "subject": "Internal hiring task",
            "description": "Follow up with candidate.",
            "project": {"id": 68, "identifier": "hiring", "name": "Hiring"},
        }

        docs = self._map(issue)

        self.assertEqual(1, len(docs))
        only_payload = docs[0].payload
        self.assertEqual("hiring", only_payload["project_identifier"])
        self.assertEqual("Hiring", only_payload["project_name"])
        self.assertFalse(only_payload["has_helpdesk_ticket"])
        self.assertIsNone(only_payload["contact_id"])

    def test_issue_journals_messages_and_contact_are_mapped(self):
        private_journal = {
            "id": 5,
            "notes": "Private escalation note",
            "private_notes": True,
            "created_on": "2026-04-03T10:00:00Z",
        }
        incoming_message = {
            "id": 6,
            "body": "Customer reply body",
            "direction": "incoming",
            "created_on": "2026-04-03T11:00:00Z",
        }
        issue = {
            "id": 42,
            "subject": "Widget order",
            "description": "Ticket envelope",
            "project": {"id": 7, "identifier": "fud-helpdesk"},
            "contact": {"id": 9, "email": "ada@example.com", "name": "Ada Lovelace"},
            "journals": [private_journal],
            "messages": [incoming_message],
        }

        docs = self._map(issue)

        ids = {doc.id for doc in docs}
        types = {doc.payload["doc_type"] for doc in docs}
        expected_ids = (
            "redmine:issue:42:journal:5:chunk:0",
            "redmine:issue:42:message:6:chunk:0",
            "redmine:contact:9:issue:42:chunk:0",
        )
        for expected_id in expected_ids:
            self.assertIn(expected_id, ids)
        self.assertEqual({"issue", "journal", "message", "contact"}, types)
        journal = self._first_of_type(docs, "journal")
        message = self._first_of_type(docs, "message")
        self.assertEqual("private", journal.payload["visibility"])
        self.assertEqual("incoming", message.payload["direction"])

    def test_empty_documents_are_rejected(self):
        # Whitespace-only text must be refused at construction time.
        self.assertRaises(ValueError, IndexDocument, id="x", text=" ", payload={})
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+46
View File
@@ -0,0 +1,46 @@
import unittest
from semantic_index.embeddings import OpenAIEmbedder
from semantic_index.models import IndexDocument
class FakeOpenAIClient:
    """Embeddings-client stand-in that records each request it receives.

    The vectors it returns are synthetic: they depend only on an input's
    position within its own request, never on the text.
    """

    def __init__(self):
        # One dict per create_embeddings call, in call order, with the keys
        # "model", "inputs" (a list copy) and "dimensions".
        self.calls = []

    def create_embeddings(self, model, inputs, dimensions=None):
        """Record the request and return one three-element vector per input.

        Args:
            model: Model name; stored in the call record unchanged.
            inputs: Iterable of input texts. One-shot iterators are supported.
            dimensions: Optional dimension hint; stored in the call record.

        Returns:
            A list with one vector per input, where the n-th input (1-based)
            maps to ``[float(n)] * 3``.
        """
        # Materialise once and reuse: copying for the record and then
        # enumerating ``inputs`` again would yield nothing for an iterator,
        # because the copy has already drained it.
        batch = list(inputs)
        self.calls.append({"model": model, "inputs": batch, "dimensions": dimensions})
        return [[float(position)] * 3 for position, _ in enumerate(batch, start=1)]
class OpenAIEmbedderTest(unittest.TestCase):
    """Check ``OpenAIEmbedder`` batching and its input validation against a fake client."""

    def test_batches_embedding_requests(self):
        client = FakeOpenAIClient()
        embedder = OpenAIEmbedder(client=client, batch_size=2, dimensions=1536)
        texts_by_id = (("a", "alpha"), ("b", "bravo"), ("c", "charlie"))
        docs = [IndexDocument(id=doc_id, text=text, payload={}) for doc_id, text in texts_by_id]

        vectors = embedder.embed_documents(docs)

        # The fake numbers vectors per request, so the third document (alone
        # in the second batch) starts again at 1.0.
        expected_vectors = [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [1.0, 1.0, 1.0]]
        self.assertEqual(expected_vectors, vectors)
        self.assertEqual(2, len(client.calls))
        first_call = client.calls[0]
        self.assertEqual(["alpha", "bravo"], first_call["inputs"])
        self.assertEqual("text-embedding-3-small", first_call["model"])
        self.assertEqual(1536, first_call["dimensions"])

    def test_rejects_empty_or_oversized_chunks_before_api_call(self):
        client = FakeOpenAIClient()
        embedder = OpenAIEmbedder(client=client, max_chars=5)

        # Whitespace-only entry.
        self.assertRaises(ValueError, embedder.embed_texts, ["ok", " "])
        # Entry longer than max_chars.
        self.assertRaises(ValueError, embedder.embed_texts, ["toolong"])

        # Neither rejected request may have reached the client.
        self.assertEqual([], client.calls)
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+394
View File
@@ -0,0 +1,394 @@
import io
import json
import unittest
from contextlib import redirect_stdout
from pathlib import Path
from semantic_index.__main__ import main
from semantic_index.config import Settings
from semantic_index.models import SearchResult
class FakeSearchService:
    """Search-service stand-in whose answer depends on markers in the query text."""

    def __init__(self):
        # Every query object passed to ``search``, in call order.
        self.queries = []

    def search(self, query):
        """Record ``query`` and return zero or one canned ``SearchResult``.

        A query text containing ``missing@example.test`` yields no hits.
        Otherwise one hit is returned; it is shaped as a contact document
        when the text contains ``callum`` and as an issue document if not.
        """
        self.queries.append(query)
        if "missing@example.test" in query.text:
            return []

        if "callum" in query.text:
            hit_id = "redmine:contact:1890:issue:39779:chunk:0"
            doc_type = "contact"
        else:
            hit_id = "redmine:issue:39779:chunk:0"
            doc_type = "issue"

        hit = SearchResult(
            id=hit_id,
            score=0.58,
            text="Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
            payload={
                "source": "redmine",
                "doc_type": doc_type,
                "issue_id": 39779,
                "project_identifier": "customer-service",
                "contact_id": 1890,
                "contact_name": "Callum Mackeonis",
                "contact_email": "callum@safetagtracking.com",
                "contact_company": "SafeTag Tracking",
                "redmine_url": "http://redmine/issues/39779",
            },
        )
        return [hit]

    def get_document(self, document_id):
        """Return a fixed journal document dict echoing ``document_id``."""
        journal_payload = {
            "source": "redmine",
            "doc_type": "journal",
            "issue_id": 39778,
            "project_identifier": "customer-service",
            "contact_id": 1890,
            "contact_name": "Callum Mackeonis",
            "contact_email": "callum@safetagtracking.com",
            "redmine_url": "http://redmine/issues/39778",
        }
        return {"id": document_id, "text": "Full indexed text", "payload": journal_payload}
class FakeStore:
    """Store stand-in with a fixed document count and four canned documents."""

    def __init__(self):
        # ``limit`` value of every ``list_documents`` call, in call order.
        self.list_limits = []

    def count_documents(self, source=None, project_identifier=None, doc_type=None):
        """Return a constant count; all filters are ignored."""
        return 12

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None):
        """Record ``limit`` and return the same four documents every time.

        Three documents belong to helpdesk issue 39779 (issue, journal and
        contact) and one is an ordinary issue in the ``hiring`` project.
        The filters and ``limit`` do not affect what is returned.
        """
        self.list_limits.append(limit)

        def helpdesk_document(document_id, text, kind, source_hash):
            # The three helpdesk documents differ only in these four values.
            return {
                "id": document_id,
                "text": text,
                "payload": {
                    "source": "redmine",
                    "doc_type": kind,
                    "issue_id": 39779,
                    "project_identifier": "customer-service",
                    "project_name": "Customer Service",
                    "has_helpdesk_ticket": True,
                    "contact_id": 1890,
                    "contact_name": "Callum Mackeonis",
                    "contact_email": "callum@safetagtracking.com",
                    "contact_company": "SafeTag Tracking",
                    "source_hash": source_hash,
                    "redmine_url": "http://redmine/issues/39779",
                },
            }

        ordinary_issue = {
            "id": "redmine:issue:39800:chunk:0",
            "text": "Ordinary issue with no helpdesk contact.",
            "payload": {
                "source": "redmine",
                "doc_type": "issue",
                "issue_id": 39800,
                "project_identifier": "hiring",
                "project_name": "Hiring",
                "has_helpdesk_ticket": False,
                "source_hash": "ordinary-hash",
                "redmine_url": "http://redmine/issues/39800",
            },
        }

        return [
            helpdesk_document(
                "redmine:issue:39779:chunk:0",
                "Issue #39779: Goods return\nPlease return our goods.",
                "issue",
                "issue-hash",
            ),
            helpdesk_document(
                "redmine:issue:39779:journal:71570:chunk:0",
                "Hello, we can arrange this today.",
                "journal",
                "journal-hash",
            ),
            helpdesk_document(
                "redmine:contact:1890:issue:39779:chunk:0",
                "Callum Mackeonis callum@safetagtracking.com SafeTag Tracking",
                "contact",
                "contact-hash",
            ),
            ordinary_issue,
        ]
class FakeRedmineSource:
    """Redmine source stand-in serving one canned helpdesk issue."""

    def recent_helpdesk_issues(self, limit):
        """Return the canned issue list truncated to ``limit`` entries."""
        contact = {
            "id": 1890,
            "name": "Callum Mackeonis",
            "email": "callum@safetagtracking.com",
            "company": "SafeTag Tracking",
        }
        ticket = {"id": 35159, "contact_id": 1890, "contact": contact}
        issue = {
            "id": 39779,
            "subject": "Goods return",
            "description": "Please return our goods.",
            "project": {"id": 1, "identifier": "customer-service"},
            "helpdesk_ticket": ticket,
        }
        issues = [issue]
        return issues[:limit]
def fake_services(store=None, search=None):
    """Build the service mapping that these tests hand to the CLI ``main``.

    Args:
        store: Store to expose; a fresh ``FakeStore`` is used when falsy.
        search: Search service to expose; a fresh ``FakeSearchService`` is
            used when falsy.

    Returns:
        Dict with ``settings``, ``search``, ``store``, ``redmine_source`` and
        ``backfill`` entries.
    """
    settings = Settings(
        openai_api_key="",
        qdrant_url="http://qdrant",
        qdrant_api_key=None,
        qdrant_collection="semantic",
        redmine_url="http://redmine",
        redmine_api_key="",
        redmine_project_identifier="customer-service",
        sample_limit=50,
        bind_host="127.0.0.1",
        bind_port=8787,
        service_api_key=None,
        refresh_state_path=Path(".cache/semantic_index/refresh_state.json"),
    )
    if not search:
        search = FakeSearchService()
    if not store:
        store = FakeStore()
    services = {"settings": settings, "search": search, "store": store}
    services["redmine_source"] = FakeRedmineSource()
    services["backfill"] = FakeBackfillService()
    return services
class FakeBackfillService:
    """Backfill-service stand-in that records calls and echoes the requested sizes."""

    def __init__(self):
        # One tuple per call, led by a tag naming the method that was used:
        # "sample", "projects" or "project_limits".
        self.calls = []

    def backfill_redmine_sample(self, limit):
        """Record the call and report ``limit`` issues and documents."""
        self.calls.append(("sample", limit))
        return dict(source="redmine", issues=limit, documents=limit)

    def backfill_redmine_projects(self, projects, per_project_limit):
        """Record the call and report ``per_project_limit`` items for every project."""
        self.calls.append(("projects", projects, per_project_limit))
        project_count = len(projects)
        total = project_count * per_project_limit
        per_project = []
        for project in projects:
            per_project.append(
                {"project_identifier": project, "issues": per_project_limit, "documents": per_project_limit}
            )
        return {
            "source": "redmine",
            "projects": project_count,
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }

    def backfill_redmine_project_limits(self, project_limits):
        """Record the call and report each project's own limit as its item count."""
        self.calls.append(("project_limits", project_limits))
        total = sum(project_limits.values())
        per_project = []
        for project, limit in project_limits.items():
            per_project.append({"project_identifier": project, "issues": limit, "documents": limit})
        return {
            "source": "redmine",
            "projects": len(project_limits),
            "issues": total,
            "documents": total,
            "project_results": per_project,
        }
class InspectCliTest(unittest.TestCase):
def run_cli(self, args):
out = io.StringIO()
with redirect_stdout(out):
main(args, service_builder=fake_services)
return out.getvalue()
def test_no_args_prints_help_without_building_services(self):
def broken_services():
raise AssertionError("help should not build live services")
out = io.StringIO()
with redirect_stdout(out):
main([], service_builder=broken_services)
self.assertIn("inspect", out.getvalue())
def test_count_lists_matching_document_count(self):
output = self.run_cli(["inspect", "count", "--source", "redmine", "--project", "customer-service"])
self.assertIn("12", output)
def test_list_shows_snippet_and_metadata_by_default(self):
output = self.run_cli(["inspect", "list", "--limit", "5", "--source", "redmine", "--project", "customer-service"])
self.assertIn("redmine:issue:39779:chunk:0", output)
self.assertIn("issue #39779", output.lower())
self.assertIn("customer-service", output)
self.assertIn("contact=#1890", output)
self.assertIn("Callum Mackeonis", output)
self.assertIn("callum@safetagtracking.com", output)
self.assertNotIn("Full indexed text", output)
def test_search_runs_query_and_prints_citation(self):
output = self.run_cli(["inspect", "search", "order status", "--limit", "3", "--project", "customer-service"])
self.assertIn("score=0.5800", output)
self.assertIn("http://redmine/issues/39779", output)
def test_show_prints_full_document_text(self):
output = self.run_cli(["inspect", "show", "redmine:issue:39778:chunk:0"])
self.assertIn("Full indexed text", output)
self.assertIn("doc_type=journal", output)
def test_preview_redmine_maps_documents_without_writing(self):
output = self.run_cli(["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"])
self.assertIn("redmine:issue:39779:chunk:0", output)
self.assertIn("project=customer-service", output)
self.assertIn("Please return our goods", output)
def test_preview_redmine_uses_minimal_service_builder(self):
services = []
def minimal_builder(settings):
services.append(settings.redmine_project_identifier)
return {"settings": settings, "redmine_source": FakeRedmineSource()}
out = io.StringIO()
with redirect_stdout(out):
main(
["inspect", "preview-redmine", "--limit", "1", "--project", "customer-service"],
service_builder=lambda: (_ for _ in ()).throw(AssertionError("full services should not be built")),
preview_service_builder=minimal_builder,
settings_loader=lambda: fake_services()["settings"],
)
self.assertEqual(["customer-service"], services)
self.assertIn("redmine:issue:39779:chunk:0", out.getvalue())
def test_audit_prints_doc_type_counts_contact_coverage_and_attachment_check(self):
output = self.run_cli(["inspect", "audit", "--limit", "10", "--source", "redmine", "--project", "customer-service"])
self.assertIn("documents=4", output)
self.assertIn("doc_type issue=2", output)
self.assertIn("doc_type journal=1", output)
self.assertIn("doc_type contact=1", output)
self.assertIn("contact_metadata 3/4", output)
self.assertIn("helpdesk_contact_metadata 3/3", output)
self.assertIn("project customer-service=3", output)
self.assertIn("project hiring=1", output)
self.assertIn("attachments=0", output)
self.assertNotIn("missing_contact redmine:issue:39800:chunk:0", output)
def test_audit_json_returns_machine_readable_summary(self):
output = self.run_cli(["inspect", "audit", "--limit", "10", "--project", "customer-service", "--json"])
payload = json.loads(output)
self.assertEqual(4, payload["total_documents"])
self.assertEqual(2, payload["doc_type_counts"]["issue"])
self.assertEqual(3, payload["project_counts"]["customer-service"])
self.assertEqual(1, payload["project_counts"]["hiring"])
self.assertEqual([], payload["missing_helpdesk_contact_metadata"])
def test_compare_redmine_reports_missing_stale_and_contact_mismatches(self):
output = self.run_cli(["inspect", "compare-redmine", "--limit", "1", "--project", "customer-service"])
self.assertIn("preview_documents=2", output)
self.assertIn("indexed_documents=4", output)
self.assertIn("stale", output)
self.assertIn("redmine:issue:39779:chunk:0", output)
def test_compare_redmine_fetches_a_large_index_window_to_avoid_false_missing_results(self):
store = FakeStore()
out = io.StringIO()
with redirect_stdout(out):
main(["inspect", "compare-redmine", "--limit", "3", "--project", "customer-service"], service_builder=lambda: fake_services(store=store))
self.assertEqual(5000, store.list_limits[0])
def test_smoke_search_prints_pass_fail_for_known_queries(self):
output = self.run_cli(["inspect", "smoke-search", "--project", "customer-service", "--email", "callum@safetagtracking.com", "--issue-id", "39779"])
self.assertIn("PASS email callum@safetagtracking.com", output)
self.assertIn("PASS issue 39779", output)
self.assertIn("redmine:contact:1890:issue:39779:chunk:0", output)
def test_smoke_search_uses_issue_id_filter_for_issue_checks(self):
search = FakeSearchService()
out = io.StringIO()
with redirect_stdout(out):
main(["inspect", "smoke-search", "--project", "customer-service", "--issue-id", "39779"], service_builder=lambda: fake_services(search=search))
issue_queries = [query for query in search.queries if query.text == "39779"]
self.assertEqual(39779, issue_queries[0].issue_id)
def test_smoke_search_json_returns_check_results(self):
output = self.run_cli(["inspect", "smoke-search", "--project", "customer-service", "--email", "missing@example.test", "--json"])
payload = json.loads(output)
self.assertFalse(payload["checks"][0]["passed"])
self.assertEqual("email", payload["checks"][0]["kind"])
def test_backfill_redmine_projects_cli_parses_comma_separated_projects(self):
backfill = FakeBackfillService()
services = fake_services()
services["backfill"] = backfill
out = io.StringIO()
with redirect_stdout(out):
main(
[
"--backfill-redmine-projects",
"--projects",
"customer-service,hiring",
"--per-project-limit",
"25",
],
service_builder=lambda: services,
)
self.assertEqual(("projects", ["customer-service", "hiring"], 25), backfill.calls[0])
self.assertIn("'projects': 2", out.getvalue())
def test_backfill_redmine_projects_cli_parses_project_specific_limits(self):
    """``--project-limits a=N,b=M`` reaches the backfill service as a dict of ints."""
    recorder = FakeBackfillService()
    wiring = fake_services()
    wiring["backfill"] = recorder
    argv = [
        "--backfill-redmine-projects",
        "--project-limits",
        "customer-service=500,hiring=200",
    ]
    captured = io.StringIO()
    with redirect_stdout(captured):
        main(argv, service_builder=lambda: wiring)
    expected_call = ("project_limits", {"customer-service": 500, "hiring": 200})
    self.assertEqual(expected_call, recorder.calls[0])
    self.assertIn("'issues': 700", captured.getvalue())
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+58
View File
@@ -0,0 +1,58 @@
import subprocess
import tempfile
import unittest
from pathlib import Path
# Directory two levels above the folder containing this file
# (presumably the repository root -- confirm against the repo layout).
ROOT = Path(__file__).resolve().parents[2]
# Installer script that the tests below execute as a subprocess.
INSTALLER = ROOT / "deploy" / "semantic-index" / "install.sh"
class SemanticIndexInstallerTest(unittest.TestCase):
    """Black-box tests that execute the installer script as a subprocess."""

    def run_installer(self, *args, env=None):
        """Run the installer with ``args`` from ``ROOT`` and return the completed process.

        stdout and stderr are captured as text, and a non-zero exit status does
        not raise, so each test can assert on the return code itself.
        """
        command = [str(INSTALLER)]
        command.extend(args)
        return subprocess.run(
            command,
            cwd=ROOT,
            env=env,
            check=False,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

    def test_default_mode_is_dry_run(self):
        """Without arguments the installer only prints what it would run."""
        completed = self.run_installer()
        self.assertEqual(0, completed.returncode, completed.stderr)
        stdout = completed.stdout
        announced = (
            "mode=dry-run",
            "would run: sudo mkdir -p /opt/semantic-index",
            "would run: sudo rsync",
        )
        for fragment in announced:
            self.assertIn(fragment, stdout)
        self.assertNotIn("Semantic Index installed, but deployment is not complete.", stdout)

    def test_apply_prints_manual_next_step_warning(self):
        """``--apply`` into a temporary tree succeeds and prints the manual follow-up warnings."""
        with tempfile.TemporaryDirectory() as tmp:
            sandbox = Path(tmp)
            etc_dir = sandbox / "etc"
            var_dir = sandbox / "var"
            # Every installer target is redirected below the temporary directory.
            env = {
                "PATH": "/usr/bin:/bin",
                "SEMANTIC_INDEX_INSTALL_DIR": str(sandbox / "opt" / "semantic-index"),
                "SEMANTIC_INDEX_ENV_FILE": str(etc_dir / "semantic-index.env"),
                "SEMANTIC_INDEX_STATE_DIR": str(var_dir / "lib" / "semantic-index"),
                "SEMANTIC_INDEX_LOG_DIR": str(var_dir / "log" / "semantic-index"),
                "SEMANTIC_INDEX_SYSTEMD_DIR": str(etc_dir / "systemd" / "system"),
            }
            completed = self.run_installer("--apply", "--no-system", "--skip-deps", env=env)
            self.assertEqual(0, completed.returncode, completed.stderr)
            warnings = (
                "Semantic Index installed, but deployment is not complete.",
                "The refresh timer was NOT enabled automatically.",
                "Do not use --force-rebuild",
            )
            for warning in warnings:
                self.assertIn(warning, completed.stdout)

    def test_invalid_argument_fails_with_usage(self):
        """An unsupported flag exits with status 2 and prints usage on stderr."""
        completed = self.run_installer("--force-rebuild")
        self.assertEqual(2, completed.returncode)
        self.assertIn("Usage:", completed.stderr)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+187
View File
@@ -0,0 +1,187 @@
import unittest
from semantic_index.models import IndexDocument
from semantic_index.qdrant_store import QdrantStore
class FakeMatchValue:
    """Test double that stores the value it was constructed with."""

    def __init__(self, value):
        # Kept as a plain attribute so tests can read it back.
        self.value = value
class FakeFieldCondition:
    """Test double that records the key, match and range it was built with."""

    def __init__(self, key, match=None, range=None):
        # Parameter names are part of the keyword interface and must stay as they are.
        self.key, self.match, self.range = key, match, range
class FakeFilter:
    """Test double that stores the ``must`` conditions it was constructed with."""

    def __init__(self, must):
        # Stored unchanged so tests can inspect the conditions in order.
        self.must = must
class FakeFilterSelector:
    """Test double that stores the filter it was constructed with."""

    def __init__(self, filter):
        # The parameter name is part of the keyword interface and must stay ``filter``.
        self.filter = filter
class FakePointIdsList:
    """Test double that stores the point ids it was constructed with."""

    def __init__(self, points):
        # Stored unchanged so tests can count and compare the ids.
        self.points = points
class FakeQModels:
    """Namespace of fake model classes assigned to ``store.qmodels`` in the tests."""

    # Each attribute points at the matching fake class defined in this module.
    MatchValue = FakeMatchValue
    FieldCondition = FakeFieldCondition
    Filter = FakeFilter
    FilterSelector = FakeFilterSelector
    PointIdsList = FakePointIdsList

    class PointStruct:
        """Fake point that records its id, vector and payload."""

        def __init__(self, id, vector, payload):
            # Parameter names are part of the keyword interface and must stay as they are.
            self.id, self.vector, self.payload = id, vector, payload
class FakeCountResult:
    """Fake count result exposing a fixed ``count`` of 7."""

    # Class-level constant: every instance reports the same count.
    count = 7
class FakeRecord:
    """Fake stored point with a fixed id and a fixed payload."""

    def __init__(self):
        self.id = "point-id"
        # A fresh dict per instance, so tests may replace or mutate it freely.
        self.payload = dict(
            document_id="redmine:issue:1:chunk:0",
            text="Indexed text",
            source="redmine",
            project_identifier="customer-service",
        )
class FakeClient:
    """Fake client that records the arguments of the calls made on it."""

    def __init__(self):
        self.upsert_batches = []
        # Nothing has been recorded yet.
        self.count_filter = self.scroll_filter = None
        self.delete_filter = self.delete_selector = None

    def get_collections(self):
        """Return an object whose ``collections`` holds one item named "semantic"."""
        collection_cls = type("Collection", (), {"name": "semantic"})
        listing_cls = type("Collections", (), {"collections": [collection_cls()]})
        return listing_cls()

    def count(self, collection_name, count_filter, exact):
        """Record ``count_filter`` and return a ``FakeCountResult``."""
        self.count_filter = count_filter
        return FakeCountResult()

    def scroll(self, collection_name, scroll_filter, limit, with_payload, with_vectors, offset=None):
        """Record ``scroll_filter`` and return one ``FakeRecord`` with no next offset."""
        self.scroll_filter = scroll_filter
        records = [FakeRecord()]
        return records, None

    def delete(self, collection_name, points_selector):
        """Record the selector and, when it has one, its ``filter`` attribute."""
        self.delete_selector = points_selector
        # Selectors without a ``filter`` attribute leave the recorded filter as None.
        self.delete_filter = getattr(points_selector, "filter", None)

    def upsert(self, collection_name, points):
        """Record one upserted batch of points."""
        self.upsert_batches.append(points)
class QdrantStoreReadTest(unittest.TestCase):
    """Tests for ``QdrantStore`` run against the fake client and fake models above."""

    def make_store(self):
        """Build a ``QdrantStore`` without running ``__init__`` and wire it to the fakes."""
        store = object.__new__(QdrantStore)
        wiring = {
            "client": FakeClient(),
            "collection": "semantic",
            "vector_size": 1536,
            "qmodels": FakeQModels,
            "upsert_batch_size": 2,
        }
        for attribute, value in wiring.items():
            setattr(store, attribute, value)
        return store

    def test_count_documents_builds_metadata_filter(self):
        """count_documents returns the client's count and filters on all three fields."""
        store = self.make_store()
        total = store.count_documents(source="redmine", project_identifier="customer-service", doc_type="issue")
        self.assertEqual(7, total)
        must = store.client.count_filter.must
        keys = [condition.key for condition in must]
        self.assertEqual(["source", "project_identifier", "doc_type"], keys)
        self.assertEqual("customer-service", must[1].match.value)

    def test_list_documents_strips_internal_payload_fields(self):
        """list_documents lifts id and text out of the payload."""
        store = self.make_store()
        listed = store.list_documents(limit=5, source="redmine", project_identifier="customer-service")
        first = listed[0]
        self.assertEqual("redmine:issue:1:chunk:0", first["id"])
        self.assertEqual("Indexed text", first["text"])
        for internal_key in ("document_id", "text"):
            self.assertNotIn(internal_key, first["payload"])

    def test_delete_by_source_can_be_limited_to_project_scope(self):
        """delete_by_source with a project filters on source and project_identifier."""
        store = self.make_store()
        store.delete_by_source("redmine", project_identifier="customer-service")
        must = store.client.delete_filter.must
        keys = [condition.key for condition in must]
        self.assertEqual(["source", "project_identifier"], keys)
        self.assertEqual("redmine", must[0].match.value)
        self.assertEqual("customer-service", must[1].match.value)

    def test_list_documents_can_be_limited_to_issue_scope(self):
        """list_documents with ``issue_id`` adds an issue_id condition to the scroll filter."""
        store = self.make_store()
        store.list_documents(limit=5, source="redmine", project_identifier="customer-service", issue_id=39779)
        must = store.client.scroll_filter.must
        keys = [condition.key for condition in must]
        self.assertEqual(["source", "project_identifier", "issue_id"], keys)
        self.assertEqual(39779, must[2].match.value)

    def test_delete_documents_deletes_stable_document_point_ids(self):
        """delete_documents sends one point id that differs from the raw document id."""
        document_id = "redmine:issue:39779:chunk:0"
        store = self.make_store()
        store.delete_documents([document_id])
        sent_points = store.client.delete_selector.points
        self.assertEqual(1, len(sent_points))
        self.assertNotEqual("redmine:issue:39779:chunk:0", sent_points[0])

    def test_upsert_sends_points_in_batches(self):
        """Five documents with a batch size of 2 are sent as batches of 2, 2 and 1."""
        store = self.make_store()
        documents = []
        for issue_id in range(5):
            documents.append(
                IndexDocument(
                    id=f"redmine:issue:{issue_id}:chunk:0",
                    text=f"Issue {issue_id}",
                    payload={"source": "redmine"},
                )
            )
        vectors = [[0.1, 0.2, 0.3] for _ in documents]
        store.upsert(documents, vectors)
        batches = store.client.upsert_batches
        self.assertEqual([2, 2, 1], [len(batch) for batch in batches])
        self.assertEqual("Issue 0", batches[0][0].payload["text"])

    def test_list_documents_paginates_qdrant_scroll_until_requested_limit(self):
        """list_documents follows the scroll offset until it has ``limit`` documents."""

        class PagedClient(FakeClient):
            """Client whose scroll returns two records per call and one continuation offset."""

            def __init__(self):
                super().__init__()
                self.offsets = []

            def scroll(self, collection_name, scroll_filter, limit, with_payload, with_vectors, offset=None):
                self.offsets.append(offset)
                page_number = len(self.offsets)
                page = []
                for suffix in ("a", "b"):
                    record = FakeRecord()
                    record.payload = {**record.payload, "document_id": f"doc:{page_number}{suffix}"}
                    page.append(record)
                # Only the first call (no offset) advertises a further page.
                next_offset = "next" if offset is None else None
                return page, next_offset

        store = self.make_store()
        store.client = PagedClient()
        listed = store.list_documents(limit=3, source="redmine")
        self.assertEqual(["doc:1a", "doc:1b", "doc:2a"], [document["id"] for document in listed])
        self.assertEqual([None, "next"], store.client.offsets)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+102
View File
@@ -0,0 +1,102 @@
import unittest
from semantic_index.redmine import RedmineApiSource
class RecordingRedmineSource(RedmineApiSource):
    """Redmine source that records requested URLs and answers with fixed JSON."""

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )
        self.urls = []

    def _get_json(self, url):
        """Record ``url`` and return a canned issue list or a canned issue detail."""
        self.urls.append(url)
        is_listing = url.startswith("http://redmine.local/issues.json")
        if not is_listing:
            return {"issue": {"id": 39779, "subject": "Goods return"}}
        return {"issues": [{"id": 39779}]}
class PagedRedmineSource(RedmineApiSource):
    """Redmine source that fabricates issue pages from the offset and limit in the URL."""

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )
        self.urls = []

    def _get_json(self, url):
        """Record ``url`` and return a synthetic page or a synthetic issue detail."""
        self.urls.append(url)
        if not url.startswith("http://redmine.local/issues.json"):
            # Detail request: the issue id sits between "/issues/" and the next dot.
            issue_id = int(url.split("/issues/", 1)[1].split(".", 1)[0])
            return {"issue": {"id": issue_id, "subject": f"Issue {issue_id}"}}
        query = url.split("?", 1)[1]
        params = {}
        for part in query.split("&"):
            name, value = part.split("=", 1)
            params[name] = value
        offset = int(params.get("offset", "0"))
        limit = int(params.get("limit", "0"))
        # Ids are consecutive, starting right after the requested offset.
        first_id = offset + 1
        return {"issues": [{"id": first_id + step} for step in range(limit)]}
class DuplicatePagedRedmineSource(RedmineApiSource):
    """Redmine source whose second page repeats an issue id from the first page."""

    def __init__(self):
        super().__init__(
            redmine_url="http://redmine.local",
            api_key="secret",
            project_identifier="customer-service",
        )

    def _get_json(self, url):
        """Return a fixed page keyed by offset, or a synthetic issue detail."""
        if not url.startswith("http://redmine.local/issues.json"):
            issue_id = int(url.split("/issues/", 1)[1].split(".", 1)[0])
            return {"issue": {"id": issue_id, "subject": f"Issue {issue_id}"}}
        query = url.split("?", 1)[1]
        params = {}
        for part in query.split("&"):
            name, value = part.split("=", 1)
            params[name] = value
        offset = int(params.get("offset", "0"))
        # Issue 2 appears on both pages; any other offset yields an empty page.
        pages = {
            0: [{"id": 1}, {"id": 2}],
            2: [{"id": 2}, {"id": 3}],
        }
        return {"issues": pages.get(offset, [])}
class RedmineApiSourceTest(unittest.TestCase):
    """Tests for ``RedmineApiSource`` using subclasses that stub ``_get_json``."""

    LIST_PREFIX = "http://redmine.local/issues.json"

    def test_recent_issue_summaries_do_not_fetch_issue_details(self):
        """Listing summaries makes exactly one request, to the issue list endpoint."""
        source = RecordingRedmineSource()
        summaries = list(source.recent_issue_summaries(limit=1))
        self.assertEqual(39779, summaries[0]["id"])
        requested = source.urls
        self.assertEqual(1, len(requested))
        self.assertTrue(requested[0].startswith("http://redmine.local/issues.json"))

    def test_issue_detail_fetches_journals_and_helpdesk(self):
        """issue_detail requests the journals and helpdesk includes."""
        source = RecordingRedmineSource()
        detail = source.issue_detail(39779)
        self.assertEqual(39779, detail["id"])
        self.assertIn("include=journals%2Chelpdesk", source.urls[0])

    def test_recent_helpdesk_issues_requests_helpdesk_include_with_journals(self):
        """The first request carries the subproject filter; the second carries the includes."""
        source = RecordingRedmineSource()
        issues = list(source.recent_helpdesk_issues(limit=1))
        self.assertEqual(39779, issues[0]["id"])
        list_url, detail_url = source.urls[0], source.urls[1]
        self.assertIn("include=journals%2Chelpdesk", detail_url)
        self.assertIn("subproject_id=%21%2A", list_url)

    def test_recent_helpdesk_issues_paginates_past_redmine_page_limit(self):
        """Asking for 250 issues produces three list requests at offsets 0, 100 and 200."""
        source = PagedRedmineSource()
        issues = list(source.recent_helpdesk_issues(limit=250))
        self.assertEqual(250, len(issues))
        list_urls = [url for url in source.urls if url.startswith(self.LIST_PREFIX)]
        self.assertEqual(3, len(list_urls))
        self.assertIn("limit=100", list_urls[0])
        for position, expected in enumerate(("offset=0", "offset=100", "offset=200")):
            self.assertIn(expected, list_urls[position])

    def test_recent_helpdesk_issues_skips_duplicate_issue_ids_across_pages(self):
        """An issue id repeated on a later page is yielded only once."""
        source = DuplicatePagedRedmineSource()
        issues = list(source.recent_helpdesk_issues(limit=3))
        yielded_ids = [entry["id"] for entry in issues]
        self.assertEqual([1, 2, 3], yielded_ids)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+277
View File
@@ -0,0 +1,277 @@
import io
import json
import tempfile
import unittest
from contextlib import redirect_stdout
from pathlib import Path
from semantic_index.__main__ import main
from semantic_index.models import IndexDocument
from semantic_index.refresh import FileRefreshState, RedmineRefreshService
def issue(updated_on="2026-04-25T12:00:00Z"):
    """Return a fresh sample issue dict with the given ``updated_on`` timestamp."""
    project = {"id": 1, "identifier": "customer-service", "name": "Customer Service"}
    sample = {"id": 39779}
    sample["subject"] = "Goods return"
    sample["description"] = "Please return our goods."
    sample["updated_on"] = updated_on
    sample["project"] = project
    return sample
class FakeRedmineSource:
    """In-memory Redmine source that serves a fixed list of issues.

    ``project_identifier`` is a class attribute that the code under test may
    overwrite; its value at call time is recorded with every request.
    """

    project_identifier = None

    def __init__(self, issues=None):
        """Store ``issues``; when omitted, serve a single default ``issue()``.

        An explicitly passed empty list is kept as-is, so a caller can model a
        source that has no issues at all. Only ``None`` selects the default.
        """
        self.issues = [issue()] if issues is None else issues
        self.calls = []

    def recent_helpdesk_issues(self, limit):
        """Record the call and return at most ``limit`` of the stored issues."""
        self.calls.append((self.project_identifier, limit))
        return self.issues[:limit]
class SummaryDetailRedmineSource(FakeRedmineSource):
    """Fake source that serves issue summaries and issue details separately."""

    def __init__(self, summaries, details):
        super().__init__([])
        self.summaries, self.details = summaries, details
        # Separate logs so tests can tell summary scans from detail fetches.
        self.summary_calls, self.detail_calls = [], []

    def recent_issue_summaries(self, limit):
        """Record the call and return at most ``limit`` summaries."""
        request = (self.project_identifier, limit)
        self.summary_calls.append(request)
        return self.summaries[:limit]

    def issue_detail(self, issue_id):
        """Record the id and return its detail; an unknown id raises ``KeyError``."""
        self.detail_calls.append(issue_id)
        return self.details[issue_id]
class RecordingEmbedder:
    """Fake embedder that logs each batch and returns one fixed vector per document."""

    def __init__(self):
        self.calls = []

    def embed_documents(self, docs):
        """Record a list copy of ``docs`` and return ``[0.1, 0.2, 0.3]`` for each entry."""
        batch = list(docs)
        self.calls.append(batch)
        # Iterates ``docs`` itself (not the copy), exactly as callers have relied on.
        vectors = []
        for _ in docs:
            vectors.append([0.1, 0.2, 0.3])
        return vectors
class RefreshStore:
    """In-memory store fake that records upserts and deletions.

    ``existing`` maps document ids to stored documents. Tests seed it directly
    and ``list_documents`` serves its values.
    """

    def __init__(self, existing=None):
        """Use ``existing`` as the backing dict, or a new empty dict when omitted.

        The supplied dict is kept by reference even when it is empty, so writes
        made through the store remain visible to the caller that passed it in.
        """
        self.existing = {} if existing is None else existing
        self.upserts = []
        self.deleted_ids = []

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None, issue_id=None):
        """Return at most ``limit`` stored documents; the filter arguments are ignored."""
        return list(self.existing.values())[:limit]

    def upsert(self, docs, vectors):
        """Record one upsert as a ``(documents, vectors)`` pair of lists."""
        self.upserts.append((list(docs), list(vectors)))

    def delete_documents(self, document_ids):
        """Record the ids that were requested for deletion."""
        self.deleted_ids.extend(document_ids)
class RedmineRefreshServiceTest(unittest.TestCase):
    """Tests for ``RedmineRefreshService`` using in-memory fakes for source, embedder and store."""

    def test_refresh_skips_embeddings_when_source_hash_matches_existing_document(self):
        """A stored document identical to the candidate is counted as unchanged and not embedded."""
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(source=FakeRedmineSource(), embedder=embedder, store=RefreshStore())
        candidate = service.mapper.issue_to_documents(issue())[0]
        stored_copy = {
            "id": candidate.id,
            "text": candidate.text,
            "payload": dict(candidate.payload),
        }
        service.store.existing[candidate.id] = stored_copy

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["unchanged_documents"])
        self.assertEqual(0, summary["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)

    def test_refresh_embeds_only_changed_and_new_documents(self):
        """A stored document with a different source hash is re-embedded and upserted."""
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(source=FakeRedmineSource(), embedder=embedder, store=RefreshStore())
        candidate = service.mapper.issue_to_documents(issue())[0]
        outdated_copy = {
            "id": candidate.id,
            "text": "Old text",
            "payload": {**candidate.payload, "source_hash": "old-hash"},
        }
        service.store.existing[candidate.id] = outdated_copy

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["changed_documents"])
        self.assertEqual(1, summary["embedded_documents"])
        self.assertEqual([[candidate]], embedder.calls)
        upserted_documents = service.store.upserts[0][0]
        self.assertEqual([candidate.id], [doc.id for doc in upserted_documents])

    def test_refresh_deletes_stale_issue_documents_without_embedding(self):
        """A stored document the issue no longer produces is deleted, with no embedding call."""
        stale_id = "redmine:issue:39779:journal:1:chunk:0"
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(source=FakeRedmineSource(), embedder=embedder, store=RefreshStore())
        candidate = service.mapper.issue_to_documents(issue())[0]
        existing = service.store.existing
        existing[candidate.id] = {"id": candidate.id, "text": candidate.text, "payload": dict(candidate.payload)}
        existing["redmine:issue:39779:journal:1:chunk:0"] = {
            "id": "redmine:issue:39779:journal:1:chunk:0",
            "text": "Deleted note",
            "payload": {"source_hash": "gone", "issue_id": 39779},
        }

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["stale_documents"])
        self.assertEqual([stale_id], service.store.deleted_ids)
        self.assertEqual([], embedder.calls)

    def test_dry_run_reports_planned_embeddings_without_embedding_or_mutating(self):
        """A dry run reports what would be embedded but embeds, upserts and deletes nothing."""
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(source=FakeRedmineSource(), embedder=embedder, store=RefreshStore())

        summary = service.refresh_redmine_project_limits({"customer-service": 1}, dry_run=True)

        self.assertEqual(1, summary["new_documents"])
        self.assertEqual(1, summary["would_embed_documents"])
        self.assertEqual(0, summary["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)
        self.assertEqual([], service.store.deleted_ids)

    def test_force_rebuild_embeds_unchanged_documents(self):
        """With ``force_rebuild`` an unchanged stored document is embedded again."""
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(source=FakeRedmineSource(), embedder=embedder, store=RefreshStore())
        candidate = service.mapper.issue_to_documents(issue())[0]
        service.store.existing[candidate.id] = {
            "id": candidate.id,
            "text": candidate.text,
            "payload": dict(candidate.payload),
        }

        summary = service.refresh_redmine_project_limits({"customer-service": 1}, force_rebuild=True)

        self.assertEqual(1, summary["force_rebuilt_documents"])
        self.assertEqual(1, summary["embedded_documents"])
        self.assertEqual([[candidate]], embedder.calls)

    def test_force_rebuild_ignores_refresh_state_window_for_fetched_candidates(self):
        """``force_rebuild`` embeds an issue older than the recorded refresh time."""
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = FileRefreshState(Path(tmp) / "refresh.json")
            state.mark_success("customer-service", "2026-04-25T12:00:00Z")
            service = RedmineRefreshService(source=source, embedder=embedder, store=RefreshStore(), state=state)
            summary = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                force_rebuild=True,
                overlap_minutes=15,
            )
            self.assertEqual(0, summary["skipped_issues"])
            self.assertEqual(1, summary["embedded_documents"])

    def test_file_refresh_state_updates_only_when_called(self):
        """The state loads as empty until ``mark_success`` writes the project's timestamp."""
        with tempfile.TemporaryDirectory() as tmp:
            state_file = Path(tmp) / "refresh.json"
            state = FileRefreshState(state_file)
            self.assertEqual({}, state.load())
            state.mark_success("customer-service", "2026-04-25T12:00:00Z")
            expected = {"projects": {"customer-service": {"last_successful_refresh_at": "2026-04-25T12:00:00Z"}}}
            written = json.loads((Path(tmp) / "refresh.json").read_text(encoding="utf-8"))
            self.assertEqual(expected, written)

    def test_refresh_state_skips_issues_older_than_overlap_window(self):
        """An issue updated two hours before the last refresh is skipped with a 15-minute overlap."""
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = FileRefreshState(Path(tmp) / "refresh.json")
            state.mark_success("customer-service", "2026-04-25T12:00:00Z")
            service = RedmineRefreshService(source=source, embedder=embedder, store=RefreshStore(), state=state)
            summary = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                dry_run=True,
                overlap_minutes=15,
            )
            self.assertEqual(1, summary["issues"])
            self.assertEqual(1, summary["skipped_issues"])
            self.assertEqual(0, summary["documents"])
            self.assertEqual([], embedder.calls)

    def test_refresh_skips_old_summaries_without_fetching_issue_detail(self):
        """Only the summary inside the overlap window triggers an issue-detail fetch."""
        old_summary = {"id": 39779, "updated_on": "2026-04-25T10:00:00Z"}
        new_summary = {"id": 39780, "updated_on": "2026-04-25T11:50:00Z"}
        new_detail = {**issue("2026-04-25T11:50:00Z"), "id": 39780}
        source = SummaryDetailRedmineSource(
            summaries=[old_summary, new_summary],
            details={39780: new_detail},
        )
        embedder = RecordingEmbedder()
        with tempfile.TemporaryDirectory() as tmp:
            state = FileRefreshState(Path(tmp) / "refresh.json")
            state.mark_success("customer-service", "2026-04-25T12:00:00Z")
            service = RedmineRefreshService(source=source, embedder=embedder, store=RefreshStore(), state=state)
            summary = service.refresh_redmine_project_limits(
                {"customer-service": 2},
                dry_run=True,
                overlap_minutes=15,
            )
            self.assertEqual(2, summary["scanned_issues"])
            self.assertEqual(1, summary["skipped_issues"])
            self.assertEqual(1, summary["detail_fetched_issues"])
            self.assertEqual([39780], source.detail_calls)
class RefreshCliTest(unittest.TestCase):
    """Tests for the ``--refresh-redmine-projects`` command-line entry point."""

    def test_refresh_redmine_projects_cli_parses_project_limits_and_dry_run(self):
        """Limits, ``--dry-run`` and ``--overlap-minutes`` reach the refresh service parsed."""

        class FakeRefresh:
            """Refresh double that logs its arguments and returns a small summary."""

            def __init__(self):
                self.calls = []

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                received = (project_limits, dry_run, force_rebuild, overlap_minutes)
                self.calls.append(received)
                return {
                    "source": "redmine",
                    "projects": len(project_limits),
                    "issues": sum(project_limits.values()),
                }

        refresh = FakeRefresh()
        services = {"refresh": refresh}
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=5,hiring=2",
            "--dry-run",
            "--overlap-minutes",
            "30",
        ]
        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, service_builder=lambda: services)
        expected_call = ({"customer-service": 5, "hiring": 2}, True, False, 30)
        self.assertEqual(expected_call, refresh.calls[0])
        self.assertIn("'projects': 2", captured.getvalue())

    def test_refresh_redmine_projects_cli_can_override_state_path(self):
        """``--state-path`` ends up as the ``path`` of the refresh service's state."""

        class FakeRefresh:
            """Refresh double that reports the path of whatever state it was given."""

            def __init__(self):
                self.state = None

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                return {"state_path": str(self.state.path)}

        refresh = FakeRefresh()
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=1",
            "--state-path",
            "/tmp/semantic-refresh-state.json",
        ]
        captured = io.StringIO()
        with redirect_stdout(captured):
            main(argv, service_builder=lambda: {"refresh": refresh})
        self.assertIn("/tmp/semantic-refresh-state.json", captured.getvalue())
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
+85
View File
@@ -0,0 +1,85 @@
import unittest
from semantic_index.models import IndexDocument, SearchQuery, SearchResult
from semantic_index.qdrant_store import build_filter, point_id_for_document
from semantic_index.search import HybridSearchService, keyword_boost
class FakeEmbedder:
    """Embedder double that returns the same three-component vector for any text."""

    def embed_query(self, text):
        """Return ``[0.1, 0.2, 0.3]`` regardless of ``text``."""
        vector = [0.1, 0.2, 0.3]
        return vector
class FakeStore:
    """Store double that remembers the last query and returns two canned results."""

    def __init__(self):
        self.query = None

    def search(self, vector, query, limit):
        """Record ``query`` and return up to ``limit`` results, the higher raw score first."""
        self.query = query
        weak = SearchResult(
            id="weak",
            score=0.7,
            text="general support text",
            payload={"redmine_url": "http://redmine/issues/1"},
        )
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={"redmine_url": "http://redmine/issues/2"},
        )
        canned = [weak, strong]
        return canned[:limit]
class SearchTest(unittest.TestCase):
    """Tests for point-id derivation, filter building and keyword-boosted search."""

    def test_qdrant_point_id_is_deterministic_uuid_for_stable_document_id(self):
        """The same document id yields the same 36-character hex-and-dash point id."""
        document_id = "redmine:issue:42:journal:5:chunk:0"
        first, second = point_id_for_document(document_id), point_id_for_document(document_id)
        self.assertEqual(first, second)
        self.assertRegex(first, r"^[0-9a-f-]{36}$")

    def test_filter_maps_supported_metadata(self):
        """Every supported query field becomes one ``must`` condition, in a fixed order."""
        query = SearchQuery(
            text="printer",
            source="redmine",
            project_identifier="fud-helpdesk",
            doc_type="message",
            issue_id=42,
            contact_email="ada@example.com",
            date_from="2026-04-01T00:00:00Z",
            date_to="2026-04-30T23:59:59Z",
        )
        expected_conditions = [
            {"key": "source", "match": {"value": "redmine"}},
            {"key": "project_identifier", "match": {"value": "fud-helpdesk"}},
            {"key": "doc_type", "match": {"value": "message"}},
            {"key": "issue_id", "match": {"value": 42}},
            {"key": "contact_email", "match": {"value": "ada@example.com"}},
            {"key": "created_on", "range": {"gte": "2026-04-01T00:00:00Z", "lte": "2026-04-30T23:59:59Z"}},
        ]
        built = build_filter(query)
        self.assertEqual(expected_conditions, built["must"])

    def test_keyword_boost_prioritizes_exact_email_and_order_matches(self):
        """A result containing the exact email and order number outranks a higher raw score."""
        query_text = 'ada@example.com "ORD-12345"'
        weak = SearchResult(id="weak", score=0.7, text="general support text", payload={})
        strong = SearchResult(
            id="strong",
            score=0.6,
            text="Customer ada@example.com asked about ORD-12345",
            payload={},
        )
        strong_boost = keyword_boost(query_text, strong)
        weak_boost = keyword_boost(query_text, weak)
        self.assertGreater(strong_boost, weak_boost)

        service = HybridSearchService(embedder=FakeEmbedder(), store=FakeStore())
        ranked = service.search(SearchQuery(text=query_text, limit=2))
        top = ranked[0]
        self.assertEqual("strong", top.id)
        self.assertEqual("http://redmine/issues/2", top.citation["url"])
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,41 @@
import os
import subprocess
import tempfile
import unittest
from pathlib import Path
# Directory two levels above the folder containing this file
# (presumably the repository root -- confirm against the repo layout).
ROOT = Path(__file__).resolve().parents[2]
# Refresh wrapper script that the test below executes as a subprocess.
REFRESH = ROOT / "semantic_index" / "refresh.sh"
class SemanticIndexShellWrapperTest(unittest.TestCase):
    """Tests that execute the refresh shell wrapper as a subprocess."""

    def test_refresh_wrapper_is_self_locating_when_called_from_another_directory(self):
        """Run from an unrelated working directory, the wrapper still builds the expected command."""
        with tempfile.TemporaryDirectory() as tmp:
            scratch = Path(tmp)
            # PYTHON is pointed at echo, so the wrapper prints its command line
            # to stdout where the assertions below can read it.
            overrides = {
                "PYTHON": "/bin/echo",
                "SEMANTIC_INDEX_PROJECT_LIMITS": "customer-service=5",
                "SEMANTIC_INDEX_LOG_DIR": str(scratch / "logs"),
                "SEMANTIC_INDEX_STATE_PATH": str(scratch / "state" / "refresh_state.json"),
            }
            env = {**os.environ, **overrides}
            completed = subprocess.run(
                [str(REFRESH)],
                cwd=tmp,
                env=env,
                check=False,
                text=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            self.assertEqual(0, completed.returncode, completed.stderr)
            expected_fragments = (
                "-m semantic_index --refresh-redmine-projects",
                "--project-limits customer-service=5",
                "log_file=",
            )
            for fragment in expected_fragments:
                self.assertIn(fragment, completed.stdout)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()