Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,277 @@
|
||||
import io
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
from semantic_index.__main__ import main
|
||||
from semantic_index.models import IndexDocument
|
||||
from semantic_index.refresh import FileRefreshState, RedmineRefreshService
|
||||
|
||||
|
||||
def issue(updated_on="2026-04-25T12:00:00Z"):
    """Return a freshly built, canned Redmine issue payload.

    Args:
        updated_on: Value stored under the ``"updated_on"`` key. Every other
            field of the payload is fixed.

    Returns:
        A new ``dict`` (with a new nested ``"project"`` dict) on every call,
        so callers may mutate the result without affecting later calls.
    """
    project = {"id": 1, "identifier": "customer-service", "name": "Customer Service"}
    return dict(
        id=39779,
        subject="Goods return",
        description="Please return our goods.",
        updated_on=updated_on,
        project=project,
    )
|
||||
|
||||
|
||||
class FakeRedmineSource:
    """In-memory stand-in for a Redmine issue source.

    ``issues`` is the canned list that gets served back and ``calls`` records
    every ``recent_helpdesk_issues`` invocation as a
    ``(project_identifier, limit)`` tuple so tests can inspect them.
    """

    # Class-level default recorded with each call. Presumably the code under
    # test assigns the project being processed before fetching -- TODO confirm
    # against RedmineRefreshService.
    project_identifier = None

    def __init__(self, issues=None):
        """Store the issues to serve.

        Args:
            issues: List of issue payloads. ``None`` (the default) selects a
                single canned ``issue()``; an explicit empty list is kept as
                "no issues".
        """
        # Test for None explicitly: a truthiness check (``issues or [...]``)
        # would replace a deliberately empty list with the default issue.
        self.issues = [issue()] if issues is None else issues
        self.calls = []

    def recent_helpdesk_issues(self, limit):
        """Record the call and return at most ``limit`` of the stored issues."""
        self.calls.append((self.project_identifier, limit))
        return self.issues[:limit]
|
||||
|
||||
|
||||
class SummaryDetailRedmineSource(FakeRedmineSource):
    """Fake source that serves issue summaries and per-issue details separately.

    ``summary_calls`` and ``detail_calls`` record how the source was queried so
    tests can assert which issues were actually fetched in full.
    """

    def __init__(self, summaries, details):
        """Store the canned data.

        Args:
            summaries: Sequence of summary payloads, sliced by ``limit``.
            details: Mapping from issue id to the full issue payload.
        """
        super().__init__([])
        self.summary_calls, self.detail_calls = [], []
        self.summaries = summaries
        self.details = details

    def recent_issue_summaries(self, limit):
        """Record the call and return at most ``limit`` summaries."""
        call = (self.project_identifier, limit)
        self.summary_calls.append(call)
        return self.summaries[:limit]

    def issue_detail(self, issue_id):
        """Record the requested id, then return its canned detail.

        The id is recorded before the lookup, so a request for an id missing
        from ``details`` is still visible in ``detail_calls`` when the lookup
        raises ``KeyError``.
        """
        self.detail_calls.append(issue_id)
        detail = self.details[issue_id]
        return detail
|
||||
|
||||
|
||||
class RecordingEmbedder:
    """Embedder double that records each batch and returns fixed vectors."""

    def __init__(self):
        # One entry per embed_documents call; each entry is the list of
        # documents passed in that call.
        self.calls = []

    def embed_documents(self, docs):
        """Record ``docs`` and return one ``[0.1, 0.2, 0.3]`` vector per document.

        Args:
            docs: Any iterable of documents, including single-pass iterables
                such as generators.

        Returns:
            A list with one three-element vector for every document.
        """
        # Materialise once: iterating ``docs`` a second time would produce no
        # vectors when the caller passes a generator.
        batch = list(docs)
        self.calls.append(batch)
        return [[0.1, 0.2, 0.3] for _ in batch]
|
||||
|
||||
|
||||
class RefreshStore:
    """In-memory document store double that records every mutation.

    ``existing`` maps document id to a stored record, ``upserts`` collects
    ``(docs, vectors)`` tuples and ``deleted_ids`` collects every id passed to
    ``delete_documents``.
    """

    def __init__(self, existing=None):
        """Create the store.

        Args:
            existing: Optional mapping of already-stored records. It is used
                as-is (not copied), so the caller keeps a live view of it.
        """
        # Test for None explicitly so that a caller-supplied empty dict is
        # kept rather than being replaced by a fresh one.
        self.existing = {} if existing is None else existing
        self.upserts = []
        self.deleted_ids = []

    def list_documents(self, limit=10, source=None, project_identifier=None, doc_type=None, issue_id=None):
        """Return up to ``limit`` stored records in insertion order.

        The filter arguments are accepted for signature compatibility but are
        not applied by this fake.
        """
        return list(self.existing.values())[:limit]

    def upsert(self, docs, vectors):
        """Record an upsert as a ``(list(docs), list(vectors))`` tuple."""
        self.upserts.append((list(docs), list(vectors)))

    def delete_documents(self, document_ids):
        """Record the ids requested for deletion."""
        self.deleted_ids.extend(document_ids)
|
||||
|
||||
|
||||
class RedmineRefreshServiceTest(unittest.TestCase):
    """Exercise ``RedmineRefreshService`` against the in-memory fakes."""

    @staticmethod
    def _build(source, **service_kwargs):
        """Return ``(service, embedder)`` wired to ``source`` and a fresh ``RefreshStore``."""
        embedder = RecordingEmbedder()
        service = RedmineRefreshService(
            source=source,
            embedder=embedder,
            store=RefreshStore(),
            **service_kwargs,
        )
        return service, embedder

    @staticmethod
    def _seed(service, doc, **overrides):
        """Put ``doc`` into the fake store as an existing record, with optional field overrides."""
        record = {"id": doc.id, "text": doc.text, "payload": dict(doc.payload)}
        record.update(overrides)
        service.store.existing[doc.id] = record

    @staticmethod
    def _marked_state(directory):
        """Return a ``FileRefreshState`` in ``directory`` with one recorded success."""
        state = FileRefreshState(Path(directory) / "refresh.json")
        state.mark_success("customer-service", "2026-04-25T12:00:00Z")
        return state

    def test_refresh_skips_embeddings_when_source_hash_matches_existing_document(self):
        """A stored record identical to the candidate is neither embedded nor upserted."""
        service, embedder = self._build(FakeRedmineSource())
        self._seed(service, service.mapper.issue_to_documents(issue())[0])

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["unchanged_documents"])
        self.assertEqual(0, summary["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)

    def test_refresh_embeds_only_changed_and_new_documents(self):
        """A stored record with a different source hash is embedded and upserted."""
        service, embedder = self._build(FakeRedmineSource())
        doc = service.mapper.issue_to_documents(issue())[0]
        self._seed(
            service,
            doc,
            text="Old text",
            payload={**doc.payload, "source_hash": "old-hash"},
        )

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["changed_documents"])
        self.assertEqual(1, summary["embedded_documents"])
        self.assertEqual([[doc]], embedder.calls)
        upserted_docs = service.store.upserts[0][0]
        self.assertEqual([doc.id], [item.id for item in upserted_docs])

    def test_refresh_deletes_stale_issue_documents_without_embedding(self):
        """A stored record with no matching candidate is deleted, not embedded."""
        stale_id = "redmine:issue:39779:journal:1:chunk:0"
        service, embedder = self._build(FakeRedmineSource())
        self._seed(service, service.mapper.issue_to_documents(issue())[0])
        service.store.existing[stale_id] = {
            "id": stale_id,
            "text": "Deleted note",
            "payload": {"source_hash": "gone", "issue_id": 39779},
        }

        summary = service.refresh_redmine_project_limits({"customer-service": 1})

        self.assertEqual(1, summary["stale_documents"])
        self.assertEqual([stale_id], service.store.deleted_ids)
        self.assertEqual([], embedder.calls)

    def test_dry_run_reports_planned_embeddings_without_embedding_or_mutating(self):
        """A dry run reports the planned work but touches neither embedder nor store."""
        service, embedder = self._build(FakeRedmineSource())

        summary = service.refresh_redmine_project_limits({"customer-service": 1}, dry_run=True)

        self.assertEqual(1, summary["new_documents"])
        self.assertEqual(1, summary["would_embed_documents"])
        self.assertEqual(0, summary["embedded_documents"])
        self.assertEqual([], embedder.calls)
        self.assertEqual([], service.store.upserts)
        self.assertEqual([], service.store.deleted_ids)

    def test_force_rebuild_embeds_unchanged_documents(self):
        """``force_rebuild`` embeds a document even when the stored copy matches."""
        service, embedder = self._build(FakeRedmineSource())
        doc = service.mapper.issue_to_documents(issue())[0]
        self._seed(service, doc)

        summary = service.refresh_redmine_project_limits({"customer-service": 1}, force_rebuild=True)

        self.assertEqual(1, summary["force_rebuilt_documents"])
        self.assertEqual(1, summary["embedded_documents"])
        self.assertEqual([[doc]], embedder.calls)

    def test_force_rebuild_ignores_refresh_state_window_for_fetched_candidates(self):
        """``force_rebuild`` processes an issue older than the recorded refresh time."""
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        with tempfile.TemporaryDirectory() as workdir:
            service, _embedder = self._build(source, state=self._marked_state(workdir))

            summary = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                force_rebuild=True,
                overlap_minutes=15,
            )

            self.assertEqual(0, summary["skipped_issues"])
            self.assertEqual(1, summary["embedded_documents"])

    def test_file_refresh_state_updates_only_when_called(self):
        """The state file is empty until ``mark_success`` writes the project entry."""
        with tempfile.TemporaryDirectory() as workdir:
            state_file = Path(workdir) / "refresh.json"
            state = FileRefreshState(state_file)
            self.assertEqual({}, state.load())

            state.mark_success("customer-service", "2026-04-25T12:00:00Z")

            expected = {
                "projects": {
                    "customer-service": {"last_successful_refresh_at": "2026-04-25T12:00:00Z"},
                },
            }
            self.assertEqual(expected, json.loads(state_file.read_text(encoding="utf-8")))

    def test_refresh_state_skips_issues_older_than_overlap_window(self):
        """An issue updated well before the recorded refresh time is counted but skipped."""
        source = FakeRedmineSource([issue(updated_on="2026-04-25T10:00:00Z")])
        with tempfile.TemporaryDirectory() as workdir:
            service, embedder = self._build(source, state=self._marked_state(workdir))

            summary = service.refresh_redmine_project_limits(
                {"customer-service": 1},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(1, summary["issues"])
            self.assertEqual(1, summary["skipped_issues"])
            self.assertEqual(0, summary["documents"])
            self.assertEqual([], embedder.calls)

    def test_refresh_skips_old_summaries_without_fetching_issue_detail(self):
        """Only the summary inside the overlap window has its detail fetched."""
        stale_summary = {"id": 39779, "updated_on": "2026-04-25T10:00:00Z"}
        fresh_summary = {"id": 39780, "updated_on": "2026-04-25T11:50:00Z"}
        # Only the fresh issue has a detail entry, so a detail request for the
        # stale one would raise KeyError.
        fresh_detail = {**issue("2026-04-25T11:50:00Z"), "id": 39780}
        source = SummaryDetailRedmineSource(
            summaries=[stale_summary, fresh_summary],
            details={39780: fresh_detail},
        )
        with tempfile.TemporaryDirectory() as workdir:
            service, _embedder = self._build(source, state=self._marked_state(workdir))

            summary = service.refresh_redmine_project_limits(
                {"customer-service": 2},
                dry_run=True,
                overlap_minutes=15,
            )

            self.assertEqual(2, summary["scanned_issues"])
            self.assertEqual(1, summary["skipped_issues"])
            self.assertEqual(1, summary["detail_fetched_issues"])
            self.assertEqual([39780], source.detail_calls)
|
||||
|
||||
|
||||
class RefreshCliTest(unittest.TestCase):
    """Drive the command-line entry point with a stubbed refresh service."""

    def test_refresh_redmine_projects_cli_parses_project_limits_and_dry_run(self):
        """The CLI flags reach the refresh service as parsed Python values."""

        class FakeRefresh:
            """Stub that records the arguments it is called with."""

            def __init__(self):
                self.calls = []

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                self.calls.append((project_limits, dry_run, force_rebuild, overlap_minutes))
                return {
                    "source": "redmine",
                    "projects": len(project_limits),
                    "issues": sum(project_limits.values()),
                }

        refresh = FakeRefresh()
        services = {"refresh": refresh}
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=5,hiring=2",
            "--dry-run",
            "--overlap-minutes",
            "30",
        ]
        captured = io.StringIO()

        with redirect_stdout(captured):
            main(argv, service_builder=lambda: services)

        expected_call = ({"customer-service": 5, "hiring": 2}, True, False, 30)
        self.assertEqual(expected_call, refresh.calls[0])
        self.assertIn("'projects': 2", captured.getvalue())

    def test_refresh_redmine_projects_cli_can_override_state_path(self):
        """The path given with ``--state-path`` shows up in the printed result."""

        class FakeRefresh:
            """Stub that reports the path of whatever ``state`` it holds when called."""

            def __init__(self):
                # Starts as None; the stub's result reads ``self.state.path``,
                # so the test can only pass if something assigns a state
                # object before the refresh call -- presumably the CLI.
                self.state = None

            def refresh_redmine_project_limits(self, project_limits, dry_run=False, force_rebuild=False, overlap_minutes=15):
                return {"state_path": str(self.state.path)}

        refresh = FakeRefresh()
        argv = [
            "--refresh-redmine-projects",
            "--project-limits",
            "customer-service=1",
            "--state-path",
            "/tmp/semantic-refresh-state.json",
        ]
        captured = io.StringIO()

        with redirect_stdout(captured):
            main(argv, service_builder=lambda: {"refresh": refresh})

        self.assertIn("/tmp/semantic-refresh-state.json", captured.getvalue())
|
||||
|
||||
|
||||
# Run the test cases in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user