Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+206
View File
@@ -0,0 +1,206 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Callable, Dict, List, Optional
from .app import build_services
from .config import Settings, load_settings
from .inspect import (
print_audit,
print_compare_redmine,
print_count,
print_list,
print_preview_redmine,
print_search,
print_show,
print_smoke_search,
)
from .mcp import SemanticMCP, serve_stdio
from .refresh import FileRefreshState
from .redmine import RedmineApiSource
def build_preview_services(settings: Settings) -> Dict[str, object]:
    """Assemble the reduced service mapping used for Redmine previews.

    The mapping holds only two entries: the ``settings`` object that was
    passed in and a ``redmine_source`` built from its Redmine URL, API key
    and project identifier.

    Args:
        settings: Loaded configuration supplying the Redmine connection values.

    Returns:
        A dict with the keys ``"settings"`` and ``"redmine_source"``.
    """
    # A missing/falsy API key is passed through as an empty string.
    api_key = settings.redmine_api_key or ""
    source = RedmineApiSource(
        redmine_url=settings.redmine_url,
        api_key=api_key,
        project_identifier=settings.redmine_project_identifier,
    )
    return {"settings": settings, "redmine_source": source}
def parse_projects(raw: str) -> List[str]:
    """Split a comma-separated string into trimmed project identifiers.

    Surrounding whitespace is removed from every entry and entries that are
    empty after trimming are dropped, so ``"a, ,b,"`` yields ``["a", "b"]``.
    """
    identifiers: List[str] = []
    for piece in raw.split(","):
        name = piece.strip()
        if name:
            identifiers.append(name)
    return identifiers
def parse_project_limits(raw: str) -> Dict[str, int]:
    """Parse comma-separated ``project=limit`` pairs into a mapping.

    Whitespace around items, project names and limits is ignored, and blank
    items (for example from a trailing comma) are skipped. When a project
    appears more than once, the last limit wins.

    Args:
        raw: Text such as ``"alpha=100, beta=25"``.

    Returns:
        A dict mapping each project identifier to its integer limit.

    Raises:
        ValueError: If an item has no ``=`` separator or its limit is not an
            integer. The message names the offending item.
    """
    project_limits: Dict[str, int] = {}
    for item in raw.split(","):
        entry = item.strip()
        if not entry:
            continue
        # partition() never raises, so a missing "=" can be reported clearly
        # instead of surfacing as a tuple-unpacking error.
        project, separator, limit = entry.partition("=")
        if not separator:
            raise ValueError(f"expected project=limit, got {entry!r}")
        project = project.strip()
        limit = limit.strip()
        try:
            project_limits[project] = int(limit)
        except ValueError:
            raise ValueError(
                f"limit for project {project!r} must be an integer, got {limit!r}"
            ) from None
    return project_limits
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: top-level mode flags plus the ``inspect`` subcommands."""
    parser = argparse.ArgumentParser(description="Semantic index helper", allow_abbrev=False)
    parser.add_argument("--mcp-stdio", action="store_true", help="Run the MCP-compatible stdio tool server")
    parser.add_argument("--backfill-redmine-sample", action="store_true", help="Backfill the configured Redmine sample")
    parser.add_argument("--backfill-redmine-projects", action="store_true", help="Backfill multiple Redmine projects")
    parser.add_argument("--refresh-redmine-projects", action="store_true", help="Refresh recent Redmine issues without re-embedding unchanged documents")
    parser.add_argument("--projects", help="Comma-separated Redmine project identifiers for multi-project backfill")
    parser.add_argument("--project-limits", help="Comma-separated project=limit pairs for multi-project backfill")
    parser.add_argument("--per-project-limit", type=int, default=500)
    parser.add_argument("--limit", type=int, default=500)
    parser.add_argument("--dry-run", action="store_true", help="Report planned refresh work without embeddings or writes")
    parser.add_argument("--force-rebuild", action="store_true", help="Embed and upsert refresh candidates even when source hashes match")
    parser.add_argument("--overlap-minutes", type=int, default=15, help="Refresh overlap window for rolling update state")
    parser.add_argument("--state-path", help="Override rolling refresh state file path")

    subparsers = parser.add_subparsers(dest="command")
    inspect_parser = subparsers.add_parser("inspect", help="Inspect indexed documents and preview Redmine chunks")
    inspect_subparsers = inspect_parser.add_subparsers(dest="inspect_command", required=True)

    def add_filters(command_parser: argparse.ArgumentParser) -> None:
        """Attach the source/project/doc-type filter options shared by several subcommands."""
        command_parser.add_argument("--source", default="redmine")
        command_parser.add_argument("--project", dest="project_identifier")
        command_parser.add_argument("--doc-type")

    count_parser = inspect_subparsers.add_parser("count", help="Count indexed documents")
    add_filters(count_parser)

    list_parser = inspect_subparsers.add_parser("list", help="List indexed documents")
    add_filters(list_parser)
    list_parser.add_argument("--limit", type=int, default=20)
    list_parser.add_argument("--full-text", action="store_true")

    search_parser = inspect_subparsers.add_parser("search", help="Search indexed documents")
    search_parser.add_argument("query")
    add_filters(search_parser)
    search_parser.add_argument("--limit", type=int, default=10)
    search_parser.add_argument("--full-text", action="store_true")

    show_parser = inspect_subparsers.add_parser("show", help="Show one indexed document")
    show_parser.add_argument("document_id")

    preview_parser = inspect_subparsers.add_parser("preview-redmine", help="Preview Redmine chunks without writing to Qdrant")
    preview_parser.add_argument("--limit", type=int, default=10)
    preview_parser.add_argument("--project", dest="project_identifier")
    preview_parser.add_argument("--full-text", action="store_true")

    audit_parser = inspect_subparsers.add_parser("audit", help="Audit indexed documents for trust-check coverage")
    add_filters(audit_parser)
    audit_parser.add_argument("--limit", type=int, default=500)
    audit_parser.add_argument("--json", action="store_true")

    compare_parser = inspect_subparsers.add_parser("compare-redmine", help="Compare live Redmine preview chunks with indexed documents")
    compare_parser.add_argument("--limit", type=int, default=20)
    compare_parser.add_argument("--project", dest="project_identifier")
    compare_parser.add_argument("--json", action="store_true")

    smoke_parser = inspect_subparsers.add_parser("smoke-search", help="Run repeatable search checks against indexed documents")
    smoke_parser.add_argument("--project", dest="project_identifier")
    smoke_parser.add_argument("--email", default="callum@safetagtracking.com")
    smoke_parser.add_argument("--issue-id", type=int, default=39779)
    smoke_parser.add_argument("--order-token")
    smoke_parser.add_argument("--natural-query", default="customer needs goods returned")
    smoke_parser.add_argument("--json", action="store_true")
    return parser


def _parse_project_limits_or_exit(parser: argparse.ArgumentParser, raw: str) -> Dict[str, int]:
    """Parse ``--project-limits``, reporting malformed input as a usage error instead of a traceback."""
    try:
        return parse_project_limits(raw)
    except ValueError as exc:
        # parser.error() prints usage plus the message and exits the process.
        parser.error(f"invalid --project-limits value {raw!r}: {exc}")


def main(
    argv: Optional[List[str]] = None,
    service_builder: Callable[[], Dict[str, object]] = build_services,
    preview_service_builder: Optional[Callable[[Settings], Dict[str, object]]] = None,
    settings_loader: Callable[[], Settings] = load_settings,
) -> None:
    """Run the semantic-index command-line interface.

    Dispatch order: ``inspect preview-redmine`` is handled first, then the
    mode flags (``--backfill-redmine-sample``, ``--backfill-redmine-projects``,
    ``--refresh-redmine-projects``, ``--mcp-stdio``), then the remaining
    ``inspect`` subcommands. The first matching mode runs and the function
    returns. With no command and no mode flag, the help text is printed.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process arguments.
        service_builder: Zero-argument factory returning the service mapping
            (keys read here include ``"backfill"``, ``"refresh"``,
            ``"search"``, ``"store"``, ``"settings"`` and
            ``"redmine_source"``).
        preview_service_builder: Optional factory used only for
            ``inspect preview-redmine``; receives the loaded settings.
        settings_loader: Zero-argument factory producing the settings given
            to the preview builders.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            missing ``--projects``/``--project-limits`` or a malformed
            ``--project-limits`` value.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    mode_selected = any(
        (
            args.command,
            args.backfill_redmine_sample,
            args.backfill_redmine_projects,
            args.refresh_redmine_projects,
            args.mcp_stdio,
        )
    )
    if not mode_selected:
        parser.print_help()
        return

    # preview-redmine is resolved before service_builder() is invoked; with the
    # default builder it uses the reduced preview services instead.
    if args.command == "inspect" and args.inspect_command == "preview-redmine":
        if preview_service_builder is not None:
            services = preview_service_builder(settings_loader())
        elif service_builder is build_services:
            services = build_preview_services(settings_loader())
        else:
            # A caller-supplied service_builder takes precedence over the
            # built-in preview services.
            services = service_builder()
        project = args.project_identifier or services["settings"].redmine_project_identifier
        print_preview_redmine(services["redmine_source"], services["settings"].redmine_url, project, args.limit, args.full_text)
        return

    services = service_builder()

    # Swap in a file-backed refresh state only when the refresh service exists
    # and exposes a ``state`` attribute.
    if args.state_path and "refresh" in services and hasattr(services["refresh"], "state"):
        services["refresh"].state = FileRefreshState(Path(args.state_path))

    if args.backfill_redmine_sample:
        print(services["backfill"].backfill_redmine_sample(limit=args.limit))
        return

    if args.backfill_redmine_projects:
        # Explicit per-project limits win over --projects/--per-project-limit.
        if args.project_limits:
            limits = _parse_project_limits_or_exit(parser, args.project_limits)
            print(services["backfill"].backfill_redmine_project_limits(limits))
            return
        projects = parse_projects(args.projects or "")
        if not projects:
            parser.error("--projects or --project-limits is required with --backfill-redmine-projects")
        print(services["backfill"].backfill_redmine_projects(projects, per_project_limit=args.per_project_limit))
        return

    if args.refresh_redmine_projects:
        if args.project_limits:
            project_limits = _parse_project_limits_or_exit(parser, args.project_limits)
        else:
            projects = parse_projects(args.projects or "")
            if not projects:
                parser.error("--projects or --project-limits is required with --refresh-redmine-projects")
            # Without explicit limits every project gets --per-project-limit.
            project_limits = {project: args.per_project_limit for project in projects}
        print(
            services["refresh"].refresh_redmine_project_limits(
                project_limits,
                dry_run=args.dry_run,
                force_rebuild=args.force_rebuild,
                overlap_minutes=args.overlap_minutes,
            )
        )
        return

    if args.mcp_stdio:
        serve_stdio(
            SemanticMCP(
                search_service=services["search"],
                backfill_service=services["backfill"],
                store=services["store"],
                # The refresh service is optional in the service mapping.
                refresh_service=services.get("refresh"),
            )
        )
        return

    if args.command == "inspect":
        if args.inspect_command == "count":
            print_count(services["store"], args.source, args.project_identifier, args.doc_type)
            return
        if args.inspect_command == "list":
            print_list(services["store"], args.limit, args.source, args.project_identifier, args.doc_type, args.full_text)
            return
        if args.inspect_command == "search":
            print_search(services["search"], args.query, args.limit, args.source, args.project_identifier, args.doc_type, args.full_text)
            return
        if args.inspect_command == "show":
            print_show(services["search"], args.document_id)
            return
        if args.inspect_command == "audit":
            print_audit(services["store"], args.limit, args.source, args.project_identifier, args.doc_type, args.json)
            return
        if args.inspect_command == "compare-redmine":
            project = args.project_identifier or services["settings"].redmine_project_identifier
            print_compare_redmine(services["store"], services["redmine_source"], services["settings"].redmine_url, project, args.limit, args.json)
            return
        if args.inspect_command == "smoke-search":
            project = args.project_identifier or services["settings"].redmine_project_identifier
            print_smoke_search(
                services["search"],
                project,
                args.email,
                args.issue_id,
                args.order_token,
                args.natural_query,
                args.json,
            )
            return

    # Fallback when nothing above handled the invocation.
    parser.print_help()
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()