Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+100
View File
@@ -0,0 +1,100 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
# String-keyed mapping with arbitrary values; used in this module as the type
# of document/result payloads and of the dicts built for serialized output.
Payload = Dict[str, Any]
@dataclass(frozen=True)
class IndexDocument:
    """Immutable id/text/payload record, validated on construction.

    Raises:
        ValueError: If ``id`` or ``text`` is not a string, or is empty or
            whitespace-only.
    """

    # Document identifier; must be a non-blank string.
    id: str
    # Document body; must be a non-blank string.
    text: str
    # Free-form metadata; each instance gets its own empty dict by default.
    payload: Payload = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate ``id`` and ``text`` (in that order).

        The type is checked before ``.strip()`` so that a non-string value
        (e.g. ``None`` or an int decoded from JSON) is reported as the same
        ``ValueError`` as a blank value rather than escaping as an
        ``AttributeError``.
        """
        if not isinstance(self.id, str) or not self.id.strip():
            raise ValueError("document id is required")
        if not isinstance(self.text, str) or not self.text.strip():
            raise ValueError("document text is required")
@dataclass(frozen=True)
class SearchQuery:
    """Immutable search request: query text, optional filters, and paging.

    Every filter field defaults to ``None``, meaning "not specified".

    Raises:
        ValueError: If ``text`` is not a non-blank string, if ``limit`` is
            not an integer, or if ``limit`` is outside 1..100.
    """

    # Free-text query; must be a non-blank string.
    text: str
    # Optional filters.
    source: Optional[str] = None
    project_id: Optional[int] = None
    project_identifier: Optional[str] = None
    doc_type: Optional[str] = None
    issue_id: Optional[int] = None
    contact_id: Optional[int] = None
    contact_email: Optional[str] = None
    # Date bounds are carried as strings and not parsed or validated here.
    # NOTE(review): expected format (ISO 8601?) is not visible in this file.
    date_from: Optional[str] = None
    date_to: Optional[str] = None
    # Maximum number of results; must be an integer in 1..100.
    limit: int = 10
    # Whether serialized results should carry a text snippet.
    include_snippets: bool = True

    def __post_init__(self) -> None:
        """Validate ``text`` and ``limit``.

        ``text`` is type-checked before ``.strip()`` so a non-string value
        raises ``ValueError`` instead of ``AttributeError``.  ``limit`` is
        type-checked before the range test because a bare comparison lets
        ``float('nan')`` through (both ``<`` and ``>`` are False for NaN)
        and raises ``TypeError`` for strings.
        """
        if not isinstance(self.text, str) or not self.text.strip():
            raise ValueError("search text is required")
        if not isinstance(self.limit, int):
            raise ValueError("limit must be an integer")
        if not 1 <= self.limit <= 100:
            raise ValueError("limit must be between 1 and 100")
@dataclass(frozen=True)
class SearchResult:
    """Immutable search hit: an id, a score, the matched text and a payload."""

    id: str
    score: float
    text: str
    payload: Payload

    @property
    def snippet(self) -> str:
        """Return the leading 500 characters of ``text`` (all of it if shorter)."""
        return self.text[0:500]

    @property
    def citation(self) -> Payload:
        """Build a reference dict from ``id`` and selected payload entries.

        Payload keys that are absent yield ``None``.  Two entries are
        renamed on the way out: ``redmine_url`` -> ``url`` and
        ``source_record_id`` -> ``record_id``.
        """
        lookup = self.payload.get
        # (output key, payload key) pairs, in output order.
        mapping = (
            ("source", "source"),
            ("doc_type", "doc_type"),
            ("issue_id", "issue_id"),
            ("project_identifier", "project_identifier"),
            ("contact_id", "contact_id"),
            ("contact_name", "contact_name"),
            ("contact_email", "contact_email"),
            ("url", "redmine_url"),
            ("record_id", "source_record_id"),
        )
        reference: Payload = {"id": self.id}
        for out_key, payload_key in mapping:
            reference[out_key] = lookup(payload_key)
        return reference

    def to_dict(self, include_snippet: bool = True) -> Payload:
        """Serialize to a dict with id, score, payload and citation.

        Args:
            include_snippet: When truthy, also add a ``"snippet"`` entry.

        Returns:
            The serialized mapping; ``"payload"`` is the instance's own
            payload object, not a copy.
        """
        serialized: Payload = dict(
            id=self.id,
            score=self.score,
            payload=self.payload,
            citation=self.citation,
        )
        if not include_snippet:
            return serialized
        serialized["snippet"] = self.snippet
        return serialized
def search_response(query: SearchQuery, results: list[SearchResult]) -> Payload:
    """Assemble the response body for a search.

    Args:
        query: The query that was run; its text, filter attributes and
            ``include_snippets`` flag are read.
        results: Hits to serialize, in the order they should appear.

    Returns:
        A dict with ``"query"`` (the query text), ``"filters"`` (only the
        filter attributes whose value is not ``None``) and ``"results"``
        (each hit's ``to_dict`` output).
    """
    # Attribute names reported under "filters", in output order.
    filter_names = (
        "source",
        "project_id",
        "project_identifier",
        "doc_type",
        "issue_id",
        "contact_id",
        "contact_email",
        "date_from",
        "date_to",
        "limit",
    )
    active_filters = {}
    for name in filter_names:
        value = getattr(query, name)
        if value is not None:
            active_filters[name] = value

    serialized = []
    for hit in results:
        serialized.append(hit.to_dict(include_snippet=query.include_snippets))

    response: Payload = {"query": query.text}
    response["filters"] = active_filters
    response["results"] = serialized
    return response