Add semantic-index service, deployment assets, and tests

This commit is contained in:
Jason Thistlethwaite
2026-05-04 09:50:03 -04:00
parent faad70872b
commit b305544f63
42 changed files with 5059 additions and 0 deletions
+46
View File
@@ -0,0 +1,46 @@
import unittest
from semantic_index.embeddings import OpenAIEmbedder
from semantic_index.models import IndexDocument
class FakeOpenAIClient:
def __init__(self):
self.calls = []
def create_embeddings(self, model, inputs, dimensions=None):
self.calls.append({"model": model, "inputs": list(inputs), "dimensions": dimensions})
return [[float(i)] * 3 for i, _ in enumerate(inputs, start=1)]
class OpenAIEmbedderTest(unittest.TestCase):
def test_batches_embedding_requests(self):
client = FakeOpenAIClient()
embedder = OpenAIEmbedder(client=client, batch_size=2, dimensions=1536)
docs = [
IndexDocument(id="a", text="alpha", payload={}),
IndexDocument(id="b", text="bravo", payload={}),
IndexDocument(id="c", text="charlie", payload={}),
]
vectors = embedder.embed_documents(docs)
self.assertEqual([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [1.0, 1.0, 1.0]], vectors)
self.assertEqual(2, len(client.calls))
self.assertEqual(["alpha", "bravo"], client.calls[0]["inputs"])
self.assertEqual("text-embedding-3-small", client.calls[0]["model"])
self.assertEqual(1536, client.calls[0]["dimensions"])
def test_rejects_empty_or_oversized_chunks_before_api_call(self):
client = FakeOpenAIClient()
embedder = OpenAIEmbedder(client=client, max_chars=5)
with self.assertRaises(ValueError):
embedder.embed_texts(["ok", " "])
with self.assertRaises(ValueError):
embedder.embed_texts(["toolong"])
self.assertEqual([], client.calls)
if __name__ == "__main__":
unittest.main()