Add semantic-index service, deployment assets, and tests
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
import unittest
|
||||
|
||||
from semantic_index.embeddings import OpenAIEmbedder
|
||||
from semantic_index.models import IndexDocument
|
||||
|
||||
|
||||
class FakeOpenAIClient:
|
||||
def __init__(self):
|
||||
self.calls = []
|
||||
|
||||
def create_embeddings(self, model, inputs, dimensions=None):
|
||||
self.calls.append({"model": model, "inputs": list(inputs), "dimensions": dimensions})
|
||||
return [[float(i)] * 3 for i, _ in enumerate(inputs, start=1)]
|
||||
|
||||
|
||||
class OpenAIEmbedderTest(unittest.TestCase):
|
||||
def test_batches_embedding_requests(self):
|
||||
client = FakeOpenAIClient()
|
||||
embedder = OpenAIEmbedder(client=client, batch_size=2, dimensions=1536)
|
||||
docs = [
|
||||
IndexDocument(id="a", text="alpha", payload={}),
|
||||
IndexDocument(id="b", text="bravo", payload={}),
|
||||
IndexDocument(id="c", text="charlie", payload={}),
|
||||
]
|
||||
|
||||
vectors = embedder.embed_documents(docs)
|
||||
|
||||
self.assertEqual([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [1.0, 1.0, 1.0]], vectors)
|
||||
self.assertEqual(2, len(client.calls))
|
||||
self.assertEqual(["alpha", "bravo"], client.calls[0]["inputs"])
|
||||
self.assertEqual("text-embedding-3-small", client.calls[0]["model"])
|
||||
self.assertEqual(1536, client.calls[0]["dimensions"])
|
||||
|
||||
def test_rejects_empty_or_oversized_chunks_before_api_call(self):
|
||||
client = FakeOpenAIClient()
|
||||
embedder = OpenAIEmbedder(client=client, max_chars=5)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
embedder.embed_texts(["ok", " "])
|
||||
with self.assertRaises(ValueError):
|
||||
embedder.embed_texts(["toolong"])
|
||||
self.assertEqual([], client.calls)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user