-
Notifications
You must be signed in to change notification settings - Fork 68
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integration tests for collections (#299)
## Problem We received a bug report that creation of indexes using `PodSpec` fails if `source_collection` is specified. ## Solution - The fix for the bug was a one-line change. - Added several integration tests to exercise `index --> collection --> index` path and error cases. - Restructured integration tests so that tests using pod-based indexes reside in `tests/integration/control/pod` and can be run separately from serverless indexes tested in `tests/integration/control/serverless`. This allows for greater parallelism in CI. - Adjusted CI configs to run these tests in parallel to integration tests using serverless indexes. The collections tests are quite slow due to the waiting required for pod indexes and collections to become ready for use. ## Type of Change - [x] Bug fix (non-breaking change which fixes an issue) - [x] Infrastructure change (CI configs, etc)
- Loading branch information
Showing
21 changed files
with
474 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import os | ||
from pinecone import Pinecone | ||
|
||
def read_env_var(name):
    """Return the value of the environment variable ``name``.

    Raises:
        Exception: if the variable is not present in the environment.
            An empty string counts as "set" and is returned as-is.
    """
    value = os.environ.get(name)
    if value is not None:
        return value
    raise Exception('Environment variable {} is not set'.format(name))
|
||
def main():
    """Delete every collection visible to the ``PINECONE_API_KEY`` account."""
    pc = Pinecone(api_key=read_env_var('PINECONE_API_KEY'))

    names = pc.list_collections().names()
    for name in names:
        # Skip blank entries; there is nothing to delete for an empty name.
        if name == "":
            continue
        pc.delete_collection(name)
|
||
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import pytest | ||
import random | ||
import string | ||
import time | ||
from pinecone import Pinecone, PodSpec | ||
from ...helpers import generate_index_name, get_environment_var | ||
|
||
@pytest.fixture()
def client():
    """Build a ``Pinecone`` client from the ``PINECONE_API_KEY`` variable.

    The client is created with an additional ``sdk-test-suite`` header set
    to ``pinecone-python-client``.
    """
    suite_headers = {'sdk-test-suite': 'pinecone-python-client'}
    return Pinecone(
        api_key=get_environment_var('PINECONE_API_KEY'),
        additional_headers=suite_headers,
    )
|
||
@pytest.fixture()
def environment():
    """Return the value of the ``PINECONE_ENVIRONMENT`` variable."""
    env_name = get_environment_var('PINECONE_ENVIRONMENT')
    return env_name
|
||
@pytest.fixture()
def dimension():
    """Return the ``DIMENSION`` variable converted to ``int``."""
    raw_dimension = get_environment_var('DIMENSION')
    return int(raw_dimension)
|
||
@pytest.fixture()
def create_index_params(index_name, environment, dimension, metric):
    """Return keyword arguments for ``client.create_index`` on a pod index.

    The spec is a plain dict describing a ``p1.x1`` pod in ``environment``;
    ``timeout`` defaults to ``-1`` and may be overridden by other fixtures.
    """
    pod_settings = {'environment': environment, 'pod_type': 'p1.x1'}
    return {
        'name': index_name,
        'dimension': dimension,
        'metric': metric,
        'spec': {'pod': pod_settings},
        'timeout': -1,
    }
|
||
@pytest.fixture()
def metric():
    """Return the value of the ``METRIC`` variable."""
    metric_name = get_environment_var('METRIC')
    return metric_name
|
||
@pytest.fixture()
def random_vector(dimension):
    """Return a factory producing lists of ``dimension`` floats in [0, 1]."""
    def _random_vector():
        values = []
        for _ in range(dimension):
            values.append(random.uniform(0, 1))
        return values
    return _random_vector
|
||
@pytest.fixture()
def index_name(request):
    """Return an index name generated from the requesting test's node name."""
    return generate_index_name(request.node.name)
|
||
@pytest.fixture()
def ready_index(client, index_name, create_index_params):
    """Create an index, yield its name, then delete it on teardown.

    ``timeout`` is overridden to ``None`` on the shared params dict
    (presumably so ``create_index`` waits for readiness; confirm against the
    client docs), followed by a fixed 10 second pause.
    """
    create_index_params.update(timeout=None)
    client.create_index(**create_index_params)
    # Extra wait, since status is sometimes inaccurate
    time.sleep(10)

    yield index_name

    client.delete_index(index_name, -1)
|
||
@pytest.fixture()
def notready_index(client, index_name, create_index_params):
    """Create an index with ``timeout=-1`` and yield its name.

    This fixture performs no teardown of its own.
    """
    create_index_params['timeout'] = -1
    client.create_index(**create_index_params)
    yield index_name
|
||
def index_exists(index_name, client):
    """Return whether ``index_name`` appears in ``client.list_indexes().names()``."""
    existing_names = client.list_indexes().names()
    return index_name in existing_names
|
||
|
||
def random_string():
    """Return 10 random lowercase ASCII letters joined into one string."""
    alphabet = string.ascii_lowercase
    letters = [random.choice(alphabet) for _ in range(10)]
    return ''.join(letters)
|
||
@pytest.fixture(scope='session')
def reusable_collection():
    """Create one collection per test session and yield its name.

    A temporary pod index is created, filled with 10 random vectors and
    used as the source of a new collection. The fixture polls the
    collection status every 5 seconds for up to 120 seconds. The temporary
    index is deleted whether or not collection setup succeeds, and the
    collection itself is deleted at session teardown.

    Raises:
        Exception: if the collection status is still not ``ready`` once the
            polling budget is exhausted.
    """
    max_wait_seconds = 120
    poll_interval_seconds = 5

    pc = Pinecone(
        api_key=get_environment_var('PINECONE_API_KEY'),
        additional_headers={'sdk-test-suite': 'pinecone-python-client'}
    )
    index_name = 'temp-index-' + random_string()
    dimension = int(get_environment_var('DIMENSION'))
    print(f"Creating index {index_name} to prepare a collection...")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=get_environment_var('METRIC'),
        spec=PodSpec(
            environment=get_environment_var('PINECONE_ENVIRONMENT'),
        )
    )
    try:
        print(f"Created index {index_name}. Waiting 10 seconds to make sure it's ready...")
        time.sleep(10)

        num_vectors = 10
        vectors = [
            (str(i), [random.uniform(0, 1) for _ in range(dimension)])
            for i in range(num_vectors)
        ]

        index = pc.Index(index_name)
        index.upsert(vectors=vectors)

        collection_name = 'reused-coll-' + random_string()
        pc.create_collection(
            name=collection_name,
            source=index_name
        )

        time_waited = 0
        desc = pc.describe_collection(collection_name)
        collection_ready = desc['status']
        while collection_ready.lower() != 'ready' and time_waited < max_wait_seconds:
            print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
            time.sleep(poll_interval_seconds)
            time_waited += poll_interval_seconds
            desc = pc.describe_collection(collection_name)
            collection_ready = desc['status']

        # Decide on the observed status, not the elapsed time: a collection
        # that turns ready on the very last poll is a success.
        if collection_ready.lower() != 'ready':
            raise Exception(f"Collection {collection_name} is not ready after {max_wait_seconds} seconds")

        print(f"Collection {collection_name} is ready. Deleting index {index_name}...")
    finally:
        # The temporary index is only scaffolding; remove it even when the
        # collection setup above failed so it does not leak.
        pc.delete_index(index_name)

    yield collection_name

    print(f"Deleting collection {collection_name}...")
    pc.delete_collection(collection_name)
|
||
@pytest.fixture(autouse=True)
def cleanup(client, index_name):
    """Delete the test's index after every test, retrying for up to 120 seconds.

    After the test finishes, and while ``index_name`` is still listed by the
    client, the fixture sleeps 5 seconds and attempts
    ``client.delete_index(index_name, -1)``. A failed attempt is printed and
    retried; the first successful attempt ends the loop.

    Raises:
        Exception: if the index is still listed after 120 seconds of
            retrying.
    """
    yield

    max_wait_seconds = 120
    poll_interval_seconds = 5
    time_waited = 0
    while index_exists(index_name, client):
        # Fail only when the index is still present once the budget is
        # spent; a delete that succeeds on the last attempt is not an error.
        if time_waited >= max_wait_seconds:
            raise Exception(f"Index {index_name} could not be deleted after {max_wait_seconds} seconds")

        print(f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds..")
        time_waited += poll_interval_seconds
        time.sleep(poll_interval_seconds)
        try:
            print(f"Attempting delete of index {index_name}")
            client.delete_index(index_name, -1)
            print(f"Deleted index {index_name}")
            break
        except Exception as e:
            # Best-effort retry: report the failure and try again next round.
            print(f"Unable to delete index {index_name}: {e}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import string | ||
import random | ||
import pytest | ||
import time | ||
from pinecone import PodSpec | ||
|
||
def random_string():
    """Return a string of 10 randomly chosen lowercase ASCII letters."""
    picked = []
    for _ in range(10):
        picked.append(random.choice(string.ascii_lowercase))
    return ''.join(picked)
|
||
class TestCollectionsHappyPath:
    """Integration tests for the index -> collection -> index round trip."""

    def test_index_to_collection_to_index_happy_path(self, client, environment, dimension, metric, ready_index, random_vector):
        """Round-trip vectors through a collection into a new pod index.

        Upserts 10 random vectors into ``ready_index``, creates a collection
        from it, waits up to 120 seconds for the collection to become ready,
        then creates a new index with ``PodSpec(source_collection=...)`` and
        checks that the vectors can be counted and fetched from it.
        The collection and the derived index are removed even when an
        assertion fails.
        """
        index = client.Index(ready_index)
        num_vectors = 10
        vectors = [(str(i), random_vector()) for i in range(num_vectors)]
        index.upsert(vectors=vectors)

        collection_name = 'coll1-' + random_string()
        index_name = 'index-from-collection-' + collection_name
        client.create_collection(name=collection_name, source=ready_index)
        try:
            desc = client.describe_collection(collection_name)
            assert desc['name'] == collection_name
            assert desc['environment'] == environment
            assert desc['status'] == 'Initializing'

            time_waited = 0
            collection_ready = desc['status']
            while collection_ready.lower() != 'ready' and time_waited < 120:
                print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
                time.sleep(5)
                time_waited += 5
                desc = client.describe_collection(collection_name)
                collection_ready = desc['status']

            assert collection_name in client.list_collections().names()

            # Judge by the observed status so that a collection turning ready
            # on the final poll is not reported as a timeout.
            if collection_ready.lower() != 'ready':
                raise Exception(f"Collection {collection_name} is not ready after 120 seconds")

            # After collection ready, these should all be defined
            assert desc['name'] == collection_name
            assert desc['status'] == 'Ready'
            assert desc['environment'] == environment
            assert desc['dimension'] == dimension
            assert desc['vector_count'] == num_vectors
            assert desc['size'] is not None
            assert desc['size'] > 0

            # Create index from collection
            print(f"Creating index {index_name} from collection {collection_name}...")
            client.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=PodSpec(
                    environment=environment,
                    source_collection=collection_name
                )
            )
            print(f"Created index {index_name} from collection {collection_name}. Waiting a little more to make sure it's ready...")
            time.sleep(30)
            desc = client.describe_index(index_name)
            assert desc['name'] == index_name
            assert desc['status']['ready'] == True

            new_index = client.Index(index_name)

            # Verify stats reflect the vectors present in the collection
            stats = new_index.describe_index_stats()
            print(stats)
            assert stats.total_vector_count == num_vectors

            # Verify the vectors from the collection can be fetched
            results = new_index.fetch(ids=[v[0] for v in vectors])
            print(results)
            for v in vectors:
                assert results.vectors[v[0]].id == v[0]
                assert results.vectors[v[0]].values == pytest.approx(v[1], rel=0.01)
        finally:
            # Cleanup runs on failure too: the autouse fixture only knows
            # about the index named after the test, not these resources.
            client.delete_collection(collection_name)
            if index_name in client.list_indexes().names():
                client.delete_index(index_name)

    def test_create_index_with_different_metric_from_orig_index(self, client, dimension, metric, environment, reusable_collection):
        """Create an index from a collection using a different metric.

        A metric other than the configured ``metric`` is chosen at random
        and used to create an index from ``reusable_collection``; the test
        passes if creation does not raise.
        """
        metrics = ['cosine', 'euclidean', 'dotproduct']
        target_metric = random.choice([x for x in metrics if x != metric])

        index_name = 'from-coll-' + random_string()
        client.create_index(
            name=index_name,
            dimension=dimension,
            metric=target_metric,
            spec=PodSpec(
                environment=environment,
                source_collection=reusable_collection
            )
        )
        time.sleep(10)
        client.delete_index(index_name, -1)
Oops, something went wrong.