Integration tests for collections (#299)
## Problem

We received a bug report that creation of indexes using `PodSpec` fails
if `source_collection` is specified.
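
Before the fix, a call along the following lines would fail whenever `source_collection` was set. This is a hypothetical minimal sketch of the reported usage (index name, dimension, environment, and collection name are illustrative), not the exact reproduction from the bug report:

```python
# Hypothetical sketch: create a pod index restored from an existing collection.
from pinecone import Pinecone, PodSpec

pc = Pinecone(api_key="YOUR_API_KEY")
pc.create_index(
    name="restored-index",               # illustrative name
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east1-gcp",
        source_collection="my-collection",  # the parameter that previously triggered the failure
    ),
)
```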

## Solution

- The fix for the bug was a one-line change.
- Added several integration tests to exercise the `index --> collection -->
index` path and error cases.
- Restructured integration tests so that tests using pod-based indexes
reside in `tests/integration/control/pod` and can be run separately from
the serverless-index tests in `tests/integration/control/serverless`. This
allows for greater parallelism in CI.
- Adjusted CI configs to run these tests in parallel with the integration
tests that use serverless indexes. The collections tests are quite slow due
to the waiting required for pod indexes and collections to become ready
for use.

## Type of Change

- [x] Bug fix (non-breaking change which fixes an issue)
- [x] Infrastructure change (CI configs, etc)
jhamon committed Jan 24, 2024
1 parent 5eb002e commit 995c0a1
Showing 21 changed files with 474 additions and 77 deletions.
60 changes: 52 additions & 8 deletions .github/workflows/testing-integration.yaml
@@ -45,8 +45,54 @@ jobs:
# spec: '${{ matrix.spec }}'
# PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'

  control-rest:
    name: control plane
  control-rest-pod:
    name: control plane pod/collection tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        pineconeEnv:
          - prod
        testConfig:
          - python-version: 3.8
            pod: { environment: 'us-east1-gcp'}
          - python-version: 3.11
            pod: { environment: 'us-east4-gcp'}
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
      - name: 'Set up Python ${{ matrix.testConfig.python-version }}'
        uses: actions/setup-python@v4
        with:
          python-version: '${{ matrix.testConfig.python-version }}'
      - name: Setup Poetry
        uses: ./.github/actions/setup-poetry
      - name: 'Run integration tests (REST, prod)'
        if: matrix.pineconeEnv == 'prod'
        run: poetry run pytest tests/integration/control/pod -s -v
        env:
          PINECONE_DEBUG_CURL: 'true'
          PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
          PINECONE_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
          GITHUB_BUILD_NUMBER: '${{ github.run_number }}-s-${{ matrix.testConfig.python-version}}'
          DIMENSION: 1536
          METRIC: 'cosine'
      - name: 'Run integration tests (REST, staging)'
        if: matrix.pineconeEnv == 'staging'
        run: poetry run pytest tests/integration/control/pod -s -v
        env:
          PINECONE_DEBUG_CURL: 'true'
          PINECONE_CONTROLLER_HOST: 'https://api-staging.pinecone.io'
          PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY_STAGING }}'
          PINECONE_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
          GITHUB_BUILD_NUMBER: '${{ github.run_number }}-p-${{ matrix.testConfig.python-version}}'
          DIMENSION: 1536
          METRIC: 'cosine'




  control-rest-serverless:
    name: control plane serverless
    runs-on: ubuntu-latest
    strategy:
      matrix:
@@ -59,7 +105,6 @@ jobs:
          - python-version: 3.11
            pod: { environment: 'us-east1-gcp'}
            serverless: { cloud: 'aws', region: 'us-west-2'}
      max-parallel: 1
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
@@ -71,21 +116,20 @@
        uses: ./.github/actions/setup-poetry
      - name: 'Run integration tests (REST, prod)'
        if: matrix.pineconeEnv == 'prod'
        run: poetry run pytest tests/integration/control -s -v
        run: poetry run pytest tests/integration/control/serverless -s -vv
        env:
          PINECONE_CONTROLLER_HOST: 'https://api.pinecone.io'
          PINECONE_DEBUG_CURL: 'true'
          PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
          GITHUB_BUILD_NUMBER: '${{ github.run_number }}-p-${{ matrix.testConfig.python-version}}'
          POD_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
          SERVERLESS_CLOUD: '${{ matrix.testConfig.serverless.cloud }}'
          SERVERLESS_REGION: '${{ matrix.testConfig.serverless.region }}'
      - name: 'Run integration tests (REST, staging)'
        if: matrix.pineconeEnv == 'staging'
        run: poetry run pytest tests/integration -s -v
        run: poetry run pytest tests/integration/control/serverless -s -vv
        env:
          PINECONE_DEBUG_CURL: 'true'
          PINECONE_CONTROLLER_HOST: 'https://api-staging.pinecone.io'
          PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY_STAGING }}'
          GITHUB_BUILD_NUMBER: '${{ github.run_number }}-s-${{ matrix.testConfig.python-version}}'
          POD_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
          SERVERLESS_CLOUD: '${{ matrix.testConfig.serverless.cloud }}'
          SERVERLESS_REGION: '${{ matrix.testConfig.serverless.region }}'
5 changes: 5 additions & 0 deletions pinecone/models/pod_spec.py
@@ -60,6 +60,11 @@ class PodSpec(NamedTuple):
    {'indexed': ['field1', 'field2']}
    """

    source_collection: Optional[str] = None
    """
    The name of the collection to use as the source for the pod index. This configuration is only used when creating a pod index from an existing collection.
    """

    def asdict(self):
        """
        Returns the PodSpec as a dictionary.
20 changes: 20 additions & 0 deletions scripts/delete-all-collections.py
@@ -0,0 +1,20 @@
import os
from pinecone import Pinecone

def read_env_var(name):
    value = os.environ.get(name)
    if value is None:
        raise Exception('Environment variable {} is not set'.format(name))
    return value

def main():
    pc = Pinecone(api_key=read_env_var('PINECONE_API_KEY'))

    collections = pc.list_collections().names()
    for collection in collections:
        if collection != "":
            pc.delete_collection(collection)

if __name__ == '__main__':
    main()

Empty file.
149 changes: 149 additions & 0 deletions tests/integration/control/pod/conftest.py
@@ -0,0 +1,149 @@
import pytest
import random
import string
import time
from pinecone import Pinecone, PodSpec
from ...helpers import generate_index_name, get_environment_var

@pytest.fixture()
def client():
    api_key = get_environment_var('PINECONE_API_KEY')
    return Pinecone(
        api_key=api_key,
        additional_headers={'sdk-test-suite': 'pinecone-python-client'}
    )

@pytest.fixture()
def environment():
    return get_environment_var('PINECONE_ENVIRONMENT')

@pytest.fixture()
def dimension():
    return int(get_environment_var('DIMENSION'))

@pytest.fixture()
def create_index_params(index_name, environment, dimension, metric):
    spec = {
        'pod': {
            'environment': environment,
            'pod_type': 'p1.x1'
        }
    }
    return dict(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=spec,
        timeout=-1
    )

@pytest.fixture()
def metric():
    return get_environment_var('METRIC')

@pytest.fixture()
def random_vector(dimension):
    def _random_vector():
        return [random.uniform(0, 1) for _ in range(dimension)]
    return _random_vector

@pytest.fixture()
def index_name(request):
    test_name = request.node.name
    return generate_index_name(test_name)

@pytest.fixture()
def ready_index(client, index_name, create_index_params):
    create_index_params['timeout'] = None
    client.create_index(**create_index_params)
    time.sleep(10) # Extra wait, since status is sometimes inaccurate
    yield index_name
    client.delete_index(index_name, -1)

@pytest.fixture()
def notready_index(client, index_name, create_index_params):
    create_index_params.update({'timeout': -1 })
    client.create_index(**create_index_params)
    yield index_name

def index_exists(index_name, client):
    return index_name in client.list_indexes().names()


def random_string():
    return ''.join(random.choice(string.ascii_lowercase) for i in range(10))

@pytest.fixture(scope='session')
def reusable_collection():
    pc = Pinecone(
        api_key=get_environment_var('PINECONE_API_KEY'),
        additional_headers={'sdk-test-suite': 'pinecone-python-client'}
    )
    index_name = 'temp-index-' + random_string()
    dimension = int(get_environment_var('DIMENSION'))
    print(f"Creating index {index_name} to prepare a collection...")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=get_environment_var('METRIC'),
        spec=PodSpec(
            environment=get_environment_var('PINECONE_ENVIRONMENT'),
        )
    )
    print(f"Created index {index_name}. Waiting 10 seconds to make sure it's ready...")
    time.sleep(10)

    num_vectors = 10
    vectors = [
        (str(i), [random.uniform(0, 1) for _ in range(dimension)]) for i in range(num_vectors) ]

    index = pc.Index(index_name)
    index.upsert(vectors=vectors)

    collection_name = 'reused-coll-' + random_string()
    pc.create_collection(
        name=collection_name,
        source=index_name
    )

    time_waited = 0
    desc = pc.describe_collection(collection_name)
    collection_ready = desc['status']
    while collection_ready.lower() != 'ready' and time_waited < 120:
        print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
        time.sleep(5)
        time_waited += 5
        desc = pc.describe_collection(collection_name)
        collection_ready = desc['status']

    if time_waited >= 120:
        raise Exception(f"Collection {collection_name} is not ready after 120 seconds")

    print(f"Collection {collection_name} is ready. Deleting index {index_name}...")
    pc.delete_index(index_name)

    yield collection_name

    print(f"Deleting collection {collection_name}...")
    pc.delete_collection(collection_name)

@pytest.fixture(autouse=True)
def cleanup(client, index_name):
    yield

    time_waited = 0
    while index_exists(index_name, client) and time_waited < 120:
        print(f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds..")
        time_waited += 5
        time.sleep(5)
        try:
            print(f"Attempting delete of index {index_name}")
            client.delete_index(index_name, -1)
            print(f"Deleted index {index_name}")
            break
        except Exception as e:
            print(f"Unable to delete index {index_name}: {e}")
            pass

    if time_waited >= 120:
        raise Exception(f"Index {index_name} could not be deleted after 120 seconds")
98 changes: 98 additions & 0 deletions tests/integration/control/pod/test_collections.py
@@ -0,0 +1,98 @@
import string
import random
import pytest
import time
from pinecone import PodSpec

def random_string():
    return ''.join(random.choice(string.ascii_lowercase) for i in range(10))

class TestCollectionsHappyPath:
    def test_index_to_collection_to_index_happy_path(self, client, environment, dimension, metric, ready_index, random_vector):
        index = client.Index(ready_index)
        num_vectors = 10
        vectors = [ (str(i), random_vector()) for i in range(num_vectors) ]
        index.upsert(vectors=vectors)

        collection_name = 'coll1-' + random_string()
        client.create_collection(name=collection_name, source=ready_index)
        desc = client.describe_collection(collection_name)
        assert desc['name'] == collection_name
        assert desc['environment'] == environment
        assert desc['status'] == 'Initializing'

        time_waited = 0
        collection_ready = desc['status']
        while collection_ready.lower() != 'ready' and time_waited < 120:
            print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
            time.sleep(5)
            time_waited += 5
            desc = client.describe_collection(collection_name)
            collection_ready = desc['status']

        assert collection_name in client.list_collections().names()

        if time_waited >= 120:
            raise Exception(f"Collection {collection_name} is not ready after 120 seconds")

        # After collection ready, these should all be defined
        assert desc['name'] == collection_name
        assert desc['status'] == 'Ready'
        assert desc['environment'] == environment
        assert desc['dimension'] == dimension
        assert desc['vector_count'] == num_vectors
        assert desc['size'] != None
        assert desc['size'] > 0

        # Create index from collection
        index_name = 'index-from-collection-' + collection_name
        print(f"Creating index {index_name} from collection {collection_name}...")
        client.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=PodSpec(
                environment=environment,
                source_collection=collection_name
            )
        )
        print(f"Created index {index_name} from collection {collection_name}. Waiting a little more to make sure it's ready...")
        time.sleep(30)
        desc = client.describe_index(index_name)
        assert desc['name'] == index_name
        assert desc['status']['ready'] == True

        new_index = client.Index(index_name)

        # Verify stats reflect the vectors present in the collection
        stats = new_index.describe_index_stats()
        print(stats)
        assert stats.total_vector_count == num_vectors

        # Verify the vectors from the collection can be fetched
        results = new_index.fetch(ids=[v[0] for v in vectors])
        print(results)
        for v in vectors:
            assert results.vectors[v[0]].id == v[0]
            assert results.vectors[v[0]].values == pytest.approx(v[1], rel=0.01)

        # Cleanup
        client.delete_collection(collection_name)
        client.delete_index(index_name)

    def test_create_index_with_different_metric_from_orig_index(self, client, dimension, metric, environment, reusable_collection):
        metrics = ['cosine', 'euclidean', 'dotproduct']
        target_metric = random.choice([x for x in metrics if x != metric])

        index_name = 'from-coll-' + random_string()
        client.create_index(
            name=index_name,
            dimension=dimension,
            metric=target_metric,
            spec=PodSpec(
                environment=environment,
                source_collection=reusable_collection
            )
        )
        time.sleep(10)
        client.delete_index(index_name, -1)
