Skip to content

Commit

Permalink
langchain[patch]: Add tests for indexing (langchain-ai#19342)
Browse files Browse the repository at this point in the history
This PR adds tests for the indexing API
  • Loading branch information
eyurtsev authored and chrispy-snps committed Mar 30, 2024
1 parent fc87901 commit 075a7bc
Showing 1 changed file with 154 additions and 0 deletions.
154 changes: 154 additions & 0 deletions libs/langchain/tests/unit_tests/indexes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,160 @@ def test_incremental_delete(
}


def test_incremental_delete_with_batch_size(
    record_manager: SQLRecordManager, vector_store: InMemoryVectorStore
) -> None:
    """Check incremental cleanup results when ``index`` is given a batch size.

    The scenario runs five indexing passes against the same record manager
    and vector store, each with the record manager's ``get_time`` patched to
    a fixed timestamp:

    1. Four new documents, ``batch_size=3``: all four are reported as added.
    2. The same four documents again: all four are reported as skipped.
    3. Only documents "1" and "2", at a later timestamp, ``batch_size=1``:
       both are reported as skipped and nothing is reported as deleted.
    4. The same as step 3 at an even later timestamp: same expectation.
    5. Documents with sources "1" and "2" but changed content (no explicit
       batch size): two are reported as added and two as deleted.
    """

    def frozen_clock(moment: datetime):
        # Pin record_manager.get_time() to a fixed timestamp for one pass.
        return patch.object(
            record_manager, "get_time", return_value=moment.timestamp()
        )

    def make_doc(content: str, source: str) -> Document:
        # Build a document whose metadata carries only the "source" key.
        return Document(page_content=content, metadata={"source": source})

    loader = ToyLoader(
        documents=[make_doc(label, label) for label in ("1", "2", "3", "4")]
    )

    # Pass 1: first time these documents are indexed.
    with frozen_clock(datetime(2021, 1, 2)):
        first_pass = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=3,
        )
        assert first_pass == {
            "num_added": 4,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }

    # Every stored entry is expected to be a document (never None), hence
    # the type-ignore on the attribute access.
    stored_texts = {
        vector_store.store.get(uid).page_content  # type: ignore
        for uid in vector_store.store
    }
    assert stored_texts == {"1", "2", "3", "4"}

    # Pass 2: identical input at the same timestamp; nothing should change.
    with frozen_clock(datetime(2021, 1, 2)):
        second_pass = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=3,
        )
        assert second_pass == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 4,
            "num_updated": 0,
        }

    # Pass 3: re-index an unchanged two-document subset, one document per
    # batch, at a later timestamp.
    with frozen_clock(datetime(2022, 1, 3)):
        unchanged_subset = [make_doc("1", "1"), make_doc("2", "2")]
        third_pass = index(
            unchanged_subset,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=1,
        )
        assert third_pass == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 2,
            "num_updated": 0,
        }

    # Pass 4: the same unchanged subset once more, at a still later timestamp.
    with frozen_clock(datetime(2023, 1, 3)):
        unchanged_subset = [make_doc("1", "1"), make_doc("2", "2")]
        fourth_pass = index(
            unchanged_subset,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=1,
        )
        assert fourth_pass == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 2,
            "num_updated": 0,
        }

    # Pass 5: same sources, different content. This call passes no explicit
    # batch_size, so index() runs with whatever its default is.
    with frozen_clock(datetime(2024, 1, 3)):
        changed_docs = [make_doc("changed 1", "1"), make_doc("changed 2", "2")]
        fifth_pass = index(
            changed_docs,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
        )
        assert fifth_pass == {
            "num_added": 2,
            "num_deleted": 2,
            "num_skipped": 0,
            "num_updated": 0,
        }


@pytest.mark.requires("aiosqlite")
async def test_aincremental_delete(
arecord_manager: SQLRecordManager, vector_store: InMemoryVectorStore
Expand Down

0 comments on commit 075a7bc

Please sign in to comment.