Skip to content

Commit

Permalink
add test suites for document manager
Browse files Browse the repository at this point in the history
  • Loading branch information
2jimoo committed Feb 11, 2024
1 parent 53a4a7c commit 2f15311
Show file tree
Hide file tree
Showing 3 changed files with 283 additions and 3 deletions.
6 changes: 3 additions & 3 deletions libs/community/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions libs/community/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
nvidia-riva-client = {version = "^2.14.0", optional = true}
pymongo = {version = "^4.6.0", optional = true}

[tool.poetry.group.test]
optional = true
Expand Down Expand Up @@ -198,6 +199,7 @@ extended_testing = [
"pdfminer-six",
"pgvector",
"pypdf",
"pymongo",
"pymupdf",
"pypdfium2",
"tqdm",
Expand Down
278 changes: 278 additions & 0 deletions libs/community/tests/unit_tests/indexes/test_document_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
from datetime import datetime
from typing import AsyncIterator, Iterator
from unittest.mock import patch

import pytest
import pytest_asyncio

from langchain_community.indexes._document_manager import MongoDocumentManager


@pytest.fixture
def manager() -> Iterator[MongoDocumentManager]:
    """Yield a MongoDocumentManager backed by an emptied test collection.

    The collection is wiped before the test body runs and again afterwards.
    All tests in this module share one database, collection and namespace,
    so without the wipe any test asserting on the exact stored keys would
    depend on what earlier tests (or an earlier run) left behind.

    No ``requires`` mark is applied here: pytest does not honour marks placed
    on fixtures, and the tests consuming this fixture carry the mark already.

    Yields:
        A manager for namespace ``"kittens"`` on the local test MongoDB.
    """
    document_manager = MongoDocumentManager(
        namespace="kittens",
        mongodb_url="mongodb://localhost:27017/",
        db_name="test_db",
        collection_name="test_collection",
    )
    # Clear on setup too, in case an aborted run never reached its teardown.
    document_manager.sync_collection.delete_many({})
    yield document_manager
    # Remove everything the test wrote, including other-namespace documents.
    document_manager.sync_collection.delete_many({})


@pytest_asyncio.fixture
async def amanager() -> AsyncIterator[MongoDocumentManager]:
    """Yield a MongoDocumentManager backed by an emptied test collection.

    Async counterpart of the synchronous fixture. The collection is wiped
    through the async collection handle before the test body runs and again
    afterwards. All tests in this module share one database, collection and
    namespace, so without the wipe any test asserting on the exact stored
    keys would depend on what earlier tests (or an earlier run) left behind.

    No ``requires`` mark is applied here: pytest does not honour marks placed
    on fixtures, and the tests consuming this fixture carry the mark already.

    Yields:
        A manager for namespace ``"kittens"`` on the local test MongoDB.
    """
    document_manager = MongoDocumentManager(
        namespace="kittens",
        mongodb_url="mongodb://localhost:27017/",
        db_name="test_db",
        collection_name="test_collection",
    )
    # Clear on setup too, in case an aborted run never reached its teardown.
    await document_manager.async_collection.delete_many({})
    yield document_manager
    # Remove everything the test wrote, including other-namespace documents.
    await document_manager.async_collection.delete_many({})


@pytest.mark.requires("pymongo")
def test_update(manager: MongoDocumentManager) -> None:
    """Check that keys passed to ``update`` are reported by ``list_keys``."""
    # Nothing is expected to be stored before the first update.
    assert manager.list_keys() == []

    inserted = ["key1", "key2", "key3"]
    manager.update(inserted)

    # Both sides are sorted so the listing order is not relied upon.
    listed = manager.list_keys()
    assert sorted(listed) == sorted(["key1", "key2", "key3"])


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_aupdate(amanager: MongoDocumentManager) -> None:
    """Check that keys passed to ``aupdate`` are reported by ``alist_keys``."""
    # Nothing is expected to be stored before the first update.
    assert await amanager.alist_keys() == []

    # Write three keys, then read the listing back.
    inserted = ["key1", "key2", "key3"]
    await amanager.aupdate(inserted)

    # Both sides are sorted so the listing order is not relied upon.
    listed = await amanager.alist_keys()
    assert sorted(listed) == sorted(["key1", "key2", "key3"])


@pytest.mark.requires("pymongo")
def test_update_timestamp(manager: MongoDocumentManager) -> None:
    """Check the document written by ``update`` while ``get_time`` is patched."""
    frozen_time = datetime(2021, 1, 2).timestamp()
    with patch.object(manager, "get_time", return_value=frozen_time):
        manager.update(["key1"])

    # MongoDB does not use a session, so query the collection directly.
    stored = manager.sync_collection.find({"namespace": manager.namespace})
    observed = [
        {
            "key": doc["key"],
            "namespace": doc["namespace"],
            "updated_at": doc["updated_at"],
            # ``group_id`` is read with ``get`` so a missing field yields None.
            "group_id": doc.get("group_id"),
        }
        for doc in stored
    ]

    expected = [
        {
            "group_id": None,
            "key": "key1",
            "namespace": "kittens",
            "updated_at": datetime(2021, 1, 2).timestamp(),
        }
    ]
    assert observed == expected


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_aupdate_timestamp(amanager: MongoDocumentManager) -> None:
    """Check the document written by ``aupdate`` while ``aget_time`` is patched."""
    frozen_time = datetime(2021, 1, 2).timestamp()
    with patch.object(amanager, "aget_time", return_value=frozen_time):
        await amanager.aupdate(["key1"])

    # Read the stored documents straight from the async collection.
    cursor = amanager.async_collection.find({"namespace": amanager.namespace})
    observed = [
        {
            "key": doc["key"],
            "namespace": doc["namespace"],
            "updated_at": doc["updated_at"],
            # ``group_id`` is read with ``get`` so a missing field yields None.
            "group_id": doc.get("group_id"),
        }
        async for doc in cursor
    ]

    expected = [
        {
            "group_id": None,
            "key": "key1",
            "namespace": "kittens",
            "updated_at": datetime(2021, 1, 2).timestamp(),
        }
    ]
    assert observed == expected


@pytest.mark.requires("pymongo")
def test_exists(manager: MongoDocumentManager) -> None:
    """Check that ``exists`` returns one boolean per requested key."""
    stored_keys = ["key1", "key2", "key3"]
    manager.update(stored_keys)

    # Every key that was just written must be reported as present.
    all_present = manager.exists(stored_keys)
    assert len(all_present) == len(stored_keys)
    assert all(all_present)

    # A mix of a written and an unwritten key yields a positional result.
    mixed = manager.exists(["key1", "key4"])
    assert len(mixed) == 2
    assert mixed == [True, False]


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_aexists(amanager: MongoDocumentManager) -> None:
    """Check that ``aexists`` returns one boolean per requested key."""
    stored_keys = ["key1", "key2", "key3"]
    await amanager.aupdate(stored_keys)

    # Every key that was just written must be reported as present.
    all_present = await amanager.aexists(stored_keys)
    assert len(all_present) == len(stored_keys)
    assert all(all_present)

    # A mix of a written and an unwritten key yields a positional result.
    mixed = await amanager.aexists(["key1", "key4"])
    assert len(mixed) == 2
    assert mixed == [True, False]


@pytest.mark.requires("pymongo")
def test_list_keys(manager: MongoDocumentManager) -> None:
    """Check ``list_keys`` unfiltered, with ``after`` and with ``group_ids``."""
    # Start from an empty namespace.
    manager.delete_keys(manager.list_keys())

    # One key per year, each written while ``get_time`` is pinned to
    # January 1st of that year; only the last key is given a group id.
    writes = [
        (2021, ["key1"], {}),
        (2022, ["key2"], {}),
        (2023, ["key3"], {}),
        (2024, ["key4"], {"group_ids": ["group1"]}),
    ]
    for year, keys, extra in writes:
        pinned = datetime(year, 1, 1).timestamp()
        with patch.object(manager, "get_time", return_value=pinned):
            manager.update(keys, **extra)

    everything = manager.list_keys()
    assert sorted(everything) == sorted(["key1", "key2", "key3", "key4"])

    # Only the keys written after the cutoff are expected back.
    cutoff = datetime(2022, 2, 1).timestamp()
    recent = manager.list_keys(after=cutoff)
    assert sorted(recent) == sorted(["key3", "key4"])

    # "group2" was never written, so only the "group1" key is expected.
    grouped = manager.list_keys(group_ids=["group1", "group2"])
    assert sorted(grouped) == sorted(["key4"])


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_alist_keys(amanager: MongoDocumentManager) -> None:
    """Check ``alist_keys`` unfiltered, with ``after`` and with ``group_ids``."""
    # Start from an empty namespace.
    existing = await amanager.alist_keys()
    await amanager.adelete_keys(existing)

    # One key per year, each written while ``aget_time`` is pinned to
    # January 1st of that year; only the last key is given a group id.
    writes = [
        (2021, ["key1"], {}),
        (2022, ["key2"], {}),
        (2023, ["key3"], {}),
        (2024, ["key4"], {"group_ids": ["group1"]}),
    ]
    for year, keys, extra in writes:
        pinned = datetime(year, 1, 1).timestamp()
        with patch.object(amanager, "aget_time", return_value=pinned):
            await amanager.aupdate(keys, **extra)

    everything = await amanager.alist_keys()
    assert sorted(everything) == sorted(["key1", "key2", "key3", "key4"])

    # Only the keys written after the cutoff are expected back.
    cutoff = datetime(2022, 2, 1).timestamp()
    recent = await amanager.alist_keys(after=cutoff)
    assert sorted(recent) == sorted(["key3", "key4"])

    # "group2" was never written, so only the "group1" key is expected.
    grouped = await amanager.alist_keys(group_ids=["group1", "group2"])
    assert sorted(grouped) == sorted(["key4"])


@pytest.mark.requires("pymongo")
def test_namespace_is_used(manager: MongoDocumentManager) -> None:
    """Check that list, delete and update only act on the manager's namespace."""
    # Start from an empty namespace, then write two keys through the manager.
    manager.delete_keys(manager.list_keys())
    manager.update(["key1", "key2"], group_ids=["group1", "group2"])

    # Insert documents for a different namespace directly into the collection.
    foreign_docs = [
        {"key": key, "namespace": "puppies", "group_id": None}
        for key in ("key1", "key3")
    ]
    manager.sync_collection.insert_many(foreign_docs)

    # The "puppies" documents must not show up in the listing.
    assert sorted(manager.list_keys()) == sorted(["key1", "key2"])

    manager.delete_keys(["key1"])
    assert sorted(manager.list_keys()) == sorted(["key2"])

    # Writing "key3" must produce a "kittens" document carrying the group id.
    manager.update(["key3"], group_ids=["group3"])
    stored = manager.sync_collection.find_one(
        {"key": "key3", "namespace": "kittens"}
    )
    assert stored["group_id"] == "group3"


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_anamespace_is_used(amanager: MongoDocumentManager) -> None:
    """Check that async list, delete and update only act on the namespace."""
    # Start from an empty namespace, then write two keys through the manager.
    existing = await amanager.alist_keys()
    await amanager.adelete_keys(existing)
    await amanager.aupdate(["key1", "key2"], group_ids=["group1", "group2"])

    # Insert documents for a different namespace directly into the collection.
    foreign_docs = [
        {"key": key, "namespace": "puppies", "group_id": None}
        for key in ("key1", "key3")
    ]
    await amanager.async_collection.insert_many(foreign_docs)

    # The "puppies" documents must not show up in the listing.
    assert sorted(await amanager.alist_keys()) == sorted(["key1", "key2"])

    await amanager.adelete_keys(["key1"])
    assert sorted(await amanager.alist_keys()) == sorted(["key2"])

    # Writing "key3" must produce a "kittens" document carrying the group id.
    await amanager.aupdate(["key3"], group_ids=["group3"])
    stored = await amanager.async_collection.find_one(
        {"key": "key3", "namespace": "kittens"}
    )
    assert stored["group_id"] == "group3"


@pytest.mark.requires("pymongo")
def test_delete_keys(manager: MongoDocumentManager) -> None:
    """Check that ``delete_keys`` removes only the keys it is given."""
    manager.update(["key1", "key2", "key3"])

    manager.delete_keys(["key1", "key2"])

    # Only the key that was not deleted should still be listed.
    assert sorted(manager.list_keys()) == sorted(["key3"])


@pytest.mark.asyncio
@pytest.mark.requires("motor")
async def test_adelete_keys(amanager: MongoDocumentManager) -> None:
    """Check that ``adelete_keys`` removes only the keys it is given."""
    await amanager.aupdate(["key1", "key2", "key3"])

    await amanager.adelete_keys(["key1", "key2"])

    # Only the key that was not deleted should still be listed.
    assert sorted(await amanager.alist_keys()) == sorted(["key3"])

0 comments on commit 2f15311

Please sign in to comment.