Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove neo4j bloom labels from graph schema #18564

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 11 additions & 6 deletions libs/community/langchain_community/graphs/neo4j_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from langchain_community.graphs.graph_store import GraphStore

BASE_ENTITY_LABEL = "__Entity__"
EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"]
EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"]

node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node"
AND NOT label IN [$BASE_ENTITY_LABEL]
AND NOT label IN $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output

Expand All @@ -21,6 +23,7 @@
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship"
AND NOT label in $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {type: nodeLabels, properties: properties} AS output
"""
Expand All @@ -30,8 +33,8 @@
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node"
UNWIND other AS other_node
WITH * WHERE NOT label IN [$BASE_ENTITY_LABEL]
AND NOT other_node IN [$BASE_ENTITY_LABEL]
WITH * WHERE NOT label IN $EXCLUDED_LABELS
AND NOT other_node IN $EXCLUDED_LABELS
RETURN {start: label, type: property, end: toString(other_node)} AS output
"""

Expand Down Expand Up @@ -237,19 +240,21 @@ def refresh_schema(self) -> None:
node_properties = [
el["output"]
for el in self.query(
node_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
node_properties_query,
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
)
]
rel_properties = [
el["output"]
for el in self.query(
rel_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
rel_properties_query, params={"EXCLUDED_LABELS": EXCLUDED_RELS}
)
]
relationships = [
el["output"]
for el in self.query(
rel_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
rel_query,
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
)
]

Expand Down
30 changes: 27 additions & 3 deletions libs/community/tests/integration_tests/graphs/test_neo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ def test_cypher_return_correct_schema() -> None:
graph.refresh_schema()

node_properties = graph.query(
node_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
node_properties_query, params={"EXCLUDED_LABELS": [BASE_ENTITY_LABEL]}
)
relationships_properties = graph.query(
rel_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
rel_properties_query, params={"EXCLUDED_LABELS": [BASE_ENTITY_LABEL]}
)
relationships = graph.query(
rel_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
rel_query, params={"EXCLUDED_LABELS": [BASE_ENTITY_LABEL]}
)

expected_node_properties = [
Expand Down Expand Up @@ -249,3 +249,27 @@ def test_neo4j_add_data_base_source() -> None:
{"label": [BASE_ENTITY_LABEL, "foo"], "count": 1},
]
assert graph.structured_schema["metadata"]["constraint"] != []


def test_neo4j_filtering_labels() -> None:
    """Check that Neo4j Bloom bookkeeping elements are left out of the schema.

    Connection details are read from the ``NEO4J_URI``, ``NEO4J_USERNAME``
    and ``NEO4J_PASSWORD`` environment variables. The database is wiped,
    seeded only with Bloom-labelled nodes joined by a Bloom relationship,
    and the refreshed schema is then expected to report no node properties
    and no relationships.
    """
    uri = os.environ.get("NEO4J_URI")
    user = os.environ.get("NEO4J_USERNAME")
    secret = os.environ.get("NEO4J_PASSWORD")
    assert uri is not None
    assert user is not None
    assert secret is not None

    graph = Neo4jGraph(url=uri, username=user, password=secret, sanitize=True)

    # Start from a clean slate: drop every node (with its relationships)
    # first, then clear all constraints.
    for reset_statement in (
        "MATCH (n) DETACH DELETE n",
        "CALL apoc.schema.assert({}, {})",
    ):
        graph.query(reset_statement)

    # Seed the graph with nothing but Bloom-internal nodes and relationship.
    graph.query(
        "CREATE (:`_Bloom_Scene_`)-[:_Bloom_HAS_SCENE_]->(:`_Bloom_Perspective_`)"
    )
    graph.refresh_schema()

    # Neither the Bloom nodes nor the Bloom relationship may show up.
    schema = graph.structured_schema
    assert schema["node_props"] == {}
    assert schema["relationships"] == []