
Commit c33297c

Authored Mar 13, 2025
feat: Fix graph visualization to work with latest spanner-graph-notebook code; also, allow visualization when only some columns are json. (#102)
* Multi-column support
* Remove 'rows' field in results, as it's not used by the Javascript. Also, add a test for the multi-column case.
* reformat
* Fix test_bigquery.py tests, remove unnecessary mocking of GraphServer
* reformat
* Get basic graph visualization working against latest spanner code
* Fix unit tests
* Ignore columns we don't know how to visualize for visualization purposes, but still show them in the tabular view, and don't block visualizing remaining columns
* reformat
* Remove unused dependency on networkx
* Implement stub callback for node expansion
* Fix test_bigquery_graph_missing_spanner_deps. The mock query result must contain valid json so that the visualizer attempts to get launched, in order for the code path we're trying to test to get reached.
* reformat
* Add unit test for GraphServerHandler::handler_post_node_expansion()
* Add test for invalid node expansion request
* reformat
* Tweaks to improve code coverage
* More tweaks to improve code coverage
* avoid list comprehension due to code coverage tooling
* Fix visualization in colab. The problem is that, even though the port is not used in colab mode, the Javascript still throws an error if it's undefined.
1 parent dbb6442 commit c33297c

File tree

5 files changed, +200 −91 lines changed

‎bigquery_magics/bigquery.py

+41 −17

@@ -596,19 +596,31 @@ def _handle_result(result, args):
     return result


-def _is_colab() -> bool:
-    """Check if code is running in Google Colab"""
-    try:
-        import google.colab  # noqa: F401
+def _colab_query_callback(query: str, params: str):
+    return IPython.core.display.JSON(
+        graph_server.convert_graph_data(query_results=json.loads(params))
+    )

-        return True
-    except ImportError:
-        return False

+def _colab_node_expansion_callback(request: dict, params_str: str):
+    """Handle node expansion requests in Google Colab environment
+
+    Args:
+        request: A dictionary containing node expansion details including:
+            - uid: str - Unique identifier of the node to expand
+            - node_labels: List[str] - Labels of the node
+            - node_properties: List[Dict] - Properties of the node with key, value, and type
+            - direction: str - Direction of expansion ("INCOMING" or "OUTGOING")
+            - edge_label: Optional[str] - Label of edges to filter by
+        params_str: A JSON string containing connection parameters

-def _colab_callback(query: str, params: str):
+    Returns:
+        JSON: A JSON-serialized response containing either:
+            - The query results with nodes and edges
+            - An error message if the request failed
+    """
     return IPython.core.display.JSON(
-        graph_server.convert_graph_data(query_results=json.loads(params))
+        graph_server.execute_node_expansion(params_str, request)
     )


@@ -628,20 +640,30 @@ def _add_graph_widget(query_result):
     # visualizer widget. In colab, we are not able to create an http server on a
     # background thread, so we use a special colab-specific api to register a callback,
     # to be invoked from Javascript.
-    if _is_colab():
+    port = None
+    try:
         from google.colab import output

-        output.register_callback("graph_visualization.Query", _colab_callback)
-    else:
+        output.register_callback("graph_visualization.Query", _colab_query_callback)
+        output.register_callback(
+            "graph_visualization.NodeExpansion", _colab_node_expansion_callback
+        )
+
+        # In colab mode, the Javascript doesn't use the port value we pass in, as there is no
+        # graph server, but it still has to be set to avoid triggering an exception.
+        # TODO: Clean this up when the Javascript is fixed on the spanner-graph-notebook side.
+        port = 0
+    except ImportError:
         global singleton_server_thread
         alive = singleton_server_thread and singleton_server_thread.is_alive()
         if not alive:
             singleton_server_thread = graph_server.graph_server.init()
+        port = graph_server.graph_server.port

     # Create html to invoke the graph server
     html_content = generate_visualization_html(
         query="placeholder query",
-        port=graph_server.graph_server.port,
+        port=port,
         params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'),
     )
     IPython.display.display(IPython.core.display.HTML(html_content))

@@ -656,11 +678,13 @@ def _is_valid_json(s: str):


 def _supports_graph_widget(query_result: pandas.DataFrame):
-    num_rows, num_columns = query_result.shape
+    # Visualization is supported if we have any json items to display.
+    # (Non-json items are excluded from visualization, but we still want to bring up
+    # the visualizer for the json items.)
     for column in query_result.columns:
-        if not query_result[column].apply(_is_valid_json).all():
-            return False
-    return True
+        if query_result[column].apply(_is_valid_json).any():
+            return True
+    return False


 def _make_bq_query(
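To see the effect of the relaxed `_supports_graph_widget` check, here is a minimal standalone sketch (not part of the commit) of the new any-column-is-JSON rule. The two helpers below are illustrative re-implementations using only pandas and the json module, and may differ in detail from the library's own helpers:

import json

import pandas


def is_valid_json(s) -> bool:
    # Mirrors the idea of the _is_valid_json helper above: a cell counts as JSON
    # if it is a string that json.loads can parse.
    if not isinstance(s, str):
        return False
    try:
        json.loads(s)
        return True
    except ValueError:
        return False


def supports_graph_widget(df: pandas.DataFrame) -> bool:
    # New rule from this commit: visualize if *any* column holds valid JSON,
    # instead of requiring *every* column to be JSON.
    for column in df.columns:
        if df[column].apply(is_valid_json).any():
            return True
    return False


df = pandas.DataFrame(
    {
        "graph_json": ['{"kind": "node", "labels": ["Person"]}'],
        "note": ["not json"],
    }
)
print(supports_graph_widget(df))  # True: the JSON column alone is enough

Under the old all-columns-must-be-JSON rule, the non-JSON column would have blocked visualization entirely; now it is simply left out of the graph view and kept in the tabular view.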

‎bigquery_magics/graph_server.py

+50 −35

@@ -20,6 +20,10 @@
 from typing import Dict, List


+def execute_node_expansion(params, request):
+    return {"error": "Node expansion not yet implemented"}
+
+
 def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
     """
     Converts graph data to the form expected by the visualization framework.

@@ -49,16 +53,12 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
     # does not even get called unless spanner_graphs has already been confirmed
     # to exist upstream.
     from google.cloud.spanner_v1.types import StructType, Type, TypeCode
-    import networkx
-    from spanner_graphs.conversion import (
-        columns_to_native_numpy,
-        prepare_data_for_graphing,
-    )
+    from spanner_graphs.conversion import get_nodes_edges

     try:
         fields: List[StructType.Field] = []
         data = {}
-        rows = []
+        tabular_data = {}
         for key, value in query_results.items():
             column_name = None
             column_value = None

@@ -73,45 +73,39 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
                 StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
             )
             data[column_name] = []
+            tabular_data[column_name] = []
             for value_key, value_value in column_value.items():
-                if not isinstance(value_key, str):
-                    raise ValueError(
-                        f"Expected inner key to be str, got {type(value_key)}"
-                    )
-                if not isinstance(value_value, str):
-                    raise ValueError(
-                        f"Expected inner value to be str, got {type(value_value)}"
-                    )
-                row_json = json.loads(value_value)
-
-                if row_json is not None:
+                try:
+                    row_json = json.loads(value_value)
                     data[column_name].append(row_json)
-                    rows.append([row_json])
-
-        d, ignored_columns = columns_to_native_numpy(data, fields)
-
-        graph: networkx.classes.DiGraph = prepare_data_for_graphing(
-            incoming=d, schema_json=None
-        )
-
-        nodes = []
-        for node_id, node in graph.nodes(data=True):
-            nodes.append(node)
-
-        edges = []
-        for from_id, to_id, edge in graph.edges(data=True):
-            edges.append(edge)
+                    tabular_data[column_name].append(row_json)
+                except (ValueError, TypeError):
+                    # Non-JSON columns cannot be visualized, but we still want them
+                    # in the tabular view.
+                    tabular_data[column_name].append(str(value_value))
+
+        nodes, edges = get_nodes_edges(data, fields, schema_json=None)
+
+        # Convert nodes and edges to json objects.
+        # (Unfortunately, the code coverage tooling does not allow this
+        # to be expressed as list comprehension).
+        nodes_json = []
+        for node in nodes:
+            nodes_json.append(node.to_json())
+        edges_json = []
+        for edge in edges:
+            edges_json.append(edge.to_json())

         return {
             "response": {
                 # These fields populate the graph result view.
-                "nodes": nodes,
-                "edges": edges,
+                "nodes": nodes_json,
+                "edges": edges_json,
                 # This populates the visualizer's schema view, but not yet implemented on the
                 # BigQuery side.
                 "schema": None,
                 # This field is used to populate the visualizer's tabular view.
-                "query_result": data,
+                "query_result": tabular_data,
             }
         }
     except Exception as e:

@@ -133,6 +127,7 @@ class GraphServer:
     endpoints = {
         "get_ping": "/get_ping",
         "post_ping": "/post_ping",
+        "post_node_expansion": "/post_node_expansion",
         "post_query": "/post_query",
     }

@@ -228,13 +223,33 @@ def handle_post_query(self):
         response = convert_graph_data(query_results=json.loads(data["params"]))
         self.do_data_response(response)

+    def handle_post_node_expansion(self):
+        """Handle POST requests for node expansion.
+
+        Expects a JSON payload with:
+        - params: A JSON string containing connection parameters (project, instance, database, graph)
+        - request: A dictionary with node details (uid, node_labels, node_properties, direction, edge_label)
+        """
+        data = self.parse_post_data()
+
+        # Execute node expansion with:
+        # - params_str: JSON string with connection parameters (project, instance, database, graph)
+        # - request: Dict with node details (uid, node_labels, node_properties, direction, edge_label)
+        self.do_data_response(
+            execute_node_expansion(
+                params=data.get("params"), request=data.get("request")
+            )
+        )
+
     def do_GET(self):
         assert self.path == GraphServer.endpoints["get_ping"]
         self.handle_get_ping()

     def do_POST(self):
         if self.path == GraphServer.endpoints["post_ping"]:
             self.handle_post_ping()
+        elif self.path == GraphServer.endpoints["post_node_expansion"]:
+            self.handle_post_node_expansion()
         else:
             assert self.path == GraphServer.endpoints["post_query"]
             self.handle_post_query()
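With the new route wired into do_POST, the stub can be exercised end to end outside the widget. A minimal sketch, assuming the optional spanner-graph-notebook extras (including portpicker) are installed and the module is importable as bigquery_magics.graph_server; it mirrors the request shape used by the unit tests below, and the stub currently returns the same error for any input:

import json

import requests

import bigquery_magics.graph_server as graph_server

# Start the background HTTP server thread (same entry point the widget uses).
server_thread = graph_server.graph_server.init()

route = graph_server.graph_server.build_route(
    graph_server.GraphServer.endpoints["post_node_expansion"]
)
payload = {
    "request": {"uid": "some-node-uid", "direction": "OUTGOING"},  # hypothetical values
    "params": "{}",
}
response = requests.post(route, json={"params": json.dumps(payload)})
print(response.json())  # {'error': 'Node expansion not yet implemented'}

graph_server.graph_server.stop_server()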

‎setup.py

+1 −2

@@ -57,8 +57,7 @@
     "bigframes": ["bigframes >= 1.17.0"],
     "geopandas": ["geopandas >= 1.0.1"],
     "spanner-graph-notebook": [
-        "spanner-graph-notebook >= 1.1.1, <=1.1.1",
-        "networkx",
+        "spanner-graph-notebook >= 1.1.3",
         "portpicker",
     ],
 }
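After reinstalling the extra, a quick way to confirm the environment picked up the new floor (a sketch; it assumes the PyPI distribution name matches the extras entry above):

from importlib.metadata import version

# Expect 1.1.3 or newer; networkx is no longer pulled in by this extra.
print(version("spanner-graph-notebook"))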

‎tests/unit/test_bigquery.py

+34 −3

@@ -891,8 +891,8 @@ def test_bigquery_graph_colab(monkeypatch):
     graph_visualization is None or bigquery_storage is None,
     reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
 )
-def test_colab_callback():
-    result = bigquery_magics.bigquery._colab_callback(
+def test_colab_query_callback():
+    result = bigquery_magics.bigquery._colab_query_callback(
         "query", json.dumps({"result": {}})
     )
     assert result.data == {

@@ -905,6 +905,26 @@ def test_colab_callback():
     }


+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_colab_node_expansion_callback():
+    result = bigquery_magics.bigquery._colab_node_expansion_callback(
+        request={
+            "uid": "test_uid",
+            "node_labels": ["label1, label2"],
+            "node_properites": {},
+            "direction": "INCOMING",
+            "edge_label": None,
+        },
+        params_str="{}",
+    )
+
+    assert result.data == {"error": "Node expansion not yet implemented"}
+
+
 @pytest.mark.usefixtures("ipython_interactive")
 @pytest.mark.skipif(
     graph_visualization is not None or bigquery_storage is None,

@@ -932,7 +952,18 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):
         "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
     )
     sql = "SELECT graph_json FROM t"
-    result = pandas.DataFrame([], columns=["graph_json"])
+    graph_json_rows = [
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}]
+        """,
+    ]
+    result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
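For readers skimming the new fixture: each graph_json row is a JSON array mixing "node" and "edge" records keyed by "kind", with base64-encoded identifiers. A minimal sketch with hypothetical identifiers and properties, just to show the shape (real rows look like the fixture above):

import json

# Hypothetical row in the same shape as the graph_json fixture above.
row = json.dumps(
    [
        {
            "identifier": "node-1",  # hypothetical; real identifiers are base64-encoded
            "kind": "node",
            "labels": ["Person"],
            "properties": {"id": 1, "name": "Alex"},
        },
        {
            "identifier": "edge-1",  # hypothetical
            "kind": "edge",
            "labels": ["Owns"],
            "source_node_identifier": "node-1",
            "destination_node_identifier": "node-2",
            "properties": {"account_id": 7, "id": 1},
        },
        {
            "identifier": "node-2",  # hypothetical
            "kind": "node",
            "labels": ["Account"],
            "properties": {"id": 7, "nick_name": "Vacation Fund"},
        },
    ]
)

for element in json.loads(row):
    print(element["kind"], element["labels"])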

‎tests/unit/test_graph_server.py

+74 −34

@@ -116,18 +116,17 @@

 def _validate_nodes_and_edges(result):
     for edge in result["response"]["edges"]:
-        assert "id" in edge
-        assert edge["label"] == "Owns"
-        assert "source" in edge
-        assert "target" in edge
+        assert "source_node_identifier" in edge
+        assert "destination_node_identifier" in edge
+        assert "identifier" in edge
+        assert "Owns" in edge["labels"]
         assert "properties" in edge

+    print(result["response"]["nodes"])
     for node in result["response"]["nodes"]:
-        assert "id" in node
-        assert "key_property_names" in node
-        assert node["label"] in ("Account", "Person")
+        assert "identifier" in node
+        assert "Account" in node["labels"] or "Person" in node["labels"]
         assert "properties" in node
-        assert "value" in node


 @pytest.mark.skipif(

@@ -169,23 +168,26 @@ def test_convert_one_column_one_row_one_column():
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
-def test_convert_one_column_one_row_one_column_null_json():
+def test_convert_one_column_two_rows_one_column_null_json():
     result = graph_server.convert_graph_data(
         {
             "result": {
                 "0": json.dumps(None),
+                "1": json.dumps(row_alex_owns_account),
             }
         }
     )

-    assert result == {
-        "response": {
-            "edges": [],
-            "nodes": [],
-            "query_result": {"result": []},
-            "schema": None,
-        },
+    # Null JSON element should be ignored in visualization, but should still be present in tabular view.
+    assert len(result["response"]["nodes"]) == 2
+    assert len(result["response"]["edges"]) == 1
+
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {
+        "result": [None, row_alex_owns_account]
     }
+    assert result["response"]["schema"] is None

     _validate_nodes_and_edges(result)

@@ -228,7 +230,6 @@ def test_convert_one_row_two_columns():
             },
         }
     )
-    print(json.dumps(result))

     assert len(result["response"]["nodes"]) == 4
     assert len(result["response"]["edges"]) == 2

@@ -288,29 +289,29 @@ def test_convert_outer_value_not_dict():
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
-def test_convert_inner_key_not_string():
+def test_convert_inner_value_not_string():
     result = graph_server.convert_graph_data(
         {
-            "result": {
-                0: json.dumps({"foo": 1, "bar": 2}),
-            }
+            "col1": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+            "col2": {
+                "0": 12345,
+            },
         }
     )
-    assert result == {"error": "Expected inner key to be str, got <class 'int'>"}

+    # Non-JSON column should be ignored in visualizer view, but still appear in tabular view.
+    assert len(result["response"]["nodes"]) == 2
+    assert len(result["response"]["edges"]) == 1

-@pytest.mark.skipif(
-    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
-)
-def test_convert_inner_value_not_string():
-    result = graph_server.convert_graph_data(
-        {
-            "result": {
-                "0": 1,
-            }
-        }
-    )
-    assert result == {"error": "Expected inner value to be str, got <class 'int'>"}
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {
+        "col1": [row_alex_owns_account],
+        "col2": ["12345"],
+    }
+    assert result["response"]["schema"] is None


 @pytest.mark.skipif(

@@ -415,6 +416,45 @@ def test_post_query(self):
         )
         self.assertIsNone(response_data["schema"])

+    @pytest.mark.skipif(
+        graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+    )
+    def test_post_node_expansion(self):
+        self.assertTrue(self.server_thread.is_alive())
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_node_expansion"]
+        )
+        request = {
+            "request": {
+                "uid": "test_uid",
+                "node_labels": ["label1, label2"],
+                "node_properites": {},
+                "direction": "INCOMING",
+                "edge_label": None,
+            },
+            "params": "{}",
+        }
+        response = requests.post(route, json={"params": json.dumps(request)})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            response.json(), {"error": "Node expansion not yet implemented"}
+        )
+
+    @pytest.mark.skipif(
+        graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+    )
+    def test_post_node_expansion_invalid_request(self):
+        self.assertTrue(self.server_thread.is_alive())
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_node_expansion"]
+        )
+        request = {}
+        response = requests.post(route, json={"params": json.dumps(request)})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            response.json(), {"error": "Node expansion not yet implemented"}
+        )
+

 def test_stop_server_never_started():
     graph_server.graph_server.stop_server()
