
Commit dbb6442

Authored Mar 12, 2025
feat: Support multiple columns in graph visualization (#100)
* Multi-column support
* Remove 'rows' field in results, as it's not used by the Javascript. Also, add a test for the multi-column case.
* reformat
* Fix test_bigquery.py tests, remove unnecessary mocking of GraphServer
* reformat
1 parent ae226ed commit dbb6442

4 files changed: +90 −97 lines changed

bigquery_magics/bigquery.py

+4 −3
@@ -657,9 +657,10 @@ def _is_valid_json(s: str):

 def _supports_graph_widget(query_result: pandas.DataFrame):
     num_rows, num_columns = query_result.shape
-    if num_columns != 1:
-        return False
-    return query_result[query_result.columns[0]].apply(_is_valid_json).all()
+    for column in query_result.columns:
+        if not query_result[column].apply(_is_valid_json).all():
+            return False
+    return True


 def _make_bq_query(
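
For orientation, a minimal usage sketch of the reworked check (not part of the commit; it assumes pandas is available and that the private helper can be imported from a local checkout):

import pandas

from bigquery_magics.bigquery import _supports_graph_widget  # private helper, imported only for illustration

# Every cell in every column must parse as JSON for the graph widget to be used.
df = pandas.DataFrame(
    {
        "col1": ['{"nodes": [], "edges": []}'],
        "col2": ['{"nodes": [], "edges": []}'],
    }
)
assert _supports_graph_widget(df)  # multi-column results are now accepted

# A non-JSON cell in any column still disables the graph widget.
df_bad = pandas.DataFrame({"col1": ['{"nodes": []}'], "col2": ["not json"]})
assert not _supports_graph_widget(df_bad)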

bigquery_magics/graph_server.py

+31 −36
@@ -56,45 +56,37 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
         )

     try:
-        column_name = None
-        column_value = None
+        fields: List[StructType.Field] = []
+        data = {}
+        rows = []
         for key, value in query_results.items():
-            if column_name is None:
-                if not isinstance(key, str):
-                    raise ValueError(f"Expected outer key to be str, got {type(key)}")
-                if not isinstance(value, dict):
+            column_name = None
+            column_value = None
+            if not isinstance(key, str):
+                raise ValueError(f"Expected outer key to be str, got {type(key)}")
+            if not isinstance(value, dict):
+                raise ValueError(f"Expected outer value to be dict, got {type(value)}")
+            column_name = key
+            column_value = value
+
+            fields.append(
+                StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
+            )
+            data[column_name] = []
+            for value_key, value_value in column_value.items():
+                if not isinstance(value_key, str):
                     raise ValueError(
-                        f"Expected outer value to be dict, got {type(value)}"
+                        f"Expected inner key to be str, got {type(value_key)}"
                     )
-                column_name = key
-                column_value = value
-            else:
-                # TODO: Implement multi-column support.
-                raise ValueError(
-                    "Query has multiple columns - graph visualization not supported"
-                )
-        if column_name is None or column_value is None:
-            raise ValueError(
-                "query result with no columns is not supported for graph visualization"
-            )
+                if not isinstance(value_value, str):
+                    raise ValueError(
+                        f"Expected inner value to be str, got {type(value_value)}"
+                    )
+                row_json = json.loads(value_value)

-        fields: List[StructType.Field] = [
-            StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
-        ]
-        data = {column_name: []}
-        rows = []
-        for value_key, value_value in column_value.items():
-            if not isinstance(value_key, str):
-                raise ValueError(f"Expected inner key to be str, got {type(value_key)}")
-            if not isinstance(value_value, str):
-                raise ValueError(
-                    f"Expected inner value to be str, got {type(value_value)}"
-                )
-            row_json = json.loads(value_value)
-
-            if row_json is not None:
-                data[column_name].append(row_json)
-                rows.append([row_json])
+                if row_json is not None:
+                    data[column_name].append(row_json)
+                    rows.append([row_json])

         d, ignored_columns = columns_to_native_numpy(data, fields)

@@ -112,10 +104,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):

     return {
         "response": {
+            # These fields populate the graph result view.
             "nodes": nodes,
             "edges": edges,
+            # This populates the visualizer's schema view, but not yet implemented on the
+            # BigQuery side.
             "schema": None,
-            "rows": rows,
+            # This field is used to populate the visualizer's tabular view.
             "query_result": data,
         }
     }
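
As a rough illustration of the reshaped response (a sketch rather than code from the commit; it assumes spanner-graph-notebook is installed, and the column names and payloads are made up):

import json

import bigquery_magics.graph_server as graph_server

# Outer keys are column names; inner dicts map row keys to JSON strings.
result = graph_server.convert_graph_data(
    {
        "col1": {"0": json.dumps({"foo": 1})},
        "col2": {"0": json.dumps({"bar": 2})},
    }
)

# The "rows" field is gone; the tabular view now reads "query_result",
# which is keyed by column name.
assert sorted(result["response"]) == ["edges", "nodes", "query_result", "schema"]
assert result["response"]["query_result"] == {
    "col1": [{"foo": 1}],
    "col2": [{"bar": 2}],
}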

tests/unit/test_bigquery.py

+21 −25
@@ -39,6 +39,7 @@

 import bigquery_magics
 import bigquery_magics.bigquery as magics
+import bigquery_magics.graph_server as graph_server

 try:
     import google.cloud.bigquery_storage as bigquery_storage
@@ -677,10 +678,12 @@ def test_bigquery_graph_json_json_result(monkeypatch):
         bqstorage_client_patch
     ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

-        # As we only support visualization with single-column queries, the visualizer should not be launched.
-        display_mock.assert_not_called()
+        display_mock.assert_called()

     assert bqstorage_mock.called  # BQ storage client was used
     assert isinstance(return_value, pandas.DataFrame)
@@ -729,9 +732,6 @@ def test_bigquery_graph_json_result(monkeypatch):
     ]
     result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -740,10 +740,7 @@ def test_bigquery_graph_json_result(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
-        graph_server_init_mock.return_value = mock.Mock()
-        graph_server_init_mock.return_value.is_alive = mock.Mock()
-        graph_server_init_mock.return_value.is_alive.return_value = True
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock

         return_value = ip.run_cell_magic("bigquery", "--graph", sql)
@@ -770,7 +767,10 @@ def test_bigquery_graph_json_result(monkeypatch):
         )  # identifier in 3rd row of query result

         # Make sure we can run a second graph query, after the graph server is already running.
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

         # Sanity check that the HTML content looks like graph visualization. Minimal check
         # to allow Spanner to change its implementation without breaking this test.
@@ -841,9 +841,6 @@ def test_bigquery_graph_colab(monkeypatch):
     ]
     result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -852,10 +849,12 @@ def test_bigquery_graph_colab(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        graph_server_init_mock.return_value = None
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

     assert len(display_mock.call_args_list) == 1
     assert len(display_mock.call_args_list[0]) == 2
@@ -880,7 +879,6 @@ def test_bigquery_graph_colab(monkeypatch):

     # Make sure we actually used colab path, not GraphServer path.
     assert sys.modules["google.colab"].output.register_callback.called
-    assert not graph_server_init_mock.called

     assert bqstorage_mock.called  # BQ storage client was used
     assert isinstance(return_value, pandas.DataFrame)
@@ -902,7 +900,6 @@ def test_colab_callback():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [],
             "schema": None,
         }
     }
@@ -937,9 +934,6 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):
     sql = "SELECT graph_json FROM t"
     result = pandas.DataFrame([], columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -948,11 +942,13 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        graph_server_init_mock.return_value = None
         with pytest.raises(ImportError):
-            ip.run_cell_magic("bigquery", "--graph", sql)
+            try:
+                ip.run_cell_magic("bigquery", "--graph", sql)
+            finally:
+                graph_server.graph_server.stop_server()
         display_mock.assert_not_called()


tests/unit/test_graph_server.py

+34 −33
@@ -140,7 +140,6 @@ def test_convert_one_column_no_rows():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [],
             "schema": None,
         }
     }
@@ -164,7 +163,6 @@ def test_convert_one_column_one_row_one_column():
     _validate_nodes_and_edges(result)

     assert result["response"]["query_result"] == {"result": [row_alex_owns_account]}
-    assert result["response"]["rows"] == [[row_alex_owns_account]]
     assert result["response"]["schema"] is None

@@ -185,11 +183,6 @@ def test_convert_one_column_one_row_one_column_null_json():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [
-                [
-                    None,
-                ]
-            ],
             "schema": None,
         },
     }
@@ -218,10 +211,34 @@ def test_convert_one_column_two_rows():
     assert result["response"]["query_result"] == {
         "result": [row_alex_owns_account, row_lee_owns_account]
     }
-    assert result["response"]["rows"] == [
-        [row_alex_owns_account],
-        [row_lee_owns_account],
-    ]
+    assert result["response"]["schema"] is None
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_row_two_columns():
+    result = graph_server.convert_graph_data(
+        {
+            "col1": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+            "col2": {
+                "0": json.dumps(row_lee_owns_account),
+            },
+        }
+    )
+    print(json.dumps(result))
+
+    assert len(result["response"]["nodes"]) == 4
+    assert len(result["response"]["edges"]) == 2
+
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {
+        "col1": [row_alex_owns_account],
+        "col2": [row_lee_owns_account],
+    }
     assert result["response"]["schema"] is None

@@ -243,7 +260,6 @@ def test_convert_nongraph_json():
     assert len(result["response"]["edges"]) == 0

     assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]}
-    assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]]
     assert result["response"]["schema"] is None

@@ -297,32 +313,18 @@ def test_convert_inner_value_not_string():
     assert result == {"error": "Expected inner value to be str, got <class 'int'>"}


-@pytest.mark.skipif(
-    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
-)
-def test_convert_one_column_one_row_two_columns():
-    result = graph_server.convert_graph_data(
-        {
-            "result1": {
-                "0": json.dumps(row_alex_owns_account),
-            },
-            "result2": {
-                "0": json.dumps(row_alex_owns_account),
-            },
-        }
-    )
-    assert result == {
-        "error": "Query has multiple columns - graph visualization not supported"
-    }
-
-
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_empty_dict():
     result = graph_server.convert_graph_data({})
     assert result == {
-        "error": "query result with no columns is not supported for graph visualization"
+        "response": {
+            "nodes": [],
+            "edges": [],
+            "schema": None,
+            "query_result": {},
+        }
     }

@@ -411,7 +413,6 @@ def test_post_query(self):
         self.assertEqual(
             response_data["query_result"], {"result": [row_alex_owns_account]}
         )
-        self.assertEqual(response_data["rows"], [[row_alex_owns_account]])
         self.assertIsNone(response_data["schema"])

