
Commit dbb6442

Authored Mar 12, 2025
feat: Support multiple columns in graph visualization (#100)
* Multi-column support
* Remove 'rows' field in results, as it's not used by the Javascript. Also, add a test for the multi-column case.
* reformat
* Fix test_bigquery.py tests, remove unnecessary mocking of GraphServer
* reformat
1 parent ae226ed commit dbb6442

4 files changed: +90 −97 lines changed

bigquery_magics/bigquery.py

+4 −3
@@ -657,9 +657,10 @@ def _is_valid_json(s: str):

 def _supports_graph_widget(query_result: pandas.DataFrame):
     num_rows, num_columns = query_result.shape
-    if num_columns != 1:
-        return False
-    return query_result[query_result.columns[0]].apply(_is_valid_json).all()
+    for column in query_result.columns:
+        if not query_result[column].apply(_is_valid_json).all():
+            return False
+    return True


 def _make_bq_query(
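
For orientation, a minimal usage sketch of the reworked check (not part of the commit; it assumes pandas is available and that the private helper can be imported from a local checkout):

import pandas

from bigquery_magics.bigquery import _supports_graph_widget  # private helper, imported only for illustration

# Every cell in every column must parse as JSON for the graph widget to be used.
df = pandas.DataFrame(
    {
        "col1": ['{"nodes": [], "edges": []}'],
        "col2": ['{"nodes": [], "edges": []}'],
    }
)
assert _supports_graph_widget(df)  # multi-column results are now accepted

# A non-JSON cell in any column still disables the graph widget.
df_bad = pandas.DataFrame({"col1": ['{"nodes": []}'], "col2": ["not json"]})
assert not _supports_graph_widget(df_bad)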

bigquery_magics/graph_server.py

+31 −36
@@ -56,45 +56,37 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
         )

     try:
-        column_name = None
-        column_value = None
+        fields: List[StructType.Field] = []
+        data = {}
+        rows = []
         for key, value in query_results.items():
-            if column_name is None:
-                if not isinstance(key, str):
-                    raise ValueError(f"Expected outer key to be str, got {type(key)}")
-                if not isinstance(value, dict):
+            column_name = None
+            column_value = None
+            if not isinstance(key, str):
+                raise ValueError(f"Expected outer key to be str, got {type(key)}")
+            if not isinstance(value, dict):
+                raise ValueError(f"Expected outer value to be dict, got {type(value)}")
+            column_name = key
+            column_value = value
+
+            fields.append(
+                StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
+            )
+            data[column_name] = []
+            for value_key, value_value in column_value.items():
+                if not isinstance(value_key, str):
                     raise ValueError(
-                        f"Expected outer value to be dict, got {type(value)}"
+                        f"Expected inner key to be str, got {type(value_key)}"
                     )
-                column_name = key
-                column_value = value
-            else:
-                # TODO: Implement multi-column support.
-                raise ValueError(
-                    "Query has multiple columns - graph visualization not supported"
-                )
-        if column_name is None or column_value is None:
-            raise ValueError(
-                "query result with no columns is not supported for graph visualization"
-            )
+                if not isinstance(value_value, str):
+                    raise ValueError(
+                        f"Expected inner value to be str, got {type(value_value)}"
+                    )
+                row_json = json.loads(value_value)

-        fields: List[StructType.Field] = [
-            StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
-        ]
-        data = {column_name: []}
-        rows = []
-        for value_key, value_value in column_value.items():
-            if not isinstance(value_key, str):
-                raise ValueError(f"Expected inner key to be str, got {type(value_key)}")
-            if not isinstance(value_value, str):
-                raise ValueError(
-                    f"Expected inner value to be str, got {type(value_value)}"
-                )
-            row_json = json.loads(value_value)
-
-            if row_json is not None:
-                data[column_name].append(row_json)
-                rows.append([row_json])
+                if row_json is not None:
+                    data[column_name].append(row_json)
+                    rows.append([row_json])

         d, ignored_columns = columns_to_native_numpy(data, fields)

@@ -112,10 +104,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):

     return {
         "response": {
+            # These fields populate the graph result view.
             "nodes": nodes,
             "edges": edges,
+            # This populates the visualizer's schema view, but not yet implemented on the
+            # BigQuery side.
             "schema": None,
-            "rows": rows,
+            # This field is used to populate the visualizer's tabular view.
             "query_result": data,
         }
     }
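
As a rough illustration of the reshaped response (a sketch rather than code from the commit; it assumes spanner-graph-notebook is installed, and the column names and payloads are made up):

import json

import bigquery_magics.graph_server as graph_server

# Outer keys are column names; inner dicts map row keys to JSON strings.
result = graph_server.convert_graph_data(
    {
        "col1": {"0": json.dumps({"foo": 1})},
        "col2": {"0": json.dumps({"bar": 2})},
    }
)

# The "rows" field is gone; the tabular view now reads "query_result",
# which is keyed by column name.
assert sorted(result["response"]) == ["edges", "nodes", "query_result", "schema"]
assert result["response"]["query_result"] == {
    "col1": [{"foo": 1}],
    "col2": [{"bar": 2}],
}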

tests/unit/test_bigquery.py

+21 −25
@@ -39,6 +39,7 @@

 import bigquery_magics
 import bigquery_magics.bigquery as magics
+import bigquery_magics.graph_server as graph_server

 try:
     import google.cloud.bigquery_storage as bigquery_storage
@@ -677,10 +678,12 @@ def test_bigquery_graph_json_json_result(monkeypatch):
         bqstorage_client_patch
     ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

-        # As we only support visualization with single-column queries, the visualizer should not be launched.
-        display_mock.assert_not_called()
+        display_mock.assert_called()

     assert bqstorage_mock.called  # BQ storage client was used
     assert isinstance(return_value, pandas.DataFrame)
@@ -729,9 +732,6 @@ def test_bigquery_graph_json_result(monkeypatch):
     ]
     result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -740,10 +740,7 @@ def test_bigquery_graph_json_result(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
-        graph_server_init_mock.return_value = mock.Mock()
-        graph_server_init_mock.return_value.is_alive = mock.Mock()
-        graph_server_init_mock.return_value.is_alive.return_value = True
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock

         return_value = ip.run_cell_magic("bigquery", "--graph", sql)
@@ -770,7 +767,10 @@ def test_bigquery_graph_json_result(monkeypatch):
         )  # identifier in 3rd row of query result

         # Make sure we can run a second graph query, after the graph server is already running.
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

         # Sanity check that the HTML content looks like graph visualization. Minimal check
         # to allow Spanner to change its implementation without breaking this test.
@@ -841,9 +841,6 @@ def test_bigquery_graph_colab(monkeypatch):
     ]
     result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -852,10 +849,12 @@ def test_bigquery_graph_colab(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        graph_server_init_mock.return_value = None
-        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        try:
+            return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+        finally:
+            graph_server.graph_server.stop_server()

     assert len(display_mock.call_args_list) == 1
     assert len(display_mock.call_args_list[0]) == 2
@@ -880,7 +879,6 @@ def test_bigquery_graph_colab(monkeypatch):

     # Make sure we actually used colab path, not GraphServer path.
     assert sys.modules["google.colab"].output.register_callback.called
-    assert not graph_server_init_mock.called

     assert bqstorage_mock.called  # BQ storage client was used
     assert isinstance(return_value, pandas.DataFrame)
@@ -902,7 +900,6 @@ def test_colab_callback():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [],
             "schema": None,
         }
     }
@@ -937,9 +934,6 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):
     sql = "SELECT graph_json FROM t"
     result = pandas.DataFrame([], columns=["graph_json"])
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
-    graph_server_init_patch = mock.patch(
-        "bigquery_magics.graph_server.GraphServer.init", autospec=True
-    )
     display_patch = mock.patch("IPython.display.display", autospec=True)
     query_job_mock = mock.create_autospec(
         google.cloud.bigquery.job.QueryJob, instance=True
@@ -948,11 +942,13 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):

     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
-    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+    ), display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
-        graph_server_init_mock.return_value = None
         with pytest.raises(ImportError):
-            ip.run_cell_magic("bigquery", "--graph", sql)
+            try:
+                ip.run_cell_magic("bigquery", "--graph", sql)
+            finally:
+                graph_server.graph_server.stop_server()
         display_mock.assert_not_called()


tests/unit/test_graph_server.py

+34 −33
@@ -140,7 +140,6 @@ def test_convert_one_column_no_rows():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [],
             "schema": None,
         }
     }
@@ -164,7 +163,6 @@ def test_convert_one_column_one_row_one_column():
     _validate_nodes_and_edges(result)

     assert result["response"]["query_result"] == {"result": [row_alex_owns_account]}
-    assert result["response"]["rows"] == [[row_alex_owns_account]]
     assert result["response"]["schema"] is None

@@ -185,11 +183,6 @@ def test_convert_one_column_one_row_one_column_null_json():
             "edges": [],
             "nodes": [],
             "query_result": {"result": []},
-            "rows": [
-                [
-                    None,
-                ]
-            ],
             "schema": None,
         },
     }
@@ -218,10 +211,34 @@ def test_convert_one_column_two_rows():
     assert result["response"]["query_result"] == {
         "result": [row_alex_owns_account, row_lee_owns_account]
     }
-    assert result["response"]["rows"] == [
-        [row_alex_owns_account],
-        [row_lee_owns_account],
-    ]
+    assert result["response"]["schema"] is None
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_row_two_columns():
+    result = graph_server.convert_graph_data(
+        {
+            "col1": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+            "col2": {
+                "0": json.dumps(row_lee_owns_account),
+            },
+        }
+    )
+    print(json.dumps(result))
+
+    assert len(result["response"]["nodes"]) == 4
+    assert len(result["response"]["edges"]) == 2
+
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {
+        "col1": [row_alex_owns_account],
+        "col2": [row_lee_owns_account],
+    }
     assert result["response"]["schema"] is None

@@ -243,7 +260,6 @@ def test_convert_nongraph_json():
     assert len(result["response"]["edges"]) == 0

     assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]}
-    assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]]
     assert result["response"]["schema"] is None

@@ -297,32 +313,18 @@ def test_convert_inner_value_not_string():
     assert result == {"error": "Expected inner value to be str, got <class 'int'>"}


-@pytest.mark.skipif(
-    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
-)
-def test_convert_one_column_one_row_two_columns():
-    result = graph_server.convert_graph_data(
-        {
-            "result1": {
-                "0": json.dumps(row_alex_owns_account),
-            },
-            "result2": {
-                "0": json.dumps(row_alex_owns_account),
-            },
-        }
-    )
-    assert result == {
-        "error": "Query has multiple columns - graph visualization not supported"
-    }
-
-
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_empty_dict():
     result = graph_server.convert_graph_data({})
     assert result == {
-        "error": "query result with no columns is not supported for graph visualization"
+        "response": {
+            "nodes": [],
+            "edges": [],
+            "schema": None,
+            "query_result": {},
+        }
     }

@@ -411,7 +413,6 @@ def test_post_query(self):
         self.assertEqual(
             response_data["query_result"], {"result": [row_alex_owns_account]}
         )
-        self.assertEqual(response_data["rows"], [[row_alex_owns_account]])
         self.assertIsNone(response_data["schema"])

