Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,10 @@ def _get_graph_name(query_text: str):
"""
match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE)
if match:
return (match.group(1), match.group(2))
(dataset_id, graph_id) = (match.group(1)), match.group(2)
if "`" in dataset_id or "`" in graph_id:
return None # Backticks in graph name not support for schema view
return (dataset_id, graph_id)
Comment on lines +651 to +654
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The tuple assignment on line 651 is syntactically correct, but its formatting is unconventional and could be confusing. It can be simplified for better readability.

Suggested change
(dataset_id, graph_id) = (match.group(1)), match.group(2)
if "`" in dataset_id or "`" in graph_id:
return None # Backticks in graph name not support for schema view
return (dataset_id, graph_id)
dataset_id, graph_id = match.group(1), match.group(2)
if "`" in dataset_id or "`" in graph_id:
return None # Backticks in graph name not support for schema view
return dataset_id, graph_id

return None


Expand All @@ -668,10 +671,15 @@ def _get_graph_schema(
job_config = bigquery.QueryJobConfig(
query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)]
)
info_schema_results = bq_client.query(
info_schema_query, job_config=job_config
).to_dataframe()

job_config.use_legacy_sql = False
try:
info_schema_results = bq_client.query(
info_schema_query, job_config=job_config
).to_dataframe()
except Exception:
# If the INFORMATION_SCHEMA query fails for some reason, disable only schema
# view, not the entire visualizer.
return None
Comment on lines +679 to +682
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Catching a broad Exception and swallowing it silently can make debugging difficult. It would be better to at least log that an error occurred, for example by printing a warning to stderr. This will make it clear to users why the schema view might be unavailable without causing the entire operation to fail.

Suggested change
except Exception:
# If the INFORMATION_SCHEMA query fails for some reason, disable only schema
# view, not the entire visualizer.
return None
except Exception as e:
# If the INFORMATION_SCHEMA query fails for some reason, disable only schema
# view, not the entire visualizer.
print(f"Warning: Failed to retrieve graph schema, continuing without it: {e}", file=sys.stderr)
return None

if info_schema_results.shape == (1, 1):
return graph_server._convert_schema(info_schema_results.iloc[0, 0])
return None
Expand Down Expand Up @@ -733,7 +741,7 @@ def _add_graph_widget(
"<big><b>Error:</b> The query result is too large for graph visualization.</big>"
)
)
return
return False

schema = _get_graph_schema(bq_client, query_text, query_job)

Expand Down Expand Up @@ -764,6 +772,7 @@ def _add_graph_widget(
'"bigquery.graph_visualization.NodeExpansion"',
)
IPython.display.display(IPython.core.display.HTML(html_content))
return True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The _add_graph_widget function is vulnerable to Cross-Site Scripting (XSS). It renders query results into the notebook's HTML output without properly escaping the </script> tag. An attacker who can control the data returned by a BigQuery query can execute arbitrary JavaScript in the user's browser. To fix this, ensure that all data embedded in the HTML is properly escaped or use a safe JSON embedding technique.



def _is_valid_json(s: str):
Expand Down Expand Up @@ -870,7 +879,12 @@ def _make_bq_query(
result = result.to_dataframe(**dataframe_kwargs)

if args.graph and _supports_graph_widget(result):
_add_graph_widget(bq_client, result, query, query_job, args)
if _add_graph_widget(bq_client, result, query, query_job, args):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-critical critical

The graph visualization feature relies on an insecure local HTTP server (defined in graph_server.py). The server binds to all network interfaces (0.0.0.0) and lacks authentication, allowing any local or network user to read the notebook user's BigQuery data. To fix this, the server should bind to 127.0.0.1 and implement token-based authentication.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

127.0.0.1 would indeed be a bit more secure. I believe the difference between that and 0.0.0.0 is that 0.0.0.0 is open to external connections.

# Invoke _handle_result() in case the result is saved to a variable,
# but return None to suppress the default table view, which is redundant
# with the table view in the graph visualizer.
_handle_result(result, args)
return None
return _handle_result(result, args)


Expand Down
29 changes: 20 additions & 9 deletions tests/unit/bigquery/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,23 @@ def test__run_query_dry_run_without_errors_is_silent():
assert len(captured.stdout) == 0


def test__get_graph_name():
assert magics._get_graph_name("GRAPH foo.bar") == ("foo", "bar")
assert magics._get_graph_name("GRAPH `foo.bar`") is None
assert magics._get_graph_name("GRAPH `foo`.bar") is None
assert magics._get_graph_name("SELECT 1") is None


def test__get_graph_schema_exception():
bq_client = mock.create_autospec(bigquery.Client, instance=True)
bq_client.query.side_effect = Exception("error")
query_text = "GRAPH foo.bar"
query_job = mock.Mock()
query_job.configuration.destination.project = "my-project"

assert magics._get_graph_schema(bq_client, query_text, query_job) is None


@pytest.mark.skipif(
bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
)
Expand Down Expand Up @@ -614,9 +631,7 @@ def test_bigquery_graph_json_json_result(monkeypatch):
display_mock.assert_called()

assert bqstorage_mock.called # BQ storage client was used
assert isinstance(return_value, pandas.DataFrame)
assert len(return_value) == len(result) # verify row count
assert list(return_value) == list(result) # verify column names
assert return_value is None


@pytest.mark.skipif(
Expand Down Expand Up @@ -735,9 +750,7 @@ def test_bigquery_graph_json_result(monkeypatch):
assert '\\"location\\": null' in html_content

assert bqstorage_mock.called # BQ storage client was used
assert isinstance(return_value, pandas.DataFrame)
assert len(return_value) == len(result) # verify row count
assert list(return_value) == list(result) # verify column names
assert return_value is None


@pytest.mark.skipif(
Expand Down Expand Up @@ -1015,9 +1028,7 @@ def test_bigquery_graph_colab(monkeypatch):
assert sys.modules["google.colab"].output.register_callback.called

assert bqstorage_mock.called # BQ storage client was used
assert isinstance(return_value, pandas.DataFrame)
assert len(return_value) == len(result) # verify row count
assert list(return_value) == list(result) # verify column names
assert return_value is None


@pytest.mark.skipif(
Expand Down
Loading