Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 7df5e09

Browse files
committed
Revert "feat: Add BigQuery ML CREATE MODEL support"
This reverts commit b809f81.
1 parent b809f81 commit 7df5e09

15 files changed

Lines changed: 131 additions & 56 deletions

File tree

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import sys
2020

21-
from bigframes.bigquery import ai
21+
from bigframes.bigquery import ai, ml
2222
from bigframes.bigquery._operations.approx_agg import approx_top_count
2323
from bigframes.bigquery._operations.array import (
2424
array_agg,
@@ -157,4 +157,5 @@
157157
"struct",
158158
# Modules / SQL namespaces
159159
"ai",
160+
"ml",
160161
]

bigframes/bigquery/_operations/ml.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from __future__ import annotations
1616

17-
import typing
1817
from typing import Mapping, Optional, TYPE_CHECKING, Union
1918

2019
import bigframes.core.log_adapter as log_adapter
@@ -52,34 +51,6 @@ def create_model(
5251
) -> bigframes.ml.base.BaseEstimator:
5352
"""
5453
Creates a BigQuery ML model.
55-
56-
Args:
57-
model_name (str):
58-
The name of the model in BigQuery.
59-
replace (bool, default False):
60-
Whether to replace the model if it already exists.
61-
if_not_exists (bool, default False):
62-
Whether to ignore the error if the model already exists.
63-
transform (list[str], optional):
64-
The TRANSFORM clause, which specifies the preprocessing steps to apply to the input data.
65-
input_schema (Mapping[str, str], optional):
66-
The INPUT clause, which specifies the schema of the input data.
67-
output_schema (Mapping[str, str], optional):
68-
The OUTPUT clause, which specifies the schema of the output data.
69-
connection_name (str, optional):
70-
The connection to use for the model.
71-
options (Mapping[str, Union[str, int, float, bool, list]], optional):
72-
The OPTIONS clause, which specifies the model options.
73-
training_data (Union[dataframe.DataFrame, str], optional):
74-
The query or DataFrame to use for training the model.
75-
custom_holiday (Union[dataframe.DataFrame, str], optional):
76-
The query or DataFrame to use for custom holiday data.
77-
session (bigframes.session.Session, optional):
78-
The BigFrames session to use. If not provided, the default session is used.
79-
80-
Returns:
81-
bigframes.ml.base.BaseEstimator:
82-
The created BigFrames model.
8354
"""
8455
import bigframes.pandas as bpd
8556

bigframes/bigquery/ml.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""This module integrates BigQuery ML functions."""
15+
"""This module exposes `BigQuery ML
16+
<https://docs.cloud.google.com/bigquery/docs/bqml-introduction>`_ functions
17+
by directly mapping to the equivalent function names in SQL syntax.
18+
19+
For an interface more familiar to Scikit-Learn users, see :mod:`bigframes.ml`.
20+
"""
1621

1722
from bigframes.bigquery._operations.ml import create_model
1823

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression:
107107
sge.If(this=sge.convert(key), true=sge.convert(value))
108108
for key, value in op.mappings
109109
],
110+
default=expr.expr,
110111
)
111112

112113

bigframes/core/compile/sqlglot/expressions/string_ops.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import sqlglot.expressions as sge
2020

21+
from bigframes import dtypes
2122
from bigframes import operations as ops
2223
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2324
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
@@ -195,6 +196,9 @@ def _(expr: TypedExpr) -> sge.Expression:
195196

196197
@register_unary_op(ops.len_op)
197198
def _(expr: TypedExpr) -> sge.Expression:
199+
if dtypes.is_array_like(expr.dtype):
200+
return sge.func("ARRAY_LENGTH", expr.expr)
201+
198202
return sge.Length(this=expr.expr)
199203

200204

@@ -239,7 +243,7 @@ def to_startswith(pat: str) -> sge.Expression:
239243

240244
@register_unary_op(ops.StrStripOp, pass_op=True)
241245
def _(expr: TypedExpr, op: ops.StrStripOp) -> sge.Expression:
242-
return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
246+
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip))
243247

244248

245249
@register_unary_op(ops.StringSplitOp, pass_op=True)
@@ -284,27 +288,29 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
284288

285289
@register_unary_op(ops.ZfillOp, pass_op=True)
286290
def _(expr: TypedExpr, op: ops.ZfillOp) -> sge.Expression:
291+
length_expr = sge.Greatest(
292+
expressions=[sge.Length(this=expr.expr), sge.convert(op.width)]
293+
)
287294
return sge.Case(
288295
ifs=[
289296
sge.If(
290-
this=sge.EQ(
291-
this=sge.Substring(
292-
this=expr.expr, start=sge.convert(1), length=sge.convert(1)
293-
),
294-
expression=sge.convert("-"),
297+
this=sge.func(
298+
"STARTS_WITH",
299+
expr.expr,
300+
sge.convert("-"),
295301
),
296302
true=sge.Concat(
297303
expressions=[
298304
sge.convert("-"),
299305
sge.func(
300306
"LPAD",
301-
sge.Substring(this=expr.expr, start=sge.convert(1)),
302-
sge.convert(op.width - 1),
307+
sge.Substring(this=expr.expr, start=sge.convert(2)),
308+
length_expr - 1,
303309
sge.convert("0"),
304310
),
305311
]
306312
),
307313
)
308314
],
309-
default=sge.func("LPAD", expr.expr, sge.convert(op.width), sge.convert("0")),
315+
default=sge.func("LPAD", expr.expr, length_expr, sge.convert("0")),
310316
)

bigframes/core/sql/ml.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from __future__ import annotations
1616

17-
import typing
1817
from typing import Mapping, Optional, Union
1918

2019
import bigframes.core.compile.googlesql as googlesql

bigframes/ml/__init__.py

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,82 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""BigQuery DataFrames ML provides a SKLearn-like API on the BigQuery engine."""
15+
"""BigQuery DataFrames ML provides a SKLearn-like API on the BigQuery engine.
16+
17+
.. code:: python
18+
19+
from bigframes.ml.linear_model import LinearRegression
20+
model = LinearRegression()
21+
model.fit(feature_columns, label_columns)
22+
model.predict(feature_columns_from_test_data)
23+
24+
You can also save your fit parameters to BigQuery for later use.
25+
26+
.. code:: python
27+
28+
import bigframes.pandas as bpd
29+
model.to_gbq(
30+
your_model_id, # For example: "bqml_tutorial.penguins_model"
31+
replace=True,
32+
)
33+
saved_model = bpd.read_gbq_model(your_model_id)
34+
saved_model.predict(feature_columns_from_test_data)
35+
36+
See the `BigQuery ML linear regression tutorial
37+
<https://docs.cloud.google.com/bigquery/docs/linear-regression-tutorial>`_ for a
38+
detailed example.
39+
40+
See the references for all ``bigframes.ml`` sub-modules:
41+
42+
* :mod:`bigframes.ml.cluster`
43+
* :mod:`bigframes.ml.compose`
44+
* :mod:`bigframes.ml.decomposition`
45+
* :mod:`bigframes.ml.ensemble`
46+
* :mod:`bigframes.ml.forecasting`
47+
* :mod:`bigframes.ml.imported`
48+
* :mod:`bigframes.ml.impute`
49+
* :mod:`bigframes.ml.linear_model`
50+
* :mod:`bigframes.ml.llm`
51+
* :mod:`bigframes.ml.metrics`
52+
* :mod:`bigframes.ml.model_selection`
53+
* :mod:`bigframes.ml.pipeline`
54+
* :mod:`bigframes.ml.preprocessing`
55+
* :mod:`bigframes.ml.remote`
56+
57+
Alternatively, check out :mod:`bigframes.bigquery.ml` for an interface that is
58+
more similar to the BigQuery ML SQL syntax.
59+
"""
60+
61+
from bigframes.ml import (
62+
cluster,
63+
compose,
64+
decomposition,
65+
ensemble,
66+
forecasting,
67+
imported,
68+
impute,
69+
linear_model,
70+
llm,
71+
metrics,
72+
model_selection,
73+
pipeline,
74+
preprocessing,
75+
remote,
76+
)
1677

1778
__all__ = [
1879
"cluster",
1980
"compose",
2081
"decomposition",
82+
"ensemble",
83+
"forecasting",
84+
"imported",
85+
"impute",
2186
"linear_model",
87+
"llm",
2288
"metrics",
2389
"model_selection",
2490
"pipeline",
2591
"preprocessing",
26-
"llm",
27-
"forecasting",
28-
"imported",
2992
"remote",
3093
]

docs/reference/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ packages.
1010
bigframes._config
1111
bigframes.bigquery
1212
bigframes.bigquery.ai
13+
bigframes.bigquery.ml
1314
bigframes.enums
1415
bigframes.exceptions
1516
bigframes.geopandas
@@ -26,6 +27,8 @@ scikit-learn.
2627
.. autosummary::
2728
:toctree: api
2829

30+
bigframes.ml
31+
bigframes.ml.base
2932
bigframes.ml.cluster
3033
bigframes.ml.compose
3134
bigframes.ml.decomposition
@@ -35,6 +38,7 @@ scikit-learn.
3538
bigframes.ml.impute
3639
bigframes.ml.linear_model
3740
bigframes.ml.llm
41+
bigframes.ml.metrics
3842
bigframes.ml.model_selection
3943
bigframes.ml.pipeline
4044
bigframes.ml.preprocessing

tests/system/large/functions/test_remote_function.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,13 +1651,12 @@ def square(x):
16511651
return x * x
16521652

16531653

1654-
# Note: Zero represents default, which is 100 instances actually, which is why the remote function still works
1655-
# in the df.apply() call here
1654+
# The default value of 100 is used if the maximum instances value is not set.
16561655
@pytest.mark.parametrize(
16571656
("max_instances_args", "expected_max_instances"),
16581657
[
1659-
pytest.param({}, 0, id="no-set"),
1660-
pytest.param({"cloud_function_max_instances": None}, 0, id="set-None"),
1658+
pytest.param({}, 100, id="no-set"),
1659+
pytest.param({"cloud_function_max_instances": None}, 100, id="set-None"),
16611660
pytest.param({"cloud_function_max_instances": 1000}, 1000, id="set-explicit"),
16621661
],
16631662
)

tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
CASE `string_col` WHEN 'value1' THEN 'mapped1' END AS `bfcol_1`
8+
CASE `string_col` WHEN 'value1' THEN 'mapped1' ELSE `string_col` END AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

0 commit comments

Comments
 (0)