Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 073d62d

Browse files
authored
Merge branch 'main' into udf-refa
2 parents 1a78ddf + fed8039 commit 073d62d

18 files changed

Lines changed: 698 additions & 95 deletions

File tree

bigframes/core/block_transforms.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -355,24 +355,28 @@ def value_counts(
355355
normalize: bool = False,
356356
sort: bool = True,
357357
ascending: bool = False,
358-
dropna: bool = True,
358+
drop_na: bool = True,
359+
grouping_keys: typing.Sequence[str] = (),
359360
):
360-
block, dummy = block.create_constant(1)
361+
if grouping_keys and drop_na:
362+
# only need this if grouping_keys is involved, otherwise the drop_na in the aggregation will handle it for us
363+
block = dropna(block, columns, how="any")
361364
block, agg_ids = block.aggregate(
362-
by_column_ids=columns,
363-
aggregations=[ex.UnaryAggregation(agg_ops.count_op, ex.deref(dummy))],
364-
dropna=dropna,
365+
by_column_ids=(*grouping_keys, *columns),
366+
aggregations=[ex.NullaryAggregation(agg_ops.size_op)],
367+
dropna=drop_na and not grouping_keys,
365368
)
366369
count_id = agg_ids[0]
367370
if normalize:
368-
unbound_window = windows.unbound()
371+
unbound_window = windows.unbound(grouping_keys=tuple(grouping_keys))
369372
block, total_count_id = block.apply_window_op(
370373
count_id, agg_ops.sum_op, unbound_window
371374
)
372375
block, count_id = block.apply_binary_op(count_id, total_count_id, ops.div_op)
373376

374377
if sort:
375-
block = block.order_by(
378+
order_parts = [ordering.ascending_over(id) for id in grouping_keys]
379+
order_parts.extend(
376380
[
377381
ordering.OrderingExpression(
378382
ex.deref(count_id),
@@ -382,6 +386,7 @@ def value_counts(
382386
)
383387
]
384388
)
389+
block = block.order_by(order_parts)
385390
return block.select_column(count_id).with_column_labels(
386391
["proportion" if normalize else "count"]
387392
)

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from __future__ import annotations
1616

17+
import bigframes_vendored.constants as constants
1718
import sqlglot.expressions as sge
1819

1920
from bigframes import dtypes
@@ -35,8 +36,83 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
3536
# String addition
3637
return sge.Concat(expressions=[left.expr, right.expr])
3738

38-
# Numerical addition
39-
return sge.Add(this=left.expr, expression=right.expr)
39+
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
40+
left_expr = left.expr
41+
if left.dtype == dtypes.BOOL_DTYPE:
42+
left_expr = sge.Cast(this=left_expr, to="INT64")
43+
right_expr = right.expr
44+
if right.dtype == dtypes.BOOL_DTYPE:
45+
right_expr = sge.Cast(this=right_expr, to="INT64")
46+
return sge.Add(this=left_expr, expression=right_expr)
47+
48+
if (
49+
dtypes.is_time_or_date_like(left.dtype)
50+
and right.dtype == dtypes.TIMEDELTA_DTYPE
51+
):
52+
left_expr = left.expr
53+
if left.dtype == dtypes.DATE_DTYPE:
54+
left_expr = sge.Cast(this=left_expr, to="DATETIME")
55+
return sge.TimestampAdd(
56+
this=left_expr, expression=right.expr, unit=sge.Var(this="MICROSECOND")
57+
)
58+
if (
59+
dtypes.is_time_or_date_like(right.dtype)
60+
and left.dtype == dtypes.TIMEDELTA_DTYPE
61+
):
62+
right_expr = right.expr
63+
if right.dtype == dtypes.DATE_DTYPE:
64+
right_expr = sge.Cast(this=right_expr, to="DATETIME")
65+
return sge.TimestampAdd(
66+
this=right_expr, expression=left.expr, unit=sge.Var(this="MICROSECOND")
67+
)
68+
if left.dtype == dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE:
69+
return sge.Add(this=left.expr, expression=right.expr)
70+
71+
raise TypeError(
72+
f"Cannot add type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}"
73+
)
74+
75+
76+
@BINARY_OP_REGISTRATION.register(ops.sub_op)
77+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
78+
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
79+
left_expr = left.expr
80+
if left.dtype == dtypes.BOOL_DTYPE:
81+
left_expr = sge.Cast(this=left_expr, to="INT64")
82+
right_expr = right.expr
83+
if right.dtype == dtypes.BOOL_DTYPE:
84+
right_expr = sge.Cast(this=right_expr, to="INT64")
85+
return sge.Sub(this=left_expr, expression=right_expr)
86+
87+
if (
88+
dtypes.is_time_or_date_like(left.dtype)
89+
and right.dtype == dtypes.TIMEDELTA_DTYPE
90+
):
91+
left_expr = left.expr
92+
if left.dtype == dtypes.DATE_DTYPE:
93+
left_expr = sge.Cast(this=left_expr, to="DATETIME")
94+
return sge.TimestampSub(
95+
this=left_expr, expression=right.expr, unit=sge.Var(this="MICROSECOND")
96+
)
97+
if dtypes.is_time_or_date_like(left.dtype) and dtypes.is_time_or_date_like(
98+
right.dtype
99+
):
100+
left_expr = left.expr
101+
if left.dtype == dtypes.DATE_DTYPE:
102+
left_expr = sge.Cast(this=left_expr, to="DATETIME")
103+
right_expr = right.expr
104+
if right.dtype == dtypes.DATE_DTYPE:
105+
right_expr = sge.Cast(this=right_expr, to="DATETIME")
106+
return sge.TimestampDiff(
107+
this=left_expr, expression=right_expr, unit=sge.Var(this="MICROSECOND")
108+
)
109+
110+
if left.dtype == dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE:
111+
return sge.Sub(this=left.expr, expression=right.expr)
112+
113+
raise TypeError(
114+
f"Cannot subtract type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}"
115+
)
40116

41117

42118
@BINARY_OP_REGISTRATION.register(ops.ge_op)

bigframes/core/groupby/dataframe_group_by.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import datetime
1818
import typing
19-
from typing import Literal, Sequence, Tuple, Union
19+
from typing import Literal, Optional, Sequence, Tuple, Union
2020

2121
import bigframes_vendored.constants as constants
2222
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
@@ -372,6 +372,39 @@ def diff(self, periods=1) -> series.Series:
372372
)
373373
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
374374

375+
def value_counts(
376+
self,
377+
subset: Optional[Sequence[blocks.Label]] = None,
378+
normalize: bool = False,
379+
sort: bool = True,
380+
ascending: bool = False,
381+
dropna: bool = True,
382+
) -> Union[df.DataFrame, series.Series]:
383+
if subset is None:
384+
columns = self._selected_cols
385+
else:
386+
columns = [
387+
column
388+
for column in self._block.value_columns
389+
if self._block.col_id_to_label[column] in subset
390+
]
391+
block = self._block
392+
if self._dropna: # this drops null grouping columns
393+
block = block_ops.dropna(block, self._by_col_ids)
394+
block = block_ops.value_counts(
395+
block,
396+
columns,
397+
normalize=normalize,
398+
sort=sort,
399+
ascending=ascending,
400+
drop_na=dropna, # this drops null value columns
401+
grouping_keys=self._by_col_ids,
402+
)
403+
if self._as_index:
404+
return series.Series(block)
405+
else:
406+
return series.Series(block).to_frame().reset_index(drop=False)
407+
375408
@validations.requires_ordering()
376409
def rolling(
377410
self,

bigframes/core/groupby/series_group_by.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,30 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:
244244

245245
aggregate = agg
246246

247+
def value_counts(
248+
self,
249+
normalize: bool = False,
250+
sort: bool = True,
251+
ascending: bool = False,
252+
dropna: bool = True,
253+
) -> Union[df.DataFrame, series.Series]:
254+
columns = [self._value_column]
255+
block = self._block
256+
if self._dropna: # this drops null grouping columns
257+
block = block_ops.dropna(block, self._by_col_ids)
258+
block = block_ops.value_counts(
259+
block,
260+
columns,
261+
normalize=normalize,
262+
sort=sort,
263+
ascending=ascending,
264+
drop_na=dropna, # this drops null value columns
265+
grouping_keys=self._by_col_ids,
266+
)
267+
# TODO: once as_index=False supported, return DataFrame instead by resetting index
268+
# with .to_frame().reset_index(drop=False)
269+
return series.Series(block)
270+
247271
@validations.requires_ordering()
248272
def cumsum(self, *args, **kwargs) -> series.Series:
249273
return self._apply_window_op(

bigframes/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ def value_counts(
489489
self._block.index_columns,
490490
normalize=normalize,
491491
ascending=ascending,
492-
dropna=dropna,
492+
drop_na=dropna,
493493
)
494494
import bigframes.series as series
495495

bigframes/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2475,7 +2475,7 @@ def value_counts(
24752475
normalize=normalize,
24762476
sort=sort,
24772477
ascending=ascending,
2478-
dropna=dropna,
2478+
drop_na=dropna,
24792479
)
24802480
return bigframes.series.Series(block)
24812481

bigframes/dtypes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,10 @@ def is_time_like(type_: ExpressionType) -> bool:
289289
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
290290

291291

292+
def is_time_or_date_like(type_: ExpressionType) -> bool:
293+
return type_ in (DATE_DTYPE, DATETIME_DTYPE, TIME_DTYPE, TIMESTAMP_DTYPE)
294+
295+
292296
def is_geo_like(type_: ExpressionType) -> bool:
293297
return type_ in (GEO_DTYPE,)
294298

bigframes/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1631,7 +1631,7 @@ def value_counts(
16311631
[self._value_column],
16321632
normalize=normalize,
16331633
ascending=ascending,
1634-
dropna=dropna,
1634+
drop_na=dropna,
16351635
)
16361636
return Series(block)
16371637

samples/snippets/quickstart.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
def run_quickstart(project_id: str) -> None:
1717
your_gcp_project_id = project_id
1818

19-
# [START bigquery_bigframes_quickstart]
19+
# [START bigquery_bigframes_quickstart_create_dataframe]
2020
import bigframes.pandas as bpd
2121

2222
# Set BigQuery DataFrames options
@@ -37,12 +37,16 @@ def run_quickstart(project_id: str) -> None:
3737

3838
# Efficiently preview the results using the .peek() method.
3939
df.peek()
40+
# [END bigquery_bigframes_quickstart_create_dataframe]
4041

42+
# [START bigquery_bigframes_quickstart_calculate_print]
4143
# Use the DataFrame just as you would a pandas DataFrame, but calculations
4244
# happen in the BigQuery query engine instead of the local system.
4345
average_body_mass = df["body_mass_g"].mean()
4446
print(f"average_body_mass: {average_body_mass}")
47+
# [END bigquery_bigframes_quickstart_calculate_print]
4548

49+
# [START bigquery_bigframes_quickstart_eval_metrics]
4650
# Create the Linear Regression model
4751
from bigframes.ml.linear_model import LinearRegression
4852

@@ -70,7 +74,7 @@ def run_quickstart(project_id: str) -> None:
7074
model = LinearRegression(fit_intercept=False)
7175
model.fit(X, y)
7276
model.score(X, y)
73-
# [END bigquery_bigframes_quickstart]
77+
# [END bigquery_bigframes_quickstart_eval_metrics]
7478

7579
# close session and reset option so not to affect other tests
7680
bpd.close_session()

tests/system/small/engines/test_numeric_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def apply_op_pairwise(
5353
return new_arr
5454

5555

56-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
56+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5757
def test_engines_project_add(
5858
scalars_array_value: array_value.ArrayValue,
5959
engine,
@@ -62,7 +62,7 @@ def test_engines_project_add(
6262
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
6363

6464

65-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
65+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
6666
def test_engines_project_sub(
6767
scalars_array_value: array_value.ArrayValue,
6868
engine,

0 commit comments

Comments
 (0)