Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 6ebf76b

Browse files
authored
Merge branch 'main' into dbt-sample-fix
2 parents 92df23d + e5ff8f7 commit 6ebf76b

56 files changed

Lines changed: 1538 additions & 90 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,4 @@ repos:
4646
rev: v2.0.2
4747
hooks:
4848
- id: biome-check
49-
files: '\.js$'
49+
files: '\.(js|css)$'

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,32 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.12.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.11.0...v2.12.0) (2025-07-23)
8+
9+
10+
### Features
11+
12+
* Add code samples for dbt bigframes integration ([#1898](https://github.com/googleapis/python-bigquery-dataframes/issues/1898)) ([7e03252](https://github.com/googleapis/python-bigquery-dataframes/commit/7e03252d31e505731db113eb38af77842bf29b9b))
13+
* Add isin local execution to hybrid engine ([#1915](https://github.com/googleapis/python-bigquery-dataframes/issues/1915)) ([c0cefd3](https://github.com/googleapis/python-bigquery-dataframes/commit/c0cefd36cfd55962b86178d2a612d625ed17f79c))
14+
* Add ml.metrics.mean_absolute_error method ([#1910](https://github.com/googleapis/python-bigquery-dataframes/issues/1910)) ([15b8449](https://github.com/googleapis/python-bigquery-dataframes/commit/15b8449dc5ad0c8190a5cbf47894436de18c8e88))
15+
* Allow local arithmetic execution in hybrid engine ([#1906](https://github.com/googleapis/python-bigquery-dataframes/issues/1906)) ([ebdcd02](https://github.com/googleapis/python-bigquery-dataframes/commit/ebdcd0240f0d8edaef3094b3a4e664b4a84d4a25))
16+
* Provide day_of_year and day_of_week for dt accessor ([#1911](https://github.com/googleapis/python-bigquery-dataframes/issues/1911)) ([40e7638](https://github.com/googleapis/python-bigquery-dataframes/commit/40e76383948a79bde48108f6180fd6ae2b3d0875))
17+
* Support params `max_batching_rows`, `container_cpu`, and `container_memory` for `udf` ([#1897](https://github.com/googleapis/python-bigquery-dataframes/issues/1897)) ([8baa912](https://github.com/googleapis/python-bigquery-dataframes/commit/8baa9126e595ae682469a6bb462244240699f57f))
18+
* Support typed pyarrow.Scalar in assignment ([#1930](https://github.com/googleapis/python-bigquery-dataframes/issues/1930)) ([cd28e12](https://github.com/googleapis/python-bigquery-dataframes/commit/cd28e12b3f70a6934a68963a7f25dbd5e3c67335))
19+
20+
21+
### Bug Fixes
22+
23+
* Correct min field from max() to min() in remote function tests ([#1917](https://github.com/googleapis/python-bigquery-dataframes/issues/1917)) ([d5c54fc](https://github.com/googleapis/python-bigquery-dataframes/commit/d5c54fca32ed75c1aef52c99781db7f8ac7426e1))
24+
* Resolve location reset issue in bigquery options ([#1914](https://github.com/googleapis/python-bigquery-dataframes/issues/1914)) ([c15cb8a](https://github.com/googleapis/python-bigquery-dataframes/commit/c15cb8a1a9c834c2c1c2984930415b246f3f948b))
25+
* Series.str.isdigit in unicode superscripts and fractions ([#1924](https://github.com/googleapis/python-bigquery-dataframes/issues/1924)) ([8d46c36](https://github.com/googleapis/python-bigquery-dataframes/commit/8d46c36da7881a99861166c03a0831beff8ee0dd))
26+
27+
28+
### Documentation
29+
30+
* Add code snippets for session and IO public docs ([#1919](https://github.com/googleapis/python-bigquery-dataframes/issues/1919)) ([6e01cbe](https://github.com/googleapis/python-bigquery-dataframes/commit/6e01cbec0dcf40e528b4a96e944681df18773c11))
31+
* Add snippets for performance optimization doc ([#1923](https://github.com/googleapis/python-bigquery-dataframes/issues/1923)) ([4da309e](https://github.com/googleapis/python-bigquery-dataframes/commit/4da309e27bd58a685e8aca953717da75d4ba5305))
32+
733
## [2.11.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.10.0...v2.11.0) (2025-07-15)
834

935

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# Generated by synthtool. DO NOT EDIT!
1818
include README.rst LICENSE
1919
recursive-include third_party/bigframes_vendored *
20-
recursive-include bigframes *.json *.proto *.js py.typed
20+
recursive-include bigframes *.json *.proto *.js *.css py.typed
2121
recursive-include tests *
2222
global-exclude *.py[co]
2323
global-exclude __pycache__

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,16 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
175175
)
176176

177177

178+
@UNARY_OP_REGISTRATION.register(ops.StrContainsRegexOp)
179+
def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
180+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
181+
182+
183+
@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
184+
def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
185+
return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
186+
187+
178188
@UNARY_OP_REGISTRATION.register(ops.date_op)
179189
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
180190
return sge.Date(this=expr.expr)
@@ -234,6 +244,176 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
234244
return sge.func("FARM_FINGERPRINT", expr.expr)
235245

236246

247+
@UNARY_OP_REGISTRATION.register(ops.hour_op)
248+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
249+
return sge.Extract(this=sge.Identifier(this="HOUR"), expression=expr.expr)
250+
251+
252+
@UNARY_OP_REGISTRATION.register(ops.invert_op)
253+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
254+
return sge.BitwiseNot(this=expr.expr)
255+
256+
257+
@UNARY_OP_REGISTRATION.register(ops.isalnum_op)
258+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
259+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^(\p{N}|\p{L})+$"))
260+
261+
262+
@UNARY_OP_REGISTRATION.register(ops.isalpha_op)
263+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
264+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{L}+$"))
265+
266+
267+
@UNARY_OP_REGISTRATION.register(ops.isdecimal_op)
268+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
269+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\d+$"))
270+
271+
272+
@UNARY_OP_REGISTRATION.register(ops.isdigit_op)
273+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
274+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{Nd}+$"))
275+
276+
277+
@UNARY_OP_REGISTRATION.register(ops.islower_op)
278+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
279+
return sge.And(
280+
this=sge.EQ(
281+
this=sge.Lower(this=expr.expr),
282+
expression=expr.expr,
283+
),
284+
expression=sge.NEQ(
285+
this=sge.Upper(this=expr.expr),
286+
expression=expr.expr,
287+
),
288+
)
289+
290+
291+
@UNARY_OP_REGISTRATION.register(ops.isnumeric_op)
292+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
293+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$"))
294+
295+
296+
@UNARY_OP_REGISTRATION.register(ops.isspace_op)
297+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
298+
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\s+$"))
299+
300+
301+
@UNARY_OP_REGISTRATION.register(ops.isupper_op)
302+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
303+
return sge.And(
304+
this=sge.EQ(
305+
this=sge.Upper(this=expr.expr),
306+
expression=expr.expr,
307+
),
308+
expression=sge.NEQ(
309+
this=sge.Lower(this=expr.expr),
310+
expression=expr.expr,
311+
),
312+
)
313+
314+
315+
@UNARY_OP_REGISTRATION.register(ops.len_op)
316+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
317+
return sge.Length(this=expr.expr)
318+
319+
320+
@UNARY_OP_REGISTRATION.register(ops.ln_op)
321+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
322+
return sge.Case(
323+
ifs=[
324+
sge.If(
325+
this=expr.expr < sge.convert(0),
326+
true=_NAN,
327+
)
328+
],
329+
default=sge.Ln(this=expr.expr),
330+
)
331+
332+
333+
@UNARY_OP_REGISTRATION.register(ops.log10_op)
334+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
335+
return sge.Case(
336+
ifs=[
337+
sge.If(
338+
this=expr.expr < sge.convert(0),
339+
true=_NAN,
340+
)
341+
],
342+
default=sge.Log(this=expr.expr, expression=sge.convert(10)),
343+
)
344+
345+
346+
@UNARY_OP_REGISTRATION.register(ops.log1p_op)
347+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
348+
return sge.Case(
349+
ifs=[
350+
sge.If(
351+
this=expr.expr < sge.convert(-1),
352+
true=_NAN,
353+
)
354+
],
355+
default=sge.Ln(this=sge.convert(1) + expr.expr),
356+
)
357+
358+
359+
@UNARY_OP_REGISTRATION.register(ops.lower_op)
360+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
361+
return sge.Lower(this=expr.expr)
362+
363+
364+
@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
365+
def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
366+
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
367+
368+
369+
@UNARY_OP_REGISTRATION.register(ops.neg_op)
370+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
371+
return sge.Neg(this=expr.expr)
372+
373+
374+
@UNARY_OP_REGISTRATION.register(ops.pos_op)
375+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
376+
return expr.expr
377+
378+
379+
@UNARY_OP_REGISTRATION.register(ops.reverse_op)
380+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
381+
return sge.func("REVERSE", expr.expr)
382+
383+
384+
@UNARY_OP_REGISTRATION.register(ops.StrRstripOp)
385+
def _(op: ops.StrRstripOp, expr: TypedExpr) -> sge.Expression:
386+
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="RIGHT")
387+
388+
389+
@UNARY_OP_REGISTRATION.register(ops.sqrt_op)
390+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
391+
return sge.Case(
392+
ifs=[
393+
sge.If(
394+
this=expr.expr < sge.convert(0),
395+
true=_NAN,
396+
)
397+
],
398+
default=sge.Sqrt(this=expr.expr),
399+
)
400+
401+
402+
@UNARY_OP_REGISTRATION.register(ops.StrStripOp)
403+
def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
404+
return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
405+
406+
407+
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
408+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
409+
return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
410+
411+
412+
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
413+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
414+
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
415+
416+
237417
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
238418
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
239419
return sge.Is(this=expr.expr, expression=sge.Null())
@@ -316,3 +496,8 @@ def _(op: ops.ParseJSON, expr: TypedExpr) -> sge.Expression:
316496
@UNARY_OP_REGISTRATION.register(ops.ToJSONString)
317497
def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
318498
return sge.func("TO_JSON_STRING", expr.expr)
499+
500+
501+
@UNARY_OP_REGISTRATION.register(ops.upper_op)
502+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
503+
return sge.Upper(this=expr.expr)

bigframes/core/indexes/base.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,21 @@
2727
import pandas
2828

2929
from bigframes import dtypes
30+
from bigframes.core.array_value import ArrayValue
3031
import bigframes.core.block_transforms as block_ops
3132
import bigframes.core.blocks as blocks
3233
import bigframes.core.expression as ex
34+
import bigframes.core.identifiers as ids
35+
import bigframes.core.nodes as nodes
3336
import bigframes.core.ordering as order
3437
import bigframes.core.utils as utils
3538
import bigframes.core.validations as validations
39+
import bigframes.core.window_spec as window_spec
3640
import bigframes.dtypes
3741
import bigframes.formatting_helpers as formatter
3842
import bigframes.operations as ops
3943
import bigframes.operations.aggregations as agg_ops
44+
import bigframes.series
4045

4146
if typing.TYPE_CHECKING:
4247
import bigframes.dataframe
@@ -247,6 +252,118 @@ def query_job(self) -> bigquery.QueryJob:
247252
self._query_job = query_job
248253
return self._query_job
249254

255+
def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
256+
"""Get integer location, slice or boolean mask for requested label.
257+
258+
Args:
259+
key:
260+
The label to search for in the index.
261+
262+
Returns:
263+
An integer, slice, or boolean mask representing the location(s) of the key.
264+
265+
Raises:
266+
NotImplementedError: If the index has more than one level.
267+
KeyError: If the key is not found in the index.
268+
"""
269+
if self.nlevels != 1:
270+
raise NotImplementedError("get_loc only supports single-level indexes")
271+
272+
# Get the index column from the block
273+
index_column = self._block.index_columns[0]
274+
275+
# Apply row numbering to the original data
276+
row_number_column_id = ids.ColumnId.unique()
277+
window_node = nodes.WindowOpNode(
278+
child=self._block._expr.node,
279+
expression=ex.NullaryAggregation(agg_ops.RowNumberOp()),
280+
window_spec=window_spec.unbound(),
281+
output_name=row_number_column_id,
282+
never_skip_nulls=True,
283+
)
284+
285+
windowed_array = ArrayValue(window_node)
286+
windowed_block = blocks.Block(
287+
windowed_array,
288+
index_columns=self._block.index_columns,
289+
column_labels=self._block.column_labels.insert(
290+
len(self._block.column_labels), None
291+
),
292+
index_labels=self._block._index_labels,
293+
)
294+
295+
# Create expression to find matching positions
296+
match_expr = ops.eq_op.as_expr(ex.deref(index_column), ex.const(key))
297+
windowed_block, match_col_id = windowed_block.project_expr(match_expr)
298+
299+
# Filter to only rows where the key matches
300+
filtered_block = windowed_block.filter_by_id(match_col_id)
301+
302+
# Check if key exists at all by counting on the filtered block
303+
count_agg = ex.UnaryAggregation(
304+
agg_ops.count_op, ex.deref(row_number_column_id.name)
305+
)
306+
count_result = filtered_block._expr.aggregate([(count_agg, "count")])
307+
count_scalar = self._block.session._executor.execute(
308+
count_result
309+
).to_py_scalar()
310+
311+
if count_scalar == 0:
312+
raise KeyError(f"'{key}' is not in index")
313+
314+
# If only one match, return integer position
315+
if count_scalar == 1:
316+
min_agg = ex.UnaryAggregation(
317+
agg_ops.min_op, ex.deref(row_number_column_id.name)
318+
)
319+
position_result = filtered_block._expr.aggregate([(min_agg, "position")])
320+
position_scalar = self._block.session._executor.execute(
321+
position_result
322+
).to_py_scalar()
323+
return int(position_scalar)
324+
325+
# Handle multiple matches based on index monotonicity
326+
is_monotonic = self.is_monotonic_increasing or self.is_monotonic_decreasing
327+
if is_monotonic:
328+
return self._get_monotonic_slice(filtered_block, row_number_column_id)
329+
else:
330+
# Return boolean mask for non-monotonic duplicates
331+
mask_block = windowed_block.select_columns([match_col_id])
332+
# Reset the index to use positional integers instead of original index values
333+
mask_block = mask_block.reset_index(drop=True)
334+
# Ensure correct dtype and name to match pandas behavior
335+
result_series = bigframes.series.Series(mask_block)
336+
return result_series.astype("boolean")
337+
338+
def _get_monotonic_slice(
339+
self, filtered_block, row_number_column_id: "ids.ColumnId"
340+
) -> slice:
341+
"""Helper method to get a slice for monotonic duplicates with an optimized query."""
342+
# Combine min and max aggregations into a single query for efficiency
343+
min_max_aggs = [
344+
(
345+
ex.UnaryAggregation(
346+
agg_ops.min_op, ex.deref(row_number_column_id.name)
347+
),
348+
"min_pos",
349+
),
350+
(
351+
ex.UnaryAggregation(
352+
agg_ops.max_op, ex.deref(row_number_column_id.name)
353+
),
354+
"max_pos",
355+
),
356+
]
357+
combined_result = filtered_block._expr.aggregate(min_max_aggs)
358+
359+
# Execute query and extract positions
360+
result_df = self._block.session._executor.execute(combined_result).to_pandas()
361+
min_pos = int(result_df["min_pos"].iloc[0])
362+
max_pos = int(result_df["max_pos"].iloc[0])
363+
364+
# Create slice (stop is exclusive)
365+
return slice(min_pos, max_pos + 1)
366+
250367
def __repr__(self) -> str:
251368
# Protect against errors with uninitialized Series. See:
252369
# https://github.com/googleapis/python-bigquery-dataframes/issues/728

0 commit comments

Comments (0)