Skip to content

Commit 2fdd6e6

Browse files
authored
feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties (#9464)
* feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties Samples double as system tests. I aim to use them here: * https://cloud.google.com/bigquery/docs/managing-partitioned-tables#require-filter * https://cloud.google.com/bigquery/docs/updating-datasets#partition-expiration Note: this also (silently) deprecates TimePartitioning.require_partition_filter, as that's duplicating the same functionality. I was curious why the expiration_ms wasn't also moving up, but then I realized that property only makes sense if a partition is associated with a timestamp. * add deprecation warning to require_partition_filter * blacken
1 parent a79d98d commit 2fdd6e6

8 files changed

Lines changed: 284 additions & 42 deletions

bigquery/google/cloud/bigquery/dataset.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ class Dataset(object):
358358
_PROPERTY_TO_API_FIELD = {
359359
"access_entries": "access",
360360
"created": "creationTime",
361+
"default_partition_expiration_ms": "defaultPartitionExpirationMs",
361362
"default_table_expiration_ms": "defaultTableExpirationMs",
362363
"friendly_name": "friendlyName",
363364
}
@@ -460,6 +461,34 @@ def self_link(self):
460461
"""
461462
return self._properties.get("selfLink")
462463

464+
@property
def default_partition_expiration_ms(self):
    """Optional[int]: The default partition expiration for all
    partitioned tables in the dataset, in milliseconds.

    Once this property is set, all newly-created partitioned tables in
    the dataset will have a ``time_partitioning.expiration_ms`` property
    set to this value, and changing the value will only affect new
    tables, not existing ones. The storage in a partition will have an
    expiration time of its partition time plus this value.

    Setting this property overrides the use of
    ``default_table_expiration_ms`` for partitioned tables: only one of
    ``default_table_expiration_ms`` and
    ``default_partition_expiration_ms`` will be used for any new
    partitioned table. If you provide an explicit
    ``time_partitioning.expiration_ms`` when creating or updating a
    partitioned table, that value takes precedence over the default
    partition expiration time indicated by this property.
    """
    # The raw value is kept under the API field name
    # ``defaultPartitionExpirationMs``; ``.get`` yields None when it was
    # never set. NOTE(review): ``_int_or_none`` presumably converts the
    # stored value to ``int`` and passes ``None`` through -- confirm in
    # ``_helpers``.
    return _helpers._int_or_none(
        self._properties.get("defaultPartitionExpirationMs")
    )
487+
488+
@default_partition_expiration_ms.setter
def default_partition_expiration_ms(self, value):
    """Store ``value`` under the ``defaultPartitionExpirationMs`` key.

    The value is passed through ``_helpers._str_or_none`` before being
    written into ``self._properties``.
    NOTE(review): presumably that helper stringifies non-``None`` values
    and leaves ``None`` untouched -- confirm in ``_helpers``.
    """
    api_value = _helpers._str_or_none(value)
    self._properties["defaultPartitionExpirationMs"] = api_value
491+
463492
@property
464493
def default_table_expiration_ms(self):
465494
"""Union[int, None]: Default expiration time for tables in the dataset

bigquery/google/cloud/bigquery/table.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ class Table(object):
385385
"view_query": "view",
386386
"external_data_configuration": "externalDataConfiguration",
387387
"encryption_configuration": "encryptionConfiguration",
388+
"require_partition_filter": "requirePartitionFilter",
388389
}
389390

390391
def __init__(self, table_ref, schema=None):
@@ -420,6 +421,18 @@ def path(self):
420421
self.table_id,
421422
)
422423

424+
@property
def require_partition_filter(self):
    """bool: Whether queries over the partitioned table must specify a
    partition filter that can be used for partition elimination.

    Read from the ``requirePartitionFilter`` key of the resource
    properties; ``None`` is returned when that key is absent.
    """
    resource = self._properties
    return resource.get("requirePartitionFilter")
431+
432+
@require_partition_filter.setter
def require_partition_filter(self, value):
    """Write ``value`` as-is to the ``requirePartitionFilter`` key of the
    resource properties (no conversion or validation is applied here).
    """
    resource = self._properties
    resource["requirePartitionFilter"] = value
435+
423436
@property
424437
def schema(self):
425438
"""List[google.cloud.bigquery.schema.SchemaField]: Table's schema.
@@ -1722,9 +1735,9 @@ class TimePartitioning(object):
17221735
Number of milliseconds for which to keep the storage for a
17231736
partition.
17241737
require_partition_filter (bool, optional):
1725-
If set to true, queries over the partitioned table require a
1726-
partition filter that can be used for partition elimination to be
1727-
specified.
1738+
DEPRECATED: Use
1739+
:attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
1740+
instead.
17281741
"""
17291742

17301743
def __init__(
@@ -1777,11 +1790,33 @@ def expiration_ms(self, value):
17771790
@property
def require_partition_filter(self):
    """bool: Specifies whether partition filters are required for queries

    DEPRECATED: Use
    :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
    instead.

    Reading this property emits a ``PendingDeprecationWarning`` and then
    returns the value stored under ``requirePartitionFilter`` (``None``
    when the key is absent).
    """
    message = (
        "TimePartitioning.require_partition_filter will be removed in "
        "future versions. Please use Table.require_partition_filter "
        "instead."
    )
    # stacklevel=2 attributes the warning to the code accessing the
    # property rather than to this method.
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    return self._properties.get("requirePartitionFilter")
17821808

17831809
@require_partition_filter.setter
def require_partition_filter(self, value):
    """Deprecated setter: warn, then store ``value`` under the
    ``requirePartitionFilter`` key of the properties dict.
    """
    message = (
        "TimePartitioning.require_partition_filter will be removed in "
        "future versions. Please use Table.require_partition_filter "
        "instead."
    )
    # stacklevel=2 attributes the warning to the code performing the
    # assignment rather than to this method.
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    self._properties["requirePartitionFilter"] = value
17861821

17871822
@classmethod
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from .. import update_dataset_default_partition_expiration
16+
17+
18+
def test_update_dataset_default_partition_expiration(capsys, client, dataset_id):
    """Run the dataset partition-expiration sample and check what it prints."""
    # 90 days expressed in milliseconds -- the value the sample sets.
    expected_ms = 90 * 24 * 60 * 60 * 1000

    update_dataset_default_partition_expiration.update_dataset_default_partition_expiration(
        client, dataset_id
    )

    # Only stdout matters here; stderr is ignored.
    captured = capsys.readouterr()
    expected_line = "Updated dataset {} with new default partition expiration {}".format(
        dataset_id, expected_ms
    )
    assert expected_line in captured.out
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from google.cloud import bigquery
16+
from .. import update_table_require_partition_filter
17+
18+
19+
def test_update_table_require_partition_filter(capsys, client, random_table_id):
    """Create a time-partitioned table, run the sample, check its output."""
    # Make a partitioned table.
    partition_column = "transaction_timestamp"
    schema = [bigquery.SchemaField(partition_column, "TIMESTAMP")]
    table = bigquery.Table(random_table_id, schema)
    table.time_partitioning = bigquery.TimePartitioning(field=partition_column)
    table = client.create_table(table)

    update_table_require_partition_filter.update_table_require_partition_filter(
        client, random_table_id
    )

    # Only stdout matters here; stderr is ignored.
    captured = capsys.readouterr()
    expected_line = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected_line in captured.out
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def update_dataset_default_partition_expiration(client, dataset_id):
    """Set a 90-day default partition expiration on a dataset and report it.

    Fetches the dataset via ``client.get_dataset``, assigns
    ``default_partition_expiration_ms``, sends only that field through
    ``client.update_dataset`` and prints a confirmation line.
    """

    # [START bigquery_update_dataset_partition_expiration]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    # Set the default partition expiration (applies to new tables, only) in
    # milliseconds. This example sets the default expiration to 90 days.
    ninety_days_ms = 90 * 24 * 60 * 60 * 1000
    dataset.default_partition_expiration_ms = ninety_days_ms

    fields_to_update = ["default_partition_expiration_ms"]
    dataset = client.update_dataset(dataset, fields_to_update)  # Make an API request.

    message = "Updated dataset {}.{} with new default partition expiration {}".format(
        dataset.project, dataset.dataset_id, dataset.default_partition_expiration_ms
    )
    print(message)
    # [END bigquery_update_dataset_partition_expiration]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def update_table_require_partition_filter(client, table_id):
    """Turn on ``require_partition_filter`` for a table and report it.

    Fetches the table via ``client.get_table``, sets
    ``require_partition_filter`` to ``True``, sends only that field through
    ``client.update_table`` and prints a confirmation line.
    """

    # [START bigquery_update_table_require_partition_filter]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to fetch.
    # table_id = 'your-project.your_dataset.your_table'

    table = client.get_table(table_id)  # Make an API request.
    table.require_partition_filter = True
    table = client.update_table(
        table, ["require_partition_filter"]
    )  # Make an API request.

    # View table properties
    print(
        "Updated table '{}.{}.{}' with require_partition_filter={}.".format(
            table.project,
            table.dataset_id,
            table.table_id,
            table.require_partition_filter,
        )
    )
    # [END bigquery_update_table_require_partition_filter]

bigquery/tests/unit/test_dataset.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,14 @@ def test_access_entries_setter(self):
454454
dataset.access_entries = [phred, bharney]
455455
self.assertEqual(dataset.access_entries, [phred, bharney])
456456

457+
def test_default_partition_expiration_ms(self):
    """Round-trip ``default_partition_expiration_ms``: unset, int, None."""
    ds = self._make_one("proj.dset")

    # Nothing has been assigned yet.
    assert ds.default_partition_expiration_ms is None

    # An integer assigned through the setter reads back as the same integer.
    ds.default_partition_expiration_ms = 12345
    assert ds.default_partition_expiration_ms == 12345

    # Assigning None clears the value again.
    ds.default_partition_expiration_ms = None
    assert ds.default_partition_expiration_ms is None
464+
457465
def test_default_table_expiration_ms_setter_bad_value(self):
458466
dataset = self._make_one(self.DS_REF)
459467
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)