Skip to content

Commit 9f471fb

Browse files
emar-karplamut
authored and committed
BigQuery: Add support of the project ID with org prefix to the Table.from_string() method (#9161)
* add prefix support
* Update _helpers.py
* consolidate the regex
* update split_id method
* '_parse_id' method renamed to '_split_id'
* switched to 're.groups' implementation instead of partly grouping
* Update dataset.py flake8 fixed
* Update _helpers.py
* added the docstring for the '_split_id' method
* fix lint failure
1 parent 5c0ea2e commit 9f471fb

3 files changed

Lines changed: 42 additions & 20 deletions

File tree

bigquery/google/cloud/bigquery/_helpers.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import copy
1919
import datetime
2020
import decimal
21+
import re
2122

2223
from google.cloud._helpers import UTC
2324
from google.cloud._helpers import _date_from_iso8601_date
@@ -29,6 +30,12 @@
2930
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
3031
_TIMEONLY_WO_MICROS = "%H:%M:%S"
3132
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"
33+
_PROJECT_PREFIX_PATTERN = re.compile(
34+
r"""
35+
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)(?:$|\.(?P<custom_id>[^.]+)$)
36+
""",
37+
re.VERBOSE,
38+
)
3239

3340

3441
def _not_null(value, field):
@@ -586,24 +593,42 @@ def _str_or_none(value):
586593
return str(value)
587594

588595

596+
def _split_id(full_id):
597+
"""Helper: split full_id into composite parts.
598+
599+
Args:
600+
full_id (str): Fully-qualified ID in standard SQL format.
601+
602+
Returns:
603+
List[str]: ID's parts separated into components.
604+
"""
605+
with_prefix = _PROJECT_PREFIX_PATTERN.match(full_id)
606+
if with_prefix is None:
607+
parts = full_id.split(".")
608+
else:
609+
parts = with_prefix.groups()
610+
parts = [part for part in parts if part]
611+
return parts
612+
613+
589614
def _parse_3_part_id(full_id, default_project=None, property_name="table_id"):
590615
output_project_id = default_project
591616
output_dataset_id = None
592617
output_resource_id = None
593-
parts = full_id.split(".")
618+
parts = _split_id(full_id)
594619

595620
if len(parts) != 2 and len(parts) != 3:
596621
raise ValueError(
597622
"{property_name} must be a fully-qualified ID in "
598-
'standard SQL format. e.g. "project.dataset.{property_name}", '
623+
'standard SQL format, e.g., "project.dataset.{property_name}", '
599624
"got {}".format(full_id, property_name=property_name)
600625
)
601626

602627
if len(parts) == 2 and not default_project:
603628
raise ValueError(
604629
"When default_project is not set, {property_name} must be a "
605-
"fully-qualified ID in standard SQL format. "
606-
'e.g. "project.dataset_id.{property_name}", got {}'.format(
630+
"fully-qualified ID in standard SQL format, "
631+
'e.g., "project.dataset_id.{property_name}", got {}'.format(
607632
full_id, property_name=property_name
608633
)
609634
)

bigquery/google/cloud/bigquery/dataset.py

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818

1919
import six
2020
import copy
21-
import re
2221

2322
import google.cloud._helpers
2423
from google.cloud.bigquery import _helpers
@@ -27,14 +26,6 @@
2726
from google.cloud.bigquery.table import TableReference
2827

2928

30-
_PROJECT_PREFIX_PATTERN = re.compile(
31-
r"""
32-
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
33-
""",
34-
re.VERBOSE,
35-
)
36-
37-
3829
def _get_table_reference(self, table_id):
3930
"""Constructs a TableReference.
4031
@@ -299,13 +290,7 @@ def from_string(cls, dataset_id, default_project=None):
299290
"""
300291
output_dataset_id = dataset_id
301292
output_project_id = default_project
302-
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
303-
if with_prefix is None:
304-
parts = dataset_id.split(".")
305-
else:
306-
project_id = with_prefix.group("project_id")
307-
dataset_id = with_prefix.group("dataset_id")
308-
parts = [project_id, dataset_id]
293+
parts = _helpers._split_id(dataset_id)
309294

310295
if len(parts) == 1 and not default_project:
311296
raise ValueError(

bigquery/tests/unit/test_table.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,11 +215,23 @@ def test_from_string(self):
215215
self.assertEqual(got.dataset_id, "string_dataset")
216216
self.assertEqual(got.table_id, "string_table")
217217

218+
def test_from_string_w_prefix(self):
219+
cls = self._get_target_class()
220+
got = cls.from_string("google.com:string-project.string_dataset.string_table")
221+
self.assertEqual(got.project, "google.com:string-project")
222+
self.assertEqual(got.dataset_id, "string_dataset")
223+
self.assertEqual(got.table_id, "string_table")
224+
218225
def test_from_string_legacy_string(self):
219226
cls = self._get_target_class()
220227
with self.assertRaises(ValueError):
221228
cls.from_string("string-project:string_dataset.string_table")
222229

230+
def test_from_string_w_incorrect_prefix(self):
231+
cls = self._get_target_class()
232+
with self.assertRaises(ValueError):
233+
cls.from_string("google.com.string-project.string_dataset.string_table")
234+
223235
def test_from_string_not_fully_qualified(self):
224236
cls = self._get_target_class()
225237
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)