Skip to content

Commit 6c13421

Browse files
authored
Add options to ignore errors when creating/deleting datasets/tables. (#7491)
In create_dataset and create_table, passing exists_ok=True suppresses the error raised when the dataset/table already exists; the client then makes a GET request and returns the existing resource. In delete_dataset and delete_table, passing not_found_ok=True suppresses the error raised when the dataset/table doesn't exist.
1 parent 54cab6e commit 6c13421

2 files changed

Lines changed: 243 additions & 16 deletions

File tree

bigquery/google/cloud/bigquery/client.py

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from google.resumable_media.requests import MultipartUpload
3333
from google.resumable_media.requests import ResumableUpload
3434

35+
import google.api_core.exceptions
3536
from google.api_core import page_iterator
3637
import google.cloud._helpers
3738
from google.cloud import exceptions
@@ -297,7 +298,7 @@ def dataset(self, dataset_id, project=None):
297298

298299
return DatasetReference(project, dataset_id)
299300

300-
def create_dataset(self, dataset):
301+
def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY):
301302
"""API call: create the dataset via a POST request.
302303
303304
See
@@ -312,6 +313,11 @@ def create_dataset(self, dataset):
312313
A :class:`~google.cloud.bigquery.dataset.Dataset` to create.
313314
If ``dataset`` is a reference, an empty dataset is created
314315
with the specified ID and client's default location.
316+
exists_ok (bool):
317+
Defaults to ``False``. If ``True``, ignore "already exists"
318+
errors when creating the dataset.
319+
retry (google.api_core.retry.Retry):
320+
Optional. How to retry the RPC.
315321
316322
Returns:
317323
google.cloud.bigquery.dataset.Dataset:
@@ -338,11 +344,15 @@ def create_dataset(self, dataset):
338344
if data.get("location") is None and self.location is not None:
339345
data["location"] = self.location
340346

341-
api_response = self._connection.api_request(method="POST", path=path, data=data)
342-
343-
return Dataset.from_api_repr(api_response)
344-
345-
def create_table(self, table):
347+
try:
348+
api_response = self._call_api(retry, method="POST", path=path, data=data)
349+
return Dataset.from_api_repr(api_response)
350+
except google.api_core.exceptions.Conflict:
351+
if not exists_ok:
352+
raise
353+
return self.get_dataset(dataset.reference, retry=retry)
354+
355+
def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY):
346356
"""API call: create a table via a POST request
347357
348358
See
@@ -358,6 +368,11 @@ def create_table(self, table):
358368
If ``table`` is a reference, an empty table is created
359369
with the specified ID. The dataset that the table belongs to
360370
must already exist.
371+
exists_ok (bool):
372+
Defaults to ``False``. If ``True``, ignore "already exists"
373+
errors when creating the table.
374+
retry (google.api_core.retry.Retry):
375+
Optional. How to retry the RPC.
361376
362377
Returns:
363378
google.cloud.bigquery.table.Table:
@@ -369,10 +384,14 @@ def create_table(self, table):
369384
table = Table(table)
370385

371386
path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id)
372-
api_response = self._connection.api_request(
373-
method="POST", path=path, data=table.to_api_repr()
374-
)
375-
return Table.from_api_repr(api_response)
387+
data = table.to_api_repr()
388+
try:
389+
api_response = self._call_api(retry, method="POST", path=path, data=data)
390+
return Table.from_api_repr(api_response)
391+
except google.api_core.exceptions.Conflict:
392+
if not exists_ok:
393+
raise
394+
return self.get_table(table.reference, retry=retry)
376395

377396
def _call_api(self, retry, **kwargs):
378397
call = functools.partial(self._connection.api_request, **kwargs)
@@ -563,7 +582,9 @@ def list_tables(
563582
result.dataset = dataset
564583
return result
565584

566-
def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
585+
def delete_dataset(
586+
self, dataset, delete_contents=False, retry=DEFAULT_RETRY, not_found_ok=False
587+
):
567588
"""Delete a dataset.
568589
569590
See
@@ -579,12 +600,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
579600
in, this method attempts to create a dataset reference from a
580601
string using
581602
:func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
582-
retry (:class:`google.api_core.retry.Retry`):
583-
(Optional) How to retry the RPC.
584603
delete_contents (boolean):
585604
(Optional) If True, delete all the tables in the dataset. If
586605
False and the dataset contains tables, the request will fail.
587606
Default is False.
607+
retry (:class:`google.api_core.retry.Retry`):
608+
(Optional) How to retry the RPC.
609+
not_found_ok (bool):
610+
Defaults to ``False``. If ``True``, ignore "not found" errors
611+
when deleting the dataset.
588612
"""
589613
if isinstance(dataset, str):
590614
dataset = DatasetReference.from_string(
@@ -598,9 +622,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
598622
if delete_contents:
599623
params["deleteContents"] = "true"
600624

601-
self._call_api(retry, method="DELETE", path=dataset.path, query_params=params)
625+
try:
626+
self._call_api(
627+
retry, method="DELETE", path=dataset.path, query_params=params
628+
)
629+
except google.api_core.exceptions.NotFound:
630+
if not not_found_ok:
631+
raise
602632

603-
def delete_table(self, table, retry=DEFAULT_RETRY):
633+
def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False):
604634
"""Delete a table
605635
606636
See
@@ -618,13 +648,21 @@ def delete_table(self, table, retry=DEFAULT_RETRY):
618648
:func:`google.cloud.bigquery.table.TableReference.from_string`.
619649
retry (:class:`google.api_core.retry.Retry`):
620650
(Optional) How to retry the RPC.
651+
not_found_ok (bool):
652+
Defaults to ``False``. If ``True``, ignore "not found" errors
653+
when deleting the table.
621654
"""
622655
if isinstance(table, str):
623656
table = TableReference.from_string(table, default_project=self.project)
624657

625658
if not isinstance(table, (Table, TableReference)):
626659
raise TypeError("table must be a Table or a TableReference")
627-
self._call_api(retry, method="DELETE", path=table.path)
660+
661+
try:
662+
self._call_api(retry, method="DELETE", path=table.path)
663+
except google.api_core.exceptions.NotFound:
664+
if not not_found_ok:
665+
raise
628666

629667
def _get_query_results(
630668
self, job_id, retry, project=None, timeout_ms=None, location=None

bigquery/tests/unit/test_client.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
except (ImportError, AttributeError): # pragma: NO COVER
3737
pyarrow = None
3838

39+
import google.api_core.exceptions
3940
from google.cloud.bigquery.dataset import DatasetReference
4041

4142

@@ -804,6 +805,61 @@ def test_create_dataset_w_string(self):
804805
},
805806
)
806807

808+
def test_create_dataset_alreadyexists_w_exists_ok_false(self):
809+
creds = _make_credentials()
810+
client = self._make_one(
811+
project=self.PROJECT, credentials=creds, location=self.LOCATION
812+
)
813+
client._connection = _make_connection(
814+
google.api_core.exceptions.AlreadyExists("dataset already exists")
815+
)
816+
817+
with pytest.raises(google.api_core.exceptions.AlreadyExists):
818+
client.create_dataset(self.DS_ID)
819+
820+
def test_create_dataset_alreadyexists_w_exists_ok_true(self):
821+
post_path = "/projects/{}/datasets".format(self.PROJECT)
822+
get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
823+
resource = {
824+
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
825+
"etag": "etag",
826+
"id": "{}:{}".format(self.PROJECT, self.DS_ID),
827+
"location": self.LOCATION,
828+
}
829+
creds = _make_credentials()
830+
client = self._make_one(
831+
project=self.PROJECT, credentials=creds, location=self.LOCATION
832+
)
833+
conn = client._connection = _make_connection(
834+
google.api_core.exceptions.AlreadyExists("dataset already exists"), resource
835+
)
836+
837+
dataset = client.create_dataset(self.DS_ID, exists_ok=True)
838+
839+
self.assertEqual(dataset.dataset_id, self.DS_ID)
840+
self.assertEqual(dataset.project, self.PROJECT)
841+
self.assertEqual(dataset.etag, resource["etag"])
842+
self.assertEqual(dataset.full_dataset_id, resource["id"])
843+
self.assertEqual(dataset.location, self.LOCATION)
844+
845+
conn.api_request.assert_has_calls(
846+
[
847+
mock.call(
848+
method="POST",
849+
path=post_path,
850+
data={
851+
"datasetReference": {
852+
"projectId": self.PROJECT,
853+
"datasetId": self.DS_ID,
854+
},
855+
"labels": {},
856+
"location": self.LOCATION,
857+
},
858+
),
859+
mock.call(method="GET", path=get_path),
860+
]
861+
)
862+
807863
def test_create_table_w_day_partition(self):
808864
from google.cloud.bigquery.table import Table
809865
from google.cloud.bigquery.table import TimePartitioning
@@ -1177,6 +1233,79 @@ def test_create_table_w_string(self):
11771233
)
11781234
self.assertEqual(got.table_id, self.TABLE_ID)
11791235

1236+
def test_create_table_alreadyexists_w_exists_ok_false(self):
1237+
post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID)
1238+
creds = _make_credentials()
1239+
client = self._make_one(
1240+
project=self.PROJECT, credentials=creds, location=self.LOCATION
1241+
)
1242+
conn = client._connection = _make_connection(
1243+
google.api_core.exceptions.AlreadyExists("table already exists")
1244+
)
1245+
1246+
with pytest.raises(google.api_core.exceptions.AlreadyExists):
1247+
client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID))
1248+
1249+
conn.api_request.assert_called_once_with(
1250+
method="POST",
1251+
path=post_path,
1252+
data={
1253+
"tableReference": {
1254+
"projectId": self.PROJECT,
1255+
"datasetId": self.DS_ID,
1256+
"tableId": self.TABLE_ID,
1257+
},
1258+
"labels": {},
1259+
},
1260+
)
1261+
1262+
def test_create_table_alreadyexists_w_exists_ok_true(self):
1263+
post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID)
1264+
get_path = "/projects/{}/datasets/{}/tables/{}".format(
1265+
self.PROJECT, self.DS_ID, self.TABLE_ID
1266+
)
1267+
resource = {
1268+
"id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID),
1269+
"tableReference": {
1270+
"projectId": self.PROJECT,
1271+
"datasetId": self.DS_ID,
1272+
"tableId": self.TABLE_ID,
1273+
},
1274+
}
1275+
creds = _make_credentials()
1276+
client = self._make_one(
1277+
project=self.PROJECT, credentials=creds, location=self.LOCATION
1278+
)
1279+
conn = client._connection = _make_connection(
1280+
google.api_core.exceptions.AlreadyExists("table already exists"), resource
1281+
)
1282+
1283+
got = client.create_table(
1284+
"{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True
1285+
)
1286+
1287+
self.assertEqual(got.project, self.PROJECT)
1288+
self.assertEqual(got.dataset_id, self.DS_ID)
1289+
self.assertEqual(got.table_id, self.TABLE_ID)
1290+
1291+
conn.api_request.assert_has_calls(
1292+
[
1293+
mock.call(
1294+
method="POST",
1295+
path=post_path,
1296+
data={
1297+
"tableReference": {
1298+
"projectId": self.PROJECT,
1299+
"datasetId": self.DS_ID,
1300+
"tableId": self.TABLE_ID,
1301+
},
1302+
"labels": {},
1303+
},
1304+
),
1305+
mock.call(method="GET", path=get_path),
1306+
]
1307+
)
1308+
11801309
def test_get_table(self):
11811310
path = "projects/%s/datasets/%s/tables/%s" % (
11821311
self.PROJECT,
@@ -1804,6 +1933,33 @@ def test_delete_dataset_wrong_type(self):
18041933
with self.assertRaises(TypeError):
18051934
client.delete_dataset(client.dataset(self.DS_ID).table("foo"))
18061935

1936+
def test_delete_dataset_w_not_found_ok_false(self):
1937+
path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
1938+
creds = _make_credentials()
1939+
http = object()
1940+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
1941+
conn = client._connection = _make_connection(
1942+
google.api_core.exceptions.NotFound("dataset not found")
1943+
)
1944+
1945+
with self.assertRaises(google.api_core.exceptions.NotFound):
1946+
client.delete_dataset(self.DS_ID)
1947+
1948+
conn.api_request.assert_called_with(method="DELETE", path=path, query_params={})
1949+
1950+
def test_delete_dataset_w_not_found_ok_true(self):
1951+
path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
1952+
creds = _make_credentials()
1953+
http = object()
1954+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
1955+
conn = client._connection = _make_connection(
1956+
google.api_core.exceptions.NotFound("dataset not found")
1957+
)
1958+
1959+
client.delete_dataset(self.DS_ID, not_found_ok=True)
1960+
1961+
conn.api_request.assert_called_with(method="DELETE", path=path, query_params={})
1962+
18071963
def test_delete_table(self):
18081964
from google.cloud.bigquery.table import Table
18091965

@@ -1836,6 +1992,39 @@ def test_delete_table_w_wrong_type(self):
18361992
with self.assertRaises(TypeError):
18371993
client.delete_table(client.dataset(self.DS_ID))
18381994

1995+
def test_delete_table_w_not_found_ok_false(self):
1996+
path = "/projects/{}/datasets/{}/tables/{}".format(
1997+
self.PROJECT, self.DS_ID, self.TABLE_ID
1998+
)
1999+
creds = _make_credentials()
2000+
http = object()
2001+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
2002+
conn = client._connection = _make_connection(
2003+
google.api_core.exceptions.NotFound("table not found")
2004+
)
2005+
2006+
with self.assertRaises(google.api_core.exceptions.NotFound):
2007+
client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID))
2008+
2009+
conn.api_request.assert_called_with(method="DELETE", path=path)
2010+
2011+
def test_delete_table_w_not_found_ok_true(self):
2012+
path = "/projects/{}/datasets/{}/tables/{}".format(
2013+
self.PROJECT, self.DS_ID, self.TABLE_ID
2014+
)
2015+
creds = _make_credentials()
2016+
http = object()
2017+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
2018+
conn = client._connection = _make_connection(
2019+
google.api_core.exceptions.NotFound("table not found")
2020+
)
2021+
2022+
client.delete_table(
2023+
"{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True
2024+
)
2025+
2026+
conn.api_request.assert_called_with(method="DELETE", path=path)
2027+
18392028
def test_job_from_resource_unknown_type(self):
18402029
from google.cloud.bigquery.job import UnknownJob
18412030

0 commit comments

Comments
 (0)