Skip to content

Commit 55a8097

Browse files
lbristol88 authored and tswast committed
Phase 1 for storing schemas for later use. (#7761)
* Added functions to client for loading and saving schemas to a file. * Tests for schema to/from json.
1 parent 7755867 commit 55a8097

2 files changed

Lines changed: 227 additions & 0 deletions

File tree

bigquery/google/cloud/bigquery/client.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424
import functools
2525
import gzip
26+
import io
27+
import json
2628
import os
2729
import tempfile
2830
import uuid
@@ -50,6 +52,7 @@
5052
from google.cloud.bigquery.model import ModelReference
5153
from google.cloud.bigquery.query import _QueryResults
5254
from google.cloud.bigquery.retry import DEFAULT_RETRY
55+
from google.cloud.bigquery.schema import SchemaField
5356
from google.cloud.bigquery.table import _table_arg_to_table
5457
from google.cloud.bigquery.table import _table_arg_to_table_ref
5558
from google.cloud.bigquery.table import Table
@@ -1929,6 +1932,50 @@ def list_rows(
19291932
)
19301933
return row_iterator
19311934

1935+
def _schema_from_json_file_object(self, file_obj):
1936+
"""Helper function for schema_from_json that takes a
1937+
file object that describes a table schema.
1938+
1939+
Returns:
1940+
List of schema field objects.
1941+
"""
1942+
json_data = json.load(file_obj)
1943+
return [SchemaField.from_api_repr(field) for field in json_data]
1944+
1945+
def _schema_to_json_file_object(self, schema_list, file_obj):
1946+
"""Helper function for schema_to_json that takes a schema list and file
1947+
object and writes the schema list to the file object with json.dump
1948+
"""
1949+
json.dump(schema_list, file_obj, indent=2, sort_keys=True)
1950+
1951+
def schema_from_json(self, file_or_path):
    """Load a table schema from JSON in a file object or at a file path.

    Args:
        file_or_path: Either a readable file-like object, or a path
            that can be passed to ``open()``. The content must be JSON
            that describes a table schema.

    Returns:
        List of schema field objects.
    """
    # Duck-type on ``read`` in addition to the ``io.IOBase`` check:
    # Python 2 built-in ``file`` objects and other file-like wrappers do
    # not inherit from ``io.IOBase`` and would otherwise be handed to
    # ``open()`` as though they were a path.
    if isinstance(file_or_path, io.IOBase) or hasattr(file_or_path, "read"):
        return self._schema_from_json_file_object(file_or_path)

    with open(file_or_path) as file_obj:
        return self._schema_from_json_file_object(file_obj)
1963+
1964+
def schema_to_json(self, schema_list, destination):
    """Serialize a list of schema field objects as JSON to a file.

    Args:
        schema_list: Iterable of schema field objects; each one is
            converted with its ``to_api_repr()`` method before being
            written.
        destination: Either a writable file-like object, or a path
            that can be passed to ``open()``. A path is opened in
            ``"w"`` mode.
    """
    json_schema_list = [field.to_api_repr() for field in schema_list]

    # Duck-type on ``write`` in addition to the ``io.IOBase`` check:
    # Python 2 built-in ``file`` objects and other file-like wrappers do
    # not inherit from ``io.IOBase`` and would otherwise be handed to
    # ``open()`` as though they were a path.
    if isinstance(destination, io.IOBase) or hasattr(destination, "write"):
        return self._schema_to_json_file_object(json_schema_list, destination)

    with open(destination, mode="w") as file_obj:
        return self._schema_to_json_file_object(json_schema_list, file_obj)
1978+
19321979

19331980
# pylint: disable=unused-argument
19341981
def _item_to_project(iterator, resource):

bigquery/tests/unit/test_client.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5161,3 +5161,183 @@ def test__do_multipart_upload_wrong_size(self):
51615161

51625162
with pytest.raises(ValueError):
51635163
client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None)
5164+
5165+
def test_schema_from_json_with_file_path(self):
    """schema_from_json opens the given path and parses the schema."""
    from google.cloud.bigquery.schema import SchemaField

    # JSON document served by the patched ``open``.
    schema_json = """[
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING"
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING"
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT"
        }
    ]"""

    want = [
        SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
        SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
        SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
    ]

    client = self._make_client()
    schema_path = "/mocked/file.json"

    # The module that owns ``open`` is named differently on Python 2 and 3.
    builtins_module = "__builtin__" if six.PY2 else "builtins"
    fake_open = mock.mock_open(read_data=schema_json)

    with mock.patch(builtins_module + ".open", new=fake_open) as patched_open:
        got = client.schema_from_json(schema_path)
        patched_open.assert_called_once_with(schema_path)
        # Make sure __exit__ is called on the context manager that opens
        # the file inside the function.
        patched_open().__exit__.assert_called_once()

    assert want == got
5215+
5216+
def test_schema_from_json_with_file_object(self):
    """schema_from_json parses the schema from an in-memory file object."""
    from google.cloud.bigquery.schema import SchemaField

    schema_json = """[
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING"
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING"
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT"
        }
    ]"""

    want = [
        SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
        SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
        SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
    ]

    client = self._make_client()

    # A plain string literal is bytes on Python 2 and text on Python 3, so
    # pick the in-memory buffer type that accepts it.
    buffer_type = io.BytesIO if six.PY2 else io.StringIO
    in_memory_file = buffer_type(schema_json)

    got = client.schema_from_json(in_memory_file)

    assert want == got
5256+
5257+
def test_schema_to_json_with_file_path(self):
    """schema_to_json opens the path for writing and dumps the schema."""
    from google.cloud.bigquery.schema import SchemaField

    # API representation expected to be handed to ``json.dump``.
    want_resources = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]

    fields = [
        SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
        SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
        SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
    ]

    client = self._make_client()
    schema_path = "/mocked/file.json"

    # The module that owns ``open`` is named differently on Python 2 and 3.
    builtins_module = "__builtin__" if six.PY2 else "builtins"
    open_patch = mock.patch(builtins_module + ".open", mock.mock_open())

    with open_patch as patched_open, mock.patch("json.dump") as patched_dump:
        client.schema_to_json(fields, schema_path)
        patched_open.assert_called_once_with(schema_path, mode="w")
        # Make sure __exit__ is called on the context manager that opens
        # the file inside the function.
        patched_open().__exit__.assert_called_once()
        patched_dump.assert_called_with(
            want_resources, patched_open.return_value, indent=2, sort_keys=True
        )
5304+
5305+
def test_schema_to_json_with_file_object(self):
    """schema_to_json writes parseable JSON to an in-memory file object."""
    from google.cloud.bigquery.schema import SchemaField

    # Expected result of parsing what gets written to the buffer.
    want_resources = [
        {
            "description": "quarter",
            "mode": "REQUIRED",
            "name": "qtr",
            "type": "STRING",
        },
        {
            "description": "sales representative",
            "mode": "NULLABLE",
            "name": "rep",
            "type": "STRING",
        },
        {
            "description": "total sales",
            "mode": "NULLABLE",
            "name": "sales",
            "type": "FLOAT",
        },
    ]

    fields = [
        SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
        SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
        SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
    ]

    # Python 2 uses a bytes buffer and Python 3 a text buffer.
    in_memory_file = io.BytesIO() if six.PY2 else io.StringIO()

    client = self._make_client()

    client.schema_to_json(fields, in_memory_file)
    assert want_resources == json.loads(in_memory_file.getvalue())

0 commit comments

Comments
 (0)