Skip to content

Commit aad16d2

Browse files
plamutemar-kar
authored and committed
BigQuery Storage: Add more in-depth system tests (googleapis#8992)
* Add additional BQ storage system test fixtures
* Add reader column selection system test
* Add basic reader system test
* Add reader with row filter system test
* Add reading data with snapshot system test
* Add reading column partitioned table system test
* Add system test for column types data conversions
* Add ingestion time partitioned table system test
* Add system test for resuming a read at an offset
* Remove unnecessary protobuf install in noxfile
* Add TODO comment to replace a test helper method

  A similar method is planned to be added to the library itself, and when done, the _add_rows() will not be needed anymore.
* Extract BQ client to session fixture in tests

  Creating a client once per system tests session avoids the overhead of authenticating before each test case.
* Only create BQ storage client once per test run

  Creating a client just once avoids the auth overhead on every system test case.
* Add common credentials fixture for system tests
1 parent f41f85c commit aad16d2

4 files changed

Lines changed: 553 additions & 4 deletions

File tree

bigquery_storage/noxfile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def system(session):
118118
session.install("-e", local_dep)
119119
session.install("-e", "../test_utils/")
120120
session.install("-e", ".[fastavro,pandas,pyarrow]")
121+
session.install("-e", "../bigquery/")
121122
session.install("-e", ".")
122123

123124
# Run py.test against the system tests.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
first_name,last_name,age
2+
John,Doe,42
3+
Jack,Black,53
4+
Nick,Sleek,24
5+
Kevin,Powell,50
6+
Johnny,Young,2

bigquery_storage/tests/system/conftest.py

Lines changed: 187 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,203 @@
1616
"""System tests for reading rows from tables."""
1717

1818
import os
19+
import uuid
1920

2021
import pytest
2122

2223
from google.cloud import bigquery_storage_v1beta1
2324

2425

25-
@pytest.fixture()
26+
_ASSETS_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets")
27+
28+
29+
@pytest.fixture(scope="session")
def project_id():
    """Return the Google Cloud project ID used by the system tests.

    Session-scoped so the environment is read only once per test run.
    A ``KeyError`` here means the test environment is misconfigured.
    """
    return os.environ["PROJECT_ID"]
2832

2933

30-
@pytest.fixture()
31-
def client():
32-
return bigquery_storage_v1beta1.BigQueryStorageClient()
34+
@pytest.fixture(scope="session")
def credentials():
    """Load the service account credentials shared by all system tests."""
    from google.oauth2 import service_account

    # NOTE: the test config in noxfile checks that the env variable is indeed set
    key_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
    creds = service_account.Credentials.from_service_account_file(key_path)
    return creds
41+
42+
43+
@pytest.fixture(scope="session")
def bq_client(credentials):
    """Return a BigQuery client shared across the whole test session.

    Creating the client once avoids the authentication overhead that would
    otherwise be paid by every single test case.
    """
    from google.cloud import bigquery

    shared_client = bigquery.Client(credentials=credentials)
    return shared_client
48+
49+
50+
@pytest.fixture(scope="session")
def dataset(project_id, bq_client):
    """Create a throwaway BigQuery dataset for the whole test session.

    Yields the created ``Dataset`` object and deletes it — together with any
    tables still inside it — when the session ends.
    """
    from google.cloud import bigquery

    # Unique name so concurrent test runs against the same project don't collide.
    unique_suffix = str(uuid.uuid4()).replace("-", "_")
    dataset_name = "bq_storage_system_tests_" + unique_suffix

    dataset_id = "{}.{}".format(project_id, dataset_name)
    # Renamed from "dataset" to avoid shadowing this fixture's own name.
    new_dataset = bigquery.Dataset(dataset_id)
    new_dataset.location = "US"
    created_dataset = bq_client.create_dataset(new_dataset)

    yield created_dataset

    # Delete the server-side object actually returned by create_dataset
    # (consistent with the "table" fixture); delete_contents removes any
    # tables a test may have left behind.
    bq_client.delete_dataset(created_dataset, delete_contents=True)
65+
66+
67+
@pytest.fixture(scope="session")
def table(project_id, dataset, bq_client):
    """Create the session-wide "users" table and drop it on teardown."""
    from google.cloud import bigquery

    user_schema = [
        bigquery.SchemaField("first_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    full_table_id = "{}.{}.{}".format(project_id, dataset.dataset_id, "users")
    new_table = bq_client.create_table(
        bigquery.Table(full_table_id, schema=user_schema)
    )

    yield new_table

    bq_client.delete_table(new_table)
84+
85+
86+
@pytest.fixture
def table_with_data_ref(dataset, table, bq_client):
    """Yield a storage TableReference to the "users" table loaded with CSV data.

    Rows are loaded from the assets CSV before the test and deleted again
    afterwards, so every test sees a freshly populated table.
    """
    from google.cloud import bigquery

    load_config = bigquery.LoadJobConfig()
    load_config.source_format = bigquery.SourceFormat.CSV
    load_config.skip_leading_rows = 1
    load_config.schema = table.schema

    csv_path = os.path.join(_ASSETS_DIR, "people_data.csv")

    with open(csv_path, "rb") as csv_file:
        load_job = bq_client.load_table_from_file(
            csv_file, table, job_config=load_config
        )

    load_job.result()  # block until all rows are loaded

    ref = bigquery_storage_v1beta1.types.TableReference()
    ref.project_id = table.project
    ref.dataset_id = table.dataset_id
    ref.table_id = table.table_id
    yield ref

    # Remove all rows so the session-scoped table is clean for the next test.
    cleanup_sql = "DELETE FROM {}.{} WHERE 1 = 1".format(
        dataset.dataset_id, table.table_id
    )
    bq_client.query(cleanup_sql, location="US").result()
112+
113+
114+
@pytest.fixture
def col_partition_table_ref(project_id, dataset, bq_client):
    """Create a column-partitioned table and yield a storage TableReference.

    The table is partitioned by day on the ``occurred`` DATE column and is
    dropped again after the requesting test finishes.
    """
    from google.cloud import bigquery

    event_schema = [
        bigquery.SchemaField("occurred", "DATE", mode="REQUIRED"),
        bigquery.SchemaField("description", "STRING", mode="REQUIRED"),
    ]
    partitioning = bigquery.table.TimePartitioning(
        type_=bigquery.table.TimePartitioningType.DAY, field="occurred"
    )
    events_table = bigquery.table.Table(
        table_ref="{}.{}.notable_events".format(project_id, dataset.dataset_id),
        schema=event_schema,
    )
    events_table.time_partitioning = partitioning

    created = bq_client.create_table(events_table)

    ref = bigquery_storage_v1beta1.types.TableReference()
    ref.project_id = created.project
    ref.dataset_id = created.dataset_id
    ref.table_id = created.table_id
    yield ref

    bq_client.delete_table(created)
140+
141+
142+
@pytest.fixture
def ingest_partition_table_ref(project_id, dataset, bq_client):
    """Create an ingestion-time partitioned table and yield its TableReference.

    Partitioning uses no explicit column (``field=None``), i.e. the implicit
    ``_PARTITIONTIME`` pseudo column. The table is dropped on teardown.
    """
    from google.cloud import bigquery

    sightings_schema = [
        bigquery.SchemaField("shape", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("altitude", "INT64", mode="REQUIRED"),
    ]
    partitioning = bigquery.table.TimePartitioning(
        type_=bigquery.table.TimePartitioningType.DAY,
        field=None,  # use _PARTITIONTIME pseudo column
    )
    sightings_table = bigquery.table.Table(
        table_ref="{}.{}.ufo_sightings".format(project_id, dataset.dataset_id),
        schema=sightings_schema,
    )
    sightings_table.time_partitioning = partitioning

    created = bq_client.create_table(sightings_table)

    ref = bigquery_storage_v1beta1.types.TableReference()
    ref.project_id = created.project
    ref.dataset_id = created.dataset_id
    ref.table_id = created.table_id
    yield ref

    bq_client.delete_table(created)
169+
170+
171+
@pytest.fixture
def all_types_table_ref(project_id, dataset, bq_client):
    """Create a table covering all column types and yield its TableReference.

    Used by the data-conversion system tests; the column order below is the
    table's column order and must be preserved. Dropped on teardown.
    """
    from google.cloud import bigquery

    complex_schema = [
        bigquery.SchemaField("string_field", "STRING"),
        bigquery.SchemaField("bytes_field", "BYTES"),
        bigquery.SchemaField("int64_field", "INT64"),
        bigquery.SchemaField("float64_field", "FLOAT64"),
        bigquery.SchemaField("numeric_field", "NUMERIC"),
        bigquery.SchemaField("bool_field", "BOOL"),
        bigquery.SchemaField("geography_field", "GEOGRAPHY"),
        bigquery.SchemaField(
            "person_struct_field",
            "STRUCT",
            fields=(
                bigquery.SchemaField("name", "STRING"),
                bigquery.SchemaField("age", "INT64"),
            ),
        ),
        bigquery.SchemaField("timestamp_field", "TIMESTAMP"),
        bigquery.SchemaField("date_field", "DATE"),
        bigquery.SchemaField("time_field", "TIME"),
        bigquery.SchemaField("datetime_field", "DATETIME"),
        bigquery.SchemaField("string_array_field", "STRING", mode="REPEATED"),
    ]
    records_table = bigquery.table.Table(
        table_ref="{}.{}.complex_records".format(project_id, dataset.dataset_id),
        schema=complex_schema,
    )

    created = bq_client.create_table(records_table)

    ref = bigquery_storage_v1beta1.types.TableReference()
    ref.project_id = created.project
    ref.dataset_id = created.dataset_id
    ref.table_id = created.table_id
    yield ref

    bq_client.delete_table(created)
211+
212+
213+
@pytest.fixture(scope="session")
def client(credentials):
    """Return a BigQuery Storage client shared across the test session.

    Created only once per run to avoid per-test authentication overhead.
    """
    storage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=credentials
    )
    return storage_client
33216

34217

35218
@pytest.fixture()

0 commit comments

Comments
 (0)