Skip to content
15 changes: 11 additions & 4 deletions bigquery/google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,18 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
type for some or all of the DataFrame columns.

Returns:
Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
The automatically determined schema. Returns None if the type of
any column cannot be determined.
Sequence[google.cloud.bigquery.schema.SchemaField]:
The automatically determined schema. Returns an empty tuple if the
type of any column cannot be determined.
"""
if bq_schema:
for field in bq_schema:
if field.field_type in schema._STRUCT_TYPES:
raise ValueError(
"Uploading dataframes with struct (record) column types "
"is not supported. See: "
"https://github.com/googleapis/google-cloud-python/issues/8191"
)
bq_schema_index = {field.name: field for field in bq_schema}
else:
bq_schema_index = {}
Expand All @@ -220,7 +227,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
if not bq_type:
warnings.warn("Unable to determine type of column '{}'.".format(column))
return None
return ()
bq_field = schema.SchemaField(column, bq_type)
bq_schema_out.append(bq_field)
return tuple(bq_schema_out)
Expand Down
20 changes: 1 addition & 19 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.schema import _STRUCT_TYPES
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _table_arg_to_table
from google.cloud.bigquery.table import _table_arg_to_table_ref
Expand Down Expand Up @@ -1532,27 +1531,10 @@ def load_table_from_dataframe(
if location is None:
location = self.location

if job_config.schema:
for field in job_config.schema:
if field.field_type in _STRUCT_TYPES:
raise ValueError(
"Uploading dataframes with struct (record) column types "
"is not supported. See: "
"https://github.com/googleapis/google-cloud-python/issues/8191"
)

autodetected_schema = _pandas_helpers.dataframe_to_bq_schema(
job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
dataframe, job_config.schema
)

# Only use an explicit schema if we were able to determine one
# matching the dataframe. If not, fall back to the pandas to_parquet
# method.
if autodetected_schema:
job_config.schema = autodetected_schema
else:
job_config.schema = ()

tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8]))
os.close(tmpfd)

Expand Down
8 changes: 4 additions & 4 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5230,7 +5230,7 @@ def test_load_table_from_dataframe(self):
from google.cloud.bigquery import job

client = self._make_client()
records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
Comment thread
plamut marked this conversation as resolved.
dataframe = pandas.DataFrame(records)

load_patch = mock.patch(
Expand Down Expand Up @@ -5265,7 +5265,7 @@ def test_load_table_from_dataframe_w_client_location(self):
from google.cloud.bigquery import job

client = self._make_client(location=self.LOCATION)
records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
dataframe = pandas.DataFrame(records)

load_patch = mock.patch(
Expand Down Expand Up @@ -5300,7 +5300,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
from google.cloud.bigquery import job

client = self._make_client()
records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
dataframe = pandas.DataFrame(records)
job_config = job.LoadJobConfig()

Expand Down Expand Up @@ -5702,7 +5702,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self):
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self):
client = self._make_client()
records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
dataframe = pandas.DataFrame(records)

load_patch = mock.patch(
Expand Down