 import mock
 import pytest
 import six
+from six.moves import queue

 import google.api_core.exceptions

@@ -1816,9 +1817,12 @@ def test_to_dataframe_w_bqstorage_nonempty(self):
         bqstorage_client = mock.create_autospec(
             bigquery_storage_v1beta1.BigQueryStorageClient
         )
-        session = bigquery_storage_v1beta1.types.ReadSession(
-            streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}]
-        )
+        streams = [
+            # Use two streams so we can check that frames are read from each stream.
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"},
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"},
+        ]
+        session = bigquery_storage_v1beta1.types.ReadSession(streams=streams)
         session.avro_schema.schema = json.dumps(
             {
                 "fields": [
@@ -1836,20 +1840,25 @@ def test_to_dataframe_w_bqstorage_nonempty(self):

         mock_rows = mock.create_autospec(reader.ReadRowsIterable)
         mock_rowstream.rows.return_value = mock_rows
+        page_items = [
+            {"colA": 1, "colB": "abc", "colC": 2.0},
+            {"colA": -1, "colB": "def", "colC": 4.0},
+        ]

         def blocking_to_dataframe(*args, **kwargs):
             # Sleep for longer than the waiting interval so that we know we're
             # only reading one page per loop at most.
             time.sleep(2 * mut._PROGRESS_INTERVAL)
-            return pandas.DataFrame(
-                {"colA": [1, -1], "colB": ["abc", "def"], "colC": [2.0, 4.0]},
-                columns=["colA", "colB", "colC"],
-            )
+            return pandas.DataFrame(page_items, columns=["colA", "colB", "colC"])

         mock_page = mock.create_autospec(reader.ReadRowsPage)
         mock_page.to_dataframe.side_effect = blocking_to_dataframe
-        mock_pages = mock.PropertyMock(return_value=(mock_page, mock_page, mock_page))
-        type(mock_rows).pages = mock_pages
+        mock_pages = (mock_page, mock_page, mock_page)
+        type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages)
+
+        # Test that full queue errors are ignored.
+        mock_queue = mock.create_autospec(mut._NoopProgressBarQueue)
+        mock_queue().put_nowait.side_effect = queue.Full

         schema = [
             schema.SchemaField("colA", "IGNORED"),
@@ -1866,17 +1875,100 @@ def blocking_to_dataframe(*args, **kwargs):
             selected_fields=schema,
         )

-        with mock.patch(
+        with mock.patch.object(mut, "_NoopProgressBarQueue", mock_queue), mock.patch(
             "concurrent.futures.wait", wraps=concurrent.futures.wait
         ) as mock_wait:
             got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)

+        # Are the columns in the expected order?
         column_names = ["colA", "colC", "colB"]
         self.assertEqual(list(got), column_names)
-        self.assertEqual(len(got.index), 6)
+
+        # Do we have the expected number of rows?
+        total_pages = len(streams) * len(mock_pages)
+        total_rows = len(page_items) * total_pages
+        self.assertEqual(len(got.index), total_rows)
+
         # Make sure that this test looped through multiple progress intervals.
         self.assertGreaterEqual(mock_wait.call_count, 2)

+        # Make sure that this test pushed to the progress queue.
+        self.assertEqual(mock_queue().put_nowait.call_count, total_pages)
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(
+        bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    @unittest.skipIf(tqdm is None, "Requires `tqdm`")
+    @mock.patch("tqdm.tqdm")
+    def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock):
+        from google.cloud.bigquery import schema
+        from google.cloud.bigquery import table as mut
+        from google.cloud.bigquery_storage_v1beta1 import reader
+
+        # Speed up testing.
+        mut._PROGRESS_INTERVAL = 0.01
+
+        bqstorage_client = mock.create_autospec(
+            bigquery_storage_v1beta1.BigQueryStorageClient
+        )
+        streams = [
+            # Use two streams so we can check that progress bar updates are
+            # sent from each stream.
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"},
+            {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"},
+        ]
+        session = bigquery_storage_v1beta1.types.ReadSession(streams=streams)
+        session.avro_schema.schema = json.dumps({"fields": [{"name": "testcol"}]})
+        bqstorage_client.create_read_session.return_value = session
+
+        mock_rowstream = mock.create_autospec(reader.ReadRowsStream)
+        bqstorage_client.read_rows.return_value = mock_rowstream
+
+        mock_rows = mock.create_autospec(reader.ReadRowsIterable)
+        mock_rowstream.rows.return_value = mock_rows
+        mock_page = mock.create_autospec(reader.ReadRowsPage)
+        page_items = [-1, 0, 1]
+        type(mock_page).num_items = mock.PropertyMock(return_value=len(page_items))
+
+        def blocking_to_dataframe(*args, **kwargs):
+            # Sleep for longer than the waiting interval. This ensures the
+            # progress_queue gets written to more than once because it gives
+            # the worker->progress updater time to sum intermediate updates.
+            time.sleep(2 * mut._PROGRESS_INTERVAL)
+            return pandas.DataFrame({"testcol": page_items})
+
+        mock_page.to_dataframe.side_effect = blocking_to_dataframe
+        mock_pages = (mock_page, mock_page, mock_page, mock_page, mock_page)
+        type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages)
+
+        schema = [schema.SchemaField("testcol", "IGNORED")]
+
+        row_iterator = mut.RowIterator(
+            _mock_client(),
+            None,  # api_request: ignored
+            None,  # path: ignored
+            schema,
+            table=mut.TableReference.from_string("proj.dset.tbl"),
+            selected_fields=schema,
+        )
+
+        row_iterator.to_dataframe(
+            bqstorage_client=bqstorage_client, progress_bar_type="tqdm"
+        )
+
+        # Make sure that this test updated the progress bar once per page from
+        # each stream.
+        total_pages = len(streams) * len(mock_pages)
+        expected_total_rows = total_pages * len(page_items)
+        progress_updates = [
+            args[0] for args, kwargs in tqdm_mock().update.call_args_list
+        ]
+        # Should have sent >1 update due to delay in blocking_to_dataframe.
+        self.assertGreater(len(progress_updates), 1)
+        self.assertEqual(sum(progress_updates), expected_total_rows)
+        tqdm_mock().close.assert_called_once()
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(
         bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"