Skip to content

Commit 89ff561

Browse files
committed
Feedback updates.
1 parent 7acd247 commit 89ff561

9 files changed

Lines changed: 76 additions & 63 deletions

File tree

docs/speech-streaming.rst

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,3 @@ Streaming Speech Result
1313
:members:
1414
:undoc-members:
1515
:show-inheritance:
16-
17-
Streaming Endpointer Type
18-
=========================
19-
20-
.. automodule:: google.cloud.speech.endpointer_type
21-
:members:
22-
:undoc-members:
23-
:show-inheritance:

docs/speech-usage.rst

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -155,41 +155,43 @@ data to possible text alternatives on the fly.
155155

156156
.. code-block:: python
157157
158-
>>> import io
159158
>>> from google.cloud import speech
160159
>>> client = speech.Client()
161-
>>> with io.open('./hello.wav', 'rb') as stream:
160+
>>> with open('./hello.wav', 'rb') as stream:
162161
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
163162
... sample_rate=16000)
164163
... for response in client.stream_recognize(sample):
165164
... print(response.transcript)
166-
hello
167165
... print(response.is_final)
166+
hello
168167
True
169168
170169
171-
By setting ``interim_results`` to true, interim results (tentative hypotheses)
170+
By setting ``interim_results`` to :data:`True`, interim results (tentative hypotheses)
172171
may be returned as they become available (these interim results are indicated
173-
with the is_final=false flag). If false or omitted, only is_final=true
172+
with the ``is_final=false`` flag). If :data:`False` or omitted, only ``is_final=true``
174173
result(s) are returned.
175174

176175
.. code-block:: python
177176
178-
>>> import io
179177
>>> from google.cloud import speech
180178
>>> client = speech.Client()
181-
>>> with io.open('./hello.wav', 'rb') as stream:
182-
>>> sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
179+
>>> with open('./hello.wav', 'rb') as stream:
180+
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
183181
... sample_rate=16000)
184182
... for response in client.stream_recognize(sample,
185183
... interim_results=True):
184+
... print('====Response====')
186185
... print(response.transcript)
187-
hell
188186
... print(response.is_final)
187+
====Response====
188+
he
189189
False
190-
... print(response.transcript)
190+
====Response====
191+
hell
192+
False
193+
====Response====
191194
hello
192-
... print(response.is_final)
193195
True
194196
195197
@@ -204,13 +206,15 @@ See: `Single Utterance`_
204206

205207
.. code-block:: python
206208
207-
>>> with io.open('./hello_pause_goodbye.wav', 'rb') as stream:
208-
>>> sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
209+
>>> with open('./hello_pause_goodbye.wav', 'rb') as stream:
210+
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
209211
... sample_rate=16000)
210-
... stream_container = client.stream_recognize(sample,
211-
... single_utterance=True)
212-
>>> print(stream_container.get_full_text())
212+
... for response in client.stream_recognize(sample,
213+
... single_utterance=True):
214+
... print(response.transcript)
215+
... print(response.is_final)
213216
hello
217+
True
214218
215219
.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
216220
.. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize

speech/google/cloud/speech/client.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ def stream_recognize(self, sample, language_code=None,
234234
235235
See: https://cloud.google.com/speech/limits#content
236236
237+
Yields :class:`~streaming_response.StreamingSpeechResponse` containing
238+
results and metadata from the streaming request.
239+
237240
:type sample: :class:`~google.cloud.speech.sample.Sample`
238241
:param sample: Instance of ``Sample`` containing audio information.
239242
@@ -264,8 +267,8 @@ def stream_recognize(self, sample, language_code=None,
264267
and phrases. This can also be used to add new
265268
words to the vocabulary of the recognizer.
266269
267-
:type single_utterance: boolean
268-
:param single_utterance: [Optional] If false or omitted, the recognizer
270+
:type single_utterance: bool
271+
:param single_utterance: (Optional) If false or omitted, the recognizer
269272
will perform continuous recognition
270273
(continuing to process audio even if the user
271274
pauses speaking) until the client closes the
@@ -282,13 +285,15 @@ def stream_recognize(self, sample, language_code=None,
282285
SpeechRecognitionResult with the is_final flag
283286
set to true.
284287
285-
:type interim_results: boolean
286-
:param interim_results: [Optional] If true, interim results (tentative
288+
:type interim_results: bool
289+
:param interim_results: (Optional) If true, interim results (tentative
287290
hypotheses) may be returned as they become
288291
available (these interim results are indicated
289292
with the is_final=false flag). If false or
290293
omitted, only is_final=true result(s) are
291294
returned.
295+
296+
:raises: :class:`EnvironmentError` if gRPC is not enabled.
292297
"""
293298
if not _USE_GAX:
294299
raise EnvironmentError('gRPC is required to use this API.')
@@ -301,7 +306,7 @@ def stream_recognize(self, sample, language_code=None,
301306
interim_results=interim_results)
302307

303308
for response in self.speech_api.streaming_recognize(requests):
304-
if hasattr(response, 'results') or interim_results:
309+
if getattr(response, 'results', None) or interim_results:
305310
yield StreamingSpeechResponse.from_pb(response)
306311

307312
@property
@@ -414,8 +419,8 @@ def _make_request_stream(sample, language_code=None, max_alternatives=None,
414419
and phrases. This can also be used to add new
415420
words to the vocabulary of the recognizer.
416421
417-
:type single_utterance: boolean
418-
:param single_utterance: [Optional] If false or omitted, the recognizer
422+
:type single_utterance: bool
423+
:param single_utterance: (Optional) If false or omitted, the recognizer
419424
will perform continuous recognition
420425
(continuing to process audio even if the user
421426
pauses speaking) until the client closes the
@@ -432,8 +437,8 @@ def _make_request_stream(sample, language_code=None, max_alternatives=None,
432437
SpeechRecognitionResult with the is_final flag
433438
set to true.
434439
435-
:type interim_results: boolean
436-
:param interim_results: [Optional] If true, interim results (tentative
440+
:type interim_results: bool
441+
:param interim_results: (Optional) If true, interim results (tentative
437442
hypotheses) may be returned as they become
438443
available (these interim results are indicated
439444
with the is_final=false flag). If false or
@@ -452,7 +457,6 @@ def _make_request_stream(sample, language_code=None, max_alternatives=None,
452457
data = sample.stream.read(sample.chunk_size)
453458
if not data:
454459
break
455-
# Optimize the request data size to around 100ms.
456460
yield StreamingRecognizeRequest(audio_content=data)
457461

458462

@@ -466,12 +470,12 @@ def _make_streaming_config(sample, language_code,
466470
:param sample: Instance of ``Sample`` containing audio information.
467471
468472
:type language_code: str
469-
:param language_code: (Optional) The language of the supplied audio as
473+
:param language_code: The language of the supplied audio as
470474
BCP-47 language tag. Example: ``'en-GB'``.
471475
If omitted, defaults to ``'en-US'``.
472476
473477
:type max_alternatives: int
474-
:param max_alternatives: (Optional) Maximum number of recognition
478+
:param max_alternatives: Maximum number of recognition
475479
hypotheses to be returned. The server may
476480
return fewer than maxAlternatives.
477481
Valid values are 0-30. A value of 0 or 1
@@ -492,8 +496,8 @@ def _make_streaming_config(sample, language_code,
492496
and phrases. This can also be used to add new
493497
words to the vocabulary of the recognizer.
494498
495-
:type single_utterance: boolean
496-
:param single_utterance: [Optional] If false or omitted, the recognizer
499+
:type single_utterance: bool
500+
:param single_utterance: If false or omitted, the recognizer
497501
will perform continuous recognition
498502
(continuing to process audio even if the user
499503
pauses speaking) until the client closes the
@@ -510,8 +514,8 @@ def _make_streaming_config(sample, language_code,
510514
SpeechRecognitionResult with the is_final flag
511515
set to true.
512516
513-
:type interim_results: boolean
514-
:param interim_results: [Optional] If true, interim results (tentative
517+
:type interim_results: bool
518+
:param interim_results: If true, interim results (tentative
515519
hypotheses) may be returned as they become
516520
available (these interim results are indicated
517521
with the is_final=false flag). If false or

speech/google/cloud/speech/endpointer_type.py

Whitespace-only changes.

speech/google/cloud/speech/sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def chunk_size(self):
7777
:rtype: int
7878
:returns: Optimized chunk size.
7979
"""
80-
return int(self.sample_rate / 10)
80+
return int(self.sample_rate / 10.0)
8181

8282
@property
8383
def source_uri(self):

speech/google/cloud/speech/streaming_response.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""Representation of a GAPIC Speech API response."""
1616

1717
from google.cloud.speech.streaming_result import StreamingSpeechResult
18+
from google.cloud.gapic.speech.v1beta1.enums import StreamingRecognizeResponse
1819

1920

2021
class StreamingSpeechResponse(object):
@@ -38,7 +39,7 @@ def __init__(self, error=None, endpointer_type=None, results=None,
3839
result_index=None):
3940
results = results or []
4041
self._error = error
41-
self._endpointer_type = EndpointerType.reverse_map.get(
42+
self._endpointer_type = EndpointerType.REVERSE_MAP.get(
4243
endpointer_type, None)
4344
self._result_index = result_index
4445
self._results = [StreamingSpeechResult.from_pb(result)
@@ -91,7 +92,7 @@ def is_final(self):
9192
:returns: True if the result has completed its processing.
9293
"""
9394
if self.results:
94-
return bool(self.results[0].is_final)
95+
return self.results[0].is_final
9596
else:
9697
return False
9798

@@ -123,26 +124,27 @@ def transcript(self):
123124
if self.results and self.results[0].alternatives:
124125
return self.results[0].alternatives[0].transcript
125126
else:
126-
return ''
127+
return None
127128

128129

129-
class EndpointerType(object):
130+
class EndpointerType(StreamingRecognizeResponse.EndpointerType):
130131
"""Endpointer type for tracking state of Speech API detection.
131132
133+
ENDPOINTER_EVENT_UNSPECIFIED (int): No endpointer event specified.
134+
START_OF_SPEECH (int): Speech has been detected in the audio stream.
135+
END_OF_SPEECH (int): Speech has ceased to be detected in the audio
136+
stream.
137+
END_OF_AUDIO (int): The end of the audio stream has been reached and
138+
it is being processed.
139+
END_OF_UTTERANCE (int): This event is only sent when
140+
``single_utterance`` is ``true``. It indicates that the server has
141+
detected the end of the user's speech utterance and expects no
142+
additional speech. Therefore, the server will not process additional
143+
audio. The client should stop sending additional audio data.
144+
132145
See:
133146
https://cloud.google.com/speech/reference/rpc/\
134147
google.cloud.speech.v1beta1#endpointertype
135148
"""
136-
ENDPOINTER_EVENT_UNSPECIFIED = 0
137-
START_OF_SPEECH = 1
138-
END_OF_SPEECH = 2
139-
END_OF_AUDIO = 3
140-
END_OF_UTTERANCE = 4
141-
142-
reverse_map = {
143-
0: 'ENDPOINTER_EVENT_UNSPECIFIED',
144-
1: 'START_OF_SPEECH',
145-
2: 'END_OF_SPEECH',
146-
3: 'END_OF_AUDIO',
147-
4: 'END_OF_UTTERANCE'
148-
}
149+
REVERSE_MAP = {v: k for k, v
150+
in vars(StreamingRecognizeResponse.EndpointerType).items()}

speech/google/cloud/speech/streaming_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,4 @@ def is_final(self):
7070
:rtype: bool
7171
:returns: True if the result has completed its processing.
7272
"""
73-
return bool(self._is_final)
73+
return self._is_final

speech/unit_tests/test_client.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,19 +251,30 @@ def test_stream_recognize(self):
251251
sample = client.sample(stream=stream,
252252
encoding=Encoding.LINEAR16,
253253
sample_rate=self.SAMPLE_RATE)
254-
results = client.stream_recognize(sample)
254+
responses = client.stream_recognize(sample)
255255

256-
self.assertIsInstance(next(results), StreamingSpeechResponse)
256+
self.assertIsInstance(next(responses), StreamingSpeechResponse)
257257
requests = []
258258
for req in client.speech_api._requests:
259259
requests.append(req)
260260
self.assertEqual(len(requests), 2)
261261

262262

263+
class _MockSpeechGAPICAlternative(object):
264+
transcript = 'hello there!'
265+
confidence = 0.9704365
266+
267+
268+
class _MockSpeechGAPICResult(object):
269+
alternatives = [_MockSpeechGAPICAlternative()]
270+
is_final = False
271+
stability = 0.0
272+
273+
263274
class _MockGAPICSpeechResponse(object):
264275
error = None
265276
endpointer_type = None
266-
results = []
277+
results = [_MockSpeechGAPICResult()]
267278
result_index = 0
268279

269280

speech/unit_tests/test_response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_ctor(self):
3030
self.assertEqual(response.confidence, 0.0)
3131
self.assertEqual(response.endpointer_type, None)
3232
self.assertEqual(response.results, [])
33-
self.assertEqual(response.transcript, '')
33+
self.assertEqual(response.transcript, None)
3434
self.assertFalse(response.is_final)
3535

3636
def test_from_pb(self):

0 commit comments

Comments
 (0)