1414
1515"""Basic client for Google Cloud Speech API."""
1616
17+ import os
1718from base64 import b64encode
1819
1920from google .cloud ._helpers import _to_bytes
2021from google .cloud ._helpers import _bytes_to_unicode
2122from google .cloud import client as client_module
23+ from google .cloud .environment_vars import DISABLE_GRPC
2224from google .cloud .speech .connection import Connection
2325from google .cloud .speech .encoding import Encoding
2426from google .cloud .speech .operation import Operation
27+ from google .cloud .speech .streaming .request import _make_request_stream
2528from google .cloud .speech .sample import Sample
29+ from google .cloud .speech .streaming .container import StreamingResponseContainer
30+
# The generated GAX/gRPC Speech client is an optional dependency: record
# whether it could be imported so gRPC-only features can be gated on it.
# When the import fails, the name ``SpeechApi`` is left undefined.
try:
    from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
    _HAVE_GAX = True
except ImportError:  # pragma: NO COVER
    _HAVE_GAX = False


# ``os.getenv`` returns the raw string, so ANY non-empty value of the
# ``DISABLE_GRPC`` environment variable (even ``'0'``) disables GAX here.
_DISABLE_GAX = os.getenv(DISABLE_GRPC, False)
# GAX is used only when it is both importable and not explicitly disabled.
_USE_GAX = _HAVE_GAX and not _DISABLE_GAX
2641
2742
2843class Client (client_module .Client ):
@@ -47,6 +62,7 @@ class Client(client_module.Client):
4762 """
4863
4964 _connection_class = Connection
65+ _speech_api = None
5066
5167 def async_recognize (self , sample , language_code = None ,
5268 max_alternatives = None , profanity_filter = None ,
@@ -104,7 +120,7 @@ def async_recognize(self, sample, language_code=None,
104120 return Operation .from_api_repr (self , api_response )
105121
106122 @staticmethod
107- def sample (content = None , source_uri = None , encoding = None ,
123+ def sample (content = None , source_uri = None , stream = None , encoding = None ,
108124 sample_rate = None ):
109125 """Factory: construct Sample to use when making recognize requests.
110126
@@ -118,6 +134,9 @@ def sample(content=None, source_uri=None, encoding=None,
118134 supported, which must be specified in the following
119135 format: ``gs://bucket_name/object_name``.
120136
137+ :type stream: :class:`io.BufferedReader`
138+ :param stream: File like object to read audio data from.
139+
121140 :type encoding: str
122141 :param encoding: encoding of audio data sent in all RecognitionAudio
123142 messages, can be one of: :attr:`~.Encoding.LINEAR16`,
@@ -135,7 +154,7 @@ def sample(content=None, source_uri=None, encoding=None,
135154 :rtype: :class:`~google.cloud.speech.sample.Sample`
136155 :returns: Instance of ``Sample``.
137156 """
138- return Sample (content = content , source_uri = source_uri ,
157+ return Sample (content = content , source_uri = source_uri , stream = stream ,
139158 encoding = encoding , sample_rate = sample_rate )
140159
141160 def sync_recognize (self , sample , language_code = None ,
@@ -199,6 +218,108 @@ def sync_recognize(self, sample, language_code=None,
199218 else :
200219 raise ValueError ('result in api should have length 1' )
201220
def stream_recognize(self, sample, language_code=None,
                     max_alternatives=None, profanity_filter=None,
                     speech_context=None, single_utterance=False,
                     interim_results=False):
    """Streaming speech recognition.

    .. note::
        Streaming recognition requests are limited to 1 minute of audio.

        See: https://cloud.google.com/speech/limits#content

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.
                   It must carry an open ``stream`` to read audio from.

    :type language_code: str
    :param language_code: (Optional) The language of the supplied audio as
                          BCP-47 language tag. Example: ``'en-GB'``.
                          If omitted, defaults to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned. The server may
                             return fewer than maxAlternatives.
                             Valid values are 0-30. A value of 0 or 1
                             will return a maximum of 1. Defaults to 1

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: A list of strings (max 50) containing words and
                           phrases "hints" so that the speech recognition
                           is more likely to recognize them. This can be
                           used to improve the accuracy for specific words
                           and phrases. This can also be used to add new
                           words to the vocabulary of the recognizer.

    :type single_utterance: bool
    :param single_utterance: (Optional) If false or omitted, the recognizer
                             will perform continuous recognition
                             (continuing to process audio even if the user
                             pauses speaking) until the client closes the
                             output stream (gRPC API) or when the maximum
                             time limit has been reached. Multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.

                             If true, the recognizer will detect a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it will
                             return an END_OF_UTTERANCE event and cease
                             recognition. It will return no more than one
                             SpeechRecognitionResult with the is_final flag
                             set to true.

    :type interim_results: bool
    :param interim_results: (Optional) If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are indicated
                            with the is_final=false flag). If false or
                            omitted, only is_final=true result(s) are
                            returned.

    :rtype: :class:`~google.cloud.speech.streaming.container.StreamingResponseContainer`
    :returns: An instance of ``StreamingResponseContainer`` holding every
              truthy response yielded by the API.

    :raises: :class:`EnvironmentError` if GAX / gRPC support is not
             installed or has been disabled; :class:`ValueError` if
             ``sample`` has no stream or its stream is already closed.
    """
    if not _USE_GAX:
        raise EnvironmentError('GRPC is required to use this API.')

    # A sample built from ``content`` or ``source_uri`` carries no stream;
    # fail with a clear message instead of an ``AttributeError`` on ``None``.
    stream = sample.stream
    if stream is None:
        raise ValueError('Sample has no stream to read audio from.')
    if stream.closed:
        raise ValueError('Stream is closed.')

    requests = _make_request_stream(sample, language_code=language_code,
                                    max_alternatives=max_alternatives,
                                    profanity_filter=profanity_filter,
                                    speech_context=speech_context,
                                    single_utterance=single_utterance,
                                    interim_results=interim_results)

    # The response iterator is fully consumed here, so this call only
    # returns after the API has finished sending responses.
    responses = StreamingResponseContainer()
    for response in self.speech_api.streaming_recognize(requests):
        if response:
            responses.add_response(response)

    return responses
311+
@property
def speech_api(self):
    """Instance of Speech API, created on first access and then cached.

    :rtype: :class:`google.cloud.gapic.speech.v1beta1.speech_api.SpeechApi`
    :returns: Instance of ``SpeechApi``.

    :raises: :class:`EnvironmentError` if the GAX / gRPC ``SpeechApi``
             class could not be imported.
    """
    # Explicit ``is None`` check: only the "not yet created" state should
    # trigger construction, regardless of the cached object's truthiness.
    if self._speech_api is None:
        if not _HAVE_GAX:
            # ``SpeechApi`` is undefined when its import failed; raise the
            # same error as ``stream_recognize`` rather than a ``NameError``.
            raise EnvironmentError('GRPC is required to use this API.')
        self._speech_api = SpeechApi()
    return self._speech_api
322+
202323
203324def _build_request_data (sample , language_code = None , max_alternatives = None ,
204325 profanity_filter = None , speech_context = None ):
0 commit comments