Use GAPIC with sync_recognize().

daspecster · daspecster · commit 70ac27b14455 · 2016-10-14T18:14:47.000-04:00
diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst
@@ -91,8 +91,8 @@ Great Britian.
      ...     language_code='en-GB', max_alternatives=2)
      >>> for alternative in alternatives:
      ...     print('=' * 20)
-     ...     print('transcript: ' + alternative['transcript'])
-     ...     print('confidence: ' + alternative['confidence'])
+     ...     print('transcript: ' + alternative.transcript)
+     ...     print('confidence: ' + str(alternative.confidence))
      ====================
      transcript: Hello, this is a test
      confidence: 0.81
@@ -113,8 +113,8 @@ Example of using the profanity filter.
     ...                                      profanity_filter=True)
     >>> for alternative in alternatives:
     ...     print('=' * 20)
-    ...     print('transcript: ' + alternative['transcript'])
-    ...     print('confidence: ' + alternative['confidence'])
+    ...     print('transcript: ' + alternative.transcript)
+    ...     print('confidence: ' + str(alternative.confidence))
     ====================
     transcript: Hello, this is a f****** test
     confidence: 0.81
@@ -135,8 +135,8 @@ words to the vocabulary of the recognizer.
     ...                                      speech_context=hints)
     >>> for alternative in alternatives:
     ...     print('=' * 20)
-    ...     print('transcript: ' + alternative['transcript'])
-    ...     print('confidence: ' + alternative['confidence'])
+    ...     print('transcript: ' + alternative.transcript)
+    ...     print('confidence: ' + str(alternative.confidence))
     ====================
     transcript: Hello, this is a test
     confidence: 0.81
diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py
@@ -25,12 +25,17 @@
 from google.cloud.speech.encoding import Encoding
 from google.cloud.speech.operation import Operation
 from google.cloud.speech.sample import Sample
+from google.cloud.speech.transcript import Transcript
 from google.cloud.speech.streaming_response import StreamingSpeechResponse
 
 try:
     from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
+    from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+        SpeechContext)
     from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
         RecognitionConfig)
+    from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
+        RecognitionAudio)
     from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
         StreamingRecognitionConfig)
     from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
@@ -211,17 +216,21 @@ def sync_recognize(self, sample, language_code=None,
                   * ``confidence``: The confidence in language detection, float
                     between 0 and 1.
         """
+        if _USE_GAX:
+            config = RecognitionConfig(
+                encoding=sample.encoding, sample_rate=sample.sample_rate,
+                language_code=language_code, max_alternatives=max_alternatives,
+                profanity_filter=profanity_filter,
+                speech_context=SpeechContext(phrases=speech_context))
 
-        data = _build_request_data(sample, language_code, max_alternatives,
-                                   profanity_filter, speech_context)
-
-        api_response = self.connection.api_request(
-            method='POST', path='speech:syncrecognize', data=data)
+            audio = RecognitionAudio(content=sample.content,
+                                     uri=sample.source_uri)
 
-        if len(api_response['results']) == 1:
-            return api_response['results'][0]['alternatives']
+            return self._sync_recognize(config, audio)
         else:
-            raise ValueError('result in api should have length 1')
+            data = _build_request_data(sample, language_code, max_alternatives,
+                                       profanity_filter, speech_context)
+            return self._sync_recognize(data=data)
 
     def stream_recognize(self, sample, language_code=None,
                          max_alternatives=None, profanity_filter=None,
@@ -315,6 +324,37 @@ def speech_api(self):
             self._speech_api = SpeechApi()
         return self._speech_api
 
+    def _sync_recognize(self, config=None, audio=None, data=None):
+        """Handler for sync_recognize requests with or without GAPIC.
+
+        :type config: :class:`~RecognitionConfig`
+        :param config: Instance of ``RecognitionConfig`` with recognition
+                       settings.
+
+        :type audio: :class:`~RecognitionAudio`
+        :param audio: Instance of ``RecognitionAudio`` with audio source data.
+
+        :type data: dict
+        :param data: Mapped configuration parameters for the request.
+
+        :rtype: list of :class:`~transcript.Transcript`
+        :returns: List of ``Transcript`` with recognition results.
+        """
+        if config and audio and _USE_GAX:
+            api_response = self.speech_api.sync_recognize(config=config,
+                                                          audio=audio)
+            results = api_response.results.pop()
+            alternatives = results.alternatives
+            return [Transcript.from_pb(alternative)
+                    for alternative in alternatives]
+        elif data:
+            api_response = self.connection.api_request(
+                method='POST', path='speech:syncrecognize', data=data)
+
+            return [Transcript.from_api_repr(alternative)
+                    for alternative
+                    in api_response['results'][0]['alternatives']]
+
 
 def _build_request_data(sample, language_code=None, max_alternatives=None,
                         profanity_filter=None, speech_context=None):
diff --git a/speech/google/cloud/speech/transcript.py b/speech/google/cloud/speech/transcript.py
@@ -28,9 +28,22 @@ def __init__(self, transcript, confidence):
         self._transcript = transcript
         self._confidence = confidence
 
+    @classmethod
+    def from_api_repr(cls, transcript):
+        """Factory: construct ``Transcript`` from JSON response.
+
+        :type transcript: dict
+        :param transcript: Mapping with ``transcript`` and ``confidence``
+                           keys from the JSON API response.
+
+        :rtype: :class:`~Transcript`
+        :returns: Instance of ``Transcript``.
+        """
+        return cls(transcript['transcript'], transcript['confidence'])
+
     @classmethod
     def from_pb(cls, transcript):
-        """Factory: construct ``Transcript`` from protobuf response
+        """Factory: construct ``Transcript`` from protobuf response.
 
         :type transcript: :class:`~SpeechRecognitionAlternative`
         :param transcript: Instance of ``SpeechRecognitionAlternative``
diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py
@@ -65,8 +65,11 @@ def test_sync_recognize_content_with_optional_parameters(self):
         from google.cloud._helpers import _to_bytes
         from google.cloud._helpers import _bytes_to_unicode
 
+        from google.cloud._testing import _Monkey
+        from google.cloud.speech import client as MUT
         from google.cloud.speech.encoding import Encoding
         from google.cloud.speech.sample import Sample
+        from google.cloud.speech.transcript import Transcript
         from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE
         _AUDIO_CONTENT = _to_bytes(self.AUDIO_CONTENT)
         _B64_AUDIO_CONTENT = _bytes_to_unicode(b64encode(_AUDIO_CONTENT))
@@ -96,25 +99,30 @@ def test_sync_recognize_content_with_optional_parameters(self):
 
         sample = Sample(content=self.AUDIO_CONTENT, encoding=encoding,
                         sample_rate=self.SAMPLE_RATE)
-        response = client.sync_recognize(sample,
-                                         language_code='EN',
-                                         max_alternatives=2,
-                                         profanity_filter=True,
-                                         speech_context=self.HINTS)
+        with _Monkey(MUT, _USE_GAX=False):
+            response = client.sync_recognize(sample,
+                                             language_code='EN',
+                                             max_alternatives=2,
+                                             profanity_filter=True,
+                                             speech_context=self.HINTS)
 
         self.assertEqual(len(client.connection._requested), 1)
         req = client.connection._requested[0]
         self.assertEqual(len(req), 3)
         self.assertEqual(req['data'], REQUEST)
         self.assertEqual(req['method'], 'POST')
         self.assertEqual(req['path'], 'speech:syncrecognize')
-
-        expected = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives']
-        self.assertEqual(response, expected)
+        alternative = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'][0]
+        expected = [Transcript.from_api_repr(alternative)]
+        self.assertEqual(response[0].transcript, expected[0].transcript)
+        self.assertEqual(response[0].confidence, expected[0].confidence)
 
     def test_sync_recognize_source_uri_without_optional_parameters(self):
+        from google.cloud._testing import _Monkey
+        from google.cloud.speech import client as MUT
         from google.cloud.speech.encoding import Encoding
         from google.cloud.speech.sample import Sample
+        from google.cloud.speech.transcript import Transcript
         from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE
 
         RETURNED = SYNC_RECOGNIZE_RESPONSE
@@ -135,7 +143,8 @@ def test_sync_recognize_source_uri_without_optional_parameters(self):
 
         sample = Sample(source_uri=self.AUDIO_SOURCE_URI, encoding=encoding,
                         sample_rate=self.SAMPLE_RATE)
-        response = client.sync_recognize(sample)
+        with _Monkey(MUT, _USE_GAX=False):
+            response = client.sync_recognize(sample)
 
         self.assertEqual(len(client.connection._requested), 1)
         req = client.connection._requested[0]
@@ -144,10 +153,14 @@ def test_sync_recognize_source_uri_without_optional_parameters(self):
         self.assertEqual(req['method'], 'POST')
         self.assertEqual(req['path'], 'speech:syncrecognize')
 
-        expected = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives']
-        self.assertEqual(response, expected)
+        expected = [Transcript.from_api_repr(
+            SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'][0])]
+        self.assertEqual(response[0].transcript, expected[0].transcript)
+        self.assertEqual(response[0].confidence, expected[0].confidence)
 
     def test_sync_recognize_with_empty_results(self):
+        from google.cloud._testing import _Monkey
+        from google.cloud.speech import client as MUT
         from google.cloud.speech.encoding import Encoding
         from google.cloud.speech.sample import Sample
         from unit_tests._fixtures import SYNC_RECOGNIZE_EMPTY_RESPONSE
@@ -156,11 +169,32 @@ def test_sync_recognize_with_empty_results(self):
         client = self._makeOne(credentials=credentials)
         client.connection = _Connection(SYNC_RECOGNIZE_EMPTY_RESPONSE)
 
-        with self.assertRaises(ValueError):
-            sample = Sample(source_uri=self.AUDIO_SOURCE_URI,
-                            encoding=Encoding.FLAC,
-                            sample_rate=self.SAMPLE_RATE)
-            client.sync_recognize(sample)
+        with self.assertRaises(IndexError):
+            with _Monkey(MUT, _USE_GAX=False):
+                sample = Sample(source_uri=self.AUDIO_SOURCE_URI,
+                                encoding=Encoding.FLAC,
+                                sample_rate=self.SAMPLE_RATE)
+                client.sync_recognize(sample)
+
+    def test_sync_recognize_with_gapic(self):
+        from google.cloud.speech import client as MUT
+        from google.cloud.speech import Encoding
+        from google.cloud._testing import _Monkey
+        creds = _Credentials()
+        client = self._makeOne(credentials=creds)
+        client.connection = _Connection()
+
+        client._speech_api = _MockGAPICSpeechAPI()
+        client._speech_api._responses = []
+
+        with _Monkey(MUT, _USE_GAX=True, RecognitionConfig=_RecognitionConfig,
+                     RecognitionAudio=_RecognitionAudio):
+            sample = client.sample(source_uri=self.AUDIO_SOURCE_URI,
+                                   encoding=Encoding.FLAC,
+                                   sample_rate=self.SAMPLE_RATE)
+            results = client.sync_recognize(sample)
+            self.assertEqual(results[0].transcript, 'testing 1 2 3')
+            self.assertEqual(results[0].confidence, 0.95234356)
 
     def test_async_supported_encodings(self):
         from google.cloud.speech.encoding import Encoding
@@ -260,6 +294,31 @@ def test_stream_recognize(self):
         self.assertEqual(len(requests), 2)
 
 
+class _RecognitionConfig(object):
+    def __init__(self, *args, **kwargs):
+        self.args = args
+        self.kwargs = kwargs
+
+
+class _RecognitionAudio(object):
+    def __init__(self, content, uri):
+        self.content = content
+        self.uri = uri
+
+
+class _MockGAPICAlternative(object):
+    transcript = 'testing 1 2 3'
+    confidence = 0.95234356
+
+
+class _MockGAPICSyncResult(object):
+    alternatives = [_MockGAPICAlternative()]
+
+
+class _MockGAPICSyncSpeechResponse(object):
+    results = [_MockGAPICSyncResult()]
+
+
 class _MockGAPICSpeechResponse(object):
     error = None
     endpointer_type = None
@@ -275,6 +334,11 @@ def streaming_recognize(self, requests):
         self._requests = requests
         return self._responses
 
+    def sync_recognize(self, config, audio):
+        self.config = config
+        self.audio = audio
+        return _MockGAPICSyncSpeechResponse()
+
 
 class _Credentials(object):