 
 import contextlib
 import re
+import signal
 import threading
 
-from gcloud.credentials import get_credentials
+from gcloud import credentials
 from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
 from google.rpc import code_pb2
 from grpc.beta import implementations
+from grpc.framework.interfaces.face import face
 import pyaudio
+from six.moves import queue
 
 # Audio recording parameters
 RATE = 16000
-CHANNELS = 1
 CHUNK = int(RATE / 10)  # 100ms
 
-# Keep the request alive for this many seconds
-DEADLINE_SECS = 8 * 60 * 60
+# The Speech API has a streaming limit of 60 seconds of audio*, so keep the
+# connection alive for that long, plus some more to give the API time to figure
+# out the transcription.
+# * https://g.co/cloud/speech/limits#content
+DEADLINE_SECS = 60 * 3 + 5
 SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
 
 
@@ -42,7 +47,7 @@ def make_channel(host, port):
     ssl_channel = implementations.ssl_channel_credentials(None, None, None)
 
     # Grab application default credentials from the environment
-    creds = get_credentials().create_scoped([SPEECH_SCOPE])
+    creds = credentials.get_credentials().create_scoped([SPEECH_SCOPE])
     # Add a plugin to inject the creds into the header
     auth_header = (
         'Authorization',
@@ -58,33 +63,81 @@ def make_channel(host, port):
     return implementations.secure_channel(host, port, composite_channel)
 
 
+def _audio_data_generator(buff):
+    """A generator that yields all available data in the given buffer.
+
+    Args:
+        buff - a Queue object, where each element is a chunk of data.
+    Yields:
+        A chunk of data that is the aggregate of all chunks of data in `buff`.
+        The function will block until at least one data chunk is available.
+    """
+    while True:
+        # Use a blocking get() to ensure there's at least one chunk of data
+        chunk = buff.get()
+        if not chunk:
+            # A falsey value indicates the stream is closed.
+            break
+        data = [chunk]
+
+        # Now consume whatever other data's still buffered.
+        while True:
+            try:
+                data.append(buff.get(block=False))
+            except queue.Empty:
+                break
+        yield b''.join(data)
+
+
+def _fill_buffer(audio_stream, buff, chunk):
+    """Continuously collect data from the audio stream, into the buffer."""
+    try:
+        while True:
+            buff.put(audio_stream.read(chunk))
+    except IOError:
+        # This happens when the stream is closed. Signal that we're done.
+        buff.put(None)
+
+
 # [START audio_stream]
 @contextlib.contextmanager
-def record_audio(channels, rate, chunk):
+def record_audio(rate, chunk):
     """Opens a recording stream in a context manager."""
     audio_interface = pyaudio.PyAudio()
     audio_stream = audio_interface.open(
-        format=pyaudio.paInt16, channels=channels, rate=rate,
+        format=pyaudio.paInt16,
+        # The API currently only supports 1-channel (mono) audio
+        # https://goo.gl/z757pE
+        channels=1, rate=rate,
         input=True, frames_per_buffer=chunk,
     )
 
-    yield audio_stream
+    # Create a thread-safe buffer of audio data
+    buff = queue.Queue()
+
+    # Spin up a separate thread to buffer audio data from the microphone
+    # This is necessary so that the input device's buffer doesn't overflow
+    # while the calling thread makes network requests, etc.
+    fill_buffer_thread = threading.Thread(
+        target=_fill_buffer, args=(audio_stream, buff, chunk))
+    fill_buffer_thread.start()
+
+    yield _audio_data_generator(buff)
 
     audio_stream.stop_stream()
     audio_stream.close()
+    fill_buffer_thread.join()
     audio_interface.terminate()
 # [END audio_stream]
 
 
-def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
+def request_stream(data_stream, rate):
     """Yields `StreamingRecognizeRequest`s constructed from a recording audio
     stream.
 
     Args:
-        stop_audio: A threading.Event object stops the recording when set.
-        channels: How many audio channels to record.
+        data_stream: A generator that yields raw audio data to send.
         rate: The sampling rate in hertz.
-        chunk: Buffer audio into chunks of this size before sending to the api.
     """
     # The initial request must contain metadata about the stream, so the
     # server knows how to interpret it.
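The `_audio_data_generator`/`_fill_buffer` pair added above is a small producer/consumer hand-off: one thread keeps pushing fixed-size chunks onto a `queue.Queue`, while the consumer blocks for the first chunk, then drains whatever else is already queued before yielding a single aggregated payload. Below is a minimal, self-contained sketch of that drain pattern that runs without a microphone; the `drain` and `producer` names and the fake byte chunks are illustrative only, and the sketch additionally checks for the end-of-stream marker inside the drain loop.

    import threading
    import time

    from six.moves import queue


    def drain(buff):
        """Block for one chunk, then grab everything else already queued."""
        chunk = buff.get()
        if chunk is None:
            return None
        data = [chunk]
        while True:
            try:
                extra = buff.get(block=False)
            except queue.Empty:
                break
            if extra is None:
                # Preserve the end-of-stream marker for the next call.
                buff.put(None)
                break
            data.append(extra)
        return b''.join(data)


    def producer(buff):
        # Stand-in for _fill_buffer: push a few chunks, then signal the end.
        for i in range(5):
            buff.put(('chunk%d ' % i).encode('utf-8'))
            time.sleep(0.01)
        buff.put(None)


    buff = queue.Queue()
    threading.Thread(target=producer, args=(buff,)).start()
    while True:
        payload = drain(buff)
        if payload is None:
            break
        print(payload)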
@@ -105,14 +158,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
     yield cloud_speech.StreamingRecognizeRequest(
         streaming_config=streaming_config)
 
-    with record_audio(channels, rate, chunk) as audio_stream:
-        while not stop_audio.is_set():
-            data = audio_stream.read(chunk)
-            if not data:
-                raise StopIteration()
-
-            # Subsequent requests can all just have the content
-            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
+    for data in data_stream:
+        # Subsequent requests can all just have the content
+        yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
 
 
 def listen_print_loop(recognize_stream):
@@ -126,25 +174,36 @@ def listen_print_loop(recognize_stream):
 
         # Exit recognition if any of the transcribed phrases could be
         # one of our keywords.
-        if any(re.search(r'\b(exit|quit)\b', alt.transcript)
+        if any(re.search(r'\b(exit|quit)\b', alt.transcript, re.I)
                for result in resp.results
                for alt in result.alternatives):
             print('Exiting..')
-            return
+            break
 
 
 def main():
-    stop_audio = threading.Event()
     with cloud_speech.beta_create_Speech_stub(
             make_channel('speech.googleapis.com', 443)) as service:
-        try:
-            listen_print_loop(
-                service.StreamingRecognize(
-                    request_stream(stop_audio), DEADLINE_SECS))
-        finally:
-            # Stop the request stream once we're done with the loop - otherwise
-            # it'll keep going in the thread that the grpc lib makes for it..
-            stop_audio.set()
+        # For streaming audio from the microphone, there are three threads.
+        # First, a thread that collects audio data as it comes in
+        with record_audio(RATE, CHUNK) as buffered_audio_data:
+            # Second, a thread that sends requests with that data
+            requests = request_stream(buffered_audio_data, RATE)
+            # Third, a thread that listens for transcription responses
+            recognize_stream = service.StreamingRecognize(
+                requests, DEADLINE_SECS)
+
+            # Exit things cleanly on interrupt
+            signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel())
+
+            # Now, put the transcription responses to use.
+            try:
+                listen_print_loop(recognize_stream)
+
+                recognize_stream.cancel()
+            except face.CancellationError:
+                # This happens because of the interrupt handler
+                pass
 
 
 if __name__ == '__main__':
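The reworked main() leans on two behaviours of the gRPC response stream: cancel() aborts it, and face.CancellationError is raised out of the iteration once it has been cancelled, which is what lets the SIGINT handler shut everything down cleanly. Here is a minimal sketch of that interrupt-to-cancel shape, using a stand-in stream instead of a real StreamingRecognize call (FakeStream and CancelledError are illustrative names, not part of the sample or of gRPC):

    import signal
    import time


    class CancelledError(Exception):
        """Plays the role of face.CancellationError in this sketch."""


    class FakeStream(object):
        """Yields responses until cancel() is called."""

        def __init__(self):
            self._cancelled = False

        def cancel(self):
            self._cancelled = True

        def __iter__(self):
            n = 0
            while not self._cancelled:
                yield 'response %d' % n
                n += 1
                time.sleep(1)
            raise CancelledError()


    def run():
        stream = FakeStream()
        # Ctrl-C cancels the stream rather than killing the process outright,
        # mirroring the signal.signal(...) call in the sample's main().
        signal.signal(signal.SIGINT, lambda *_: stream.cancel())
        try:
            for resp in stream:
                print(resp)
        except CancelledError:
            print('Cancelled by interrupt.')


    if __name__ == '__main__':
        run()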