// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all audiotrack
 * data has been flushed to the mixer, and is estimated to have completed
 * playback.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;
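
    // Typical lifecycle, as a comment-only sketch (illustrative; the real
    // caller is the TTS service's audio playback thread, and "params" stands
    // for an assumed AudioOutputParams instance):
    //
    //   BlockingAudioTrack track = new BlockingAudioTrack(
    //           params, 22050 /* Hz */, AudioFormat.ENCODING_PCM_16BIT, 1);
    //   if (track.init()) {
    //       track.write(pcmChunk);     // may be called repeatedly
    //       track.waitAndRelease();    // blocks until playback completes
    //   }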
    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;
    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;
    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    private final AudioOutputParams mAudioParams;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final int mBytesPerFrame;
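
    // Frame-size arithmetic (worked example, not from the original source):
    // for ENCODING_PCM_16BIT mono, mBytesPerFrame = 2 bytes per sample * 1
    // channel = 2, so 44100 bytes at 22050 Hz is 22050 frames, i.e. roughly
    // 1000 ms of audio. getAudioLengthMs() below relies on exactly this.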
A "short utterance" is one that uses less bytes than the audio track buffer size (mAudioBufferSize). In this case, we need to call AudioTrack.stop() to send pending buffers to the mixer, and slightly different logic is required to wait for the track to finish. Not volatile, accessed only from the audio playback thread.
/** * A "short utterance" is one that uses less bytes than the audio * track buffer size (mAudioBufferSize). In this case, we need to call * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly * different logic is required to wait for the track to finish. * * Not volatile, accessed only from the audio playback thread. */
private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // Need to be seen by stop() which can be called from another thread.
    // mAudioTrack will be set to null only after waitAndRelease().
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    private int mSessionId;

    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
            int audioFormat, int channelCount) {
        mAudioParams = audioParams;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;

        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before rendering the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there is
        // no need to stop it again here.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }
            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }

    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        // Cast to long before multiplying to avoid int overflow on long utterances.
        final long estimatedTimeMs = (long) unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Note that we don't take bufferCopy.mOffset into account because
            // it is guaranteed to be 0.
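            //
            // In MODE_STREAM an AudioTrack.write() is blocking by default, so
            // this call normally consumes the whole remaining range in one
            // pass; the loop only repeats if the write returns short.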
            // Write only the bytes remaining after the current offset;
            // passing bytes.length here would overrun the array once count > 0.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioFormat audioFormat = (new AudioFormat.Builder())
                .setChannelMask(channelConfig)
                .setEncoding(mAudioFormat)
                .setSampleRate(mSampleRateInHz)
                .build();
        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
                mAudioParams.mSessionId);

        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
        return audioTrack;
    }

    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // for much longer than we should because even at 4kHz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        // Cast to long before multiplying to avoid int overflow on long utterances.
        final long estimatedTimeMs = (long) lengthInFrames * 1000 / mSampleRateInHz;

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs +
                "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000L) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. We should then add in the amount of time that was
            // spent sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS +
                            "ms for AudioTrack to make progress; aborting.");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition +
                        ", Length in frames : " + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }

        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static long clip(long value, long min, long max) {
        return value < min ? min : (value < max ? value : max);
    }

    private static float clip(float value, float min, float max) {
        return value < min ? min : (value < max ? value : max);
    }
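
    // Worked example for setupVolume() above (values are illustrative):
    // with volume = 1.0f and pan = 0.5f (half-right), volLeft becomes
    // 1.0f * (1.0f - 0.5f) = 0.5f while volRight stays at 1.0f, so the
    // stereo image shifts right by attenuating only the left channel.
    // A pan of -1.0f silences the right channel entirely.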
    /**
     * @see AudioTrack#setPlaybackPositionUpdateListener(AudioTrack.OnPlaybackPositionUpdateListener)
     */
    public void setPlaybackPositionUpdateListener(
            AudioTrack.OnPlaybackPositionUpdateListener listener) {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.setPlaybackPositionUpdateListener(listener);
            }
        }
    }
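
    // Together, the delegate above and setNotificationMarkerPosition() below
    // let a caller observe playback progress. The usual AudioTrack pattern
    // (illustrative; the actual callers are not in this file) is to register
    // a listener and set a marker at the utterance length in frames, so that
    // onMarkerReached() fires once playback passes the end of the data.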
    /**
     * @see AudioTrack#setNotificationMarkerPosition(int)
     */
    public void setNotificationMarkerPosition(int frames) {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.setNotificationMarkerPosition(frames);
            }
        }
    }
}