隨着從事 Android 開發年限的增長,負責的工作項目也從應用層開發逐步過渡到 Android Framework 層開發。雖然一開始就知道 Android 知識體系龐大,但是當你逐漸從 Application 層向 Framework 層走的時候,才發現以前懂得的真是太少。以前更多打交道的是 Activity 和 Fragment,對於 Service 和 Broadcast 涉及得很少,更多注重的是界面的佈局、動畫、網絡請求等。雖然走應用開發的話,後期也會關注架構、性能優化、Hybrid 等,但是逐漸接觸 Framework 層相關模塊的時候,發現裏面的知識點錯綜複雜,就好比今天分享的主題 Android TTS。
在 Android 中,TTS 全稱叫做 Text to Speech,從字面就能理解它解決的問題是什麼:把文本轉爲語音。意思就是你輸入一段文本信息,然後 Android 系統可以把這段文字播報出來。這種應用場景目前比較多見於各種語音助手 APP,很多手機系統集成商都內置了文本轉語音服務,可以朗讀當前頁面上的文本信息。同樣,在一些閱讀類 APP 上我們也能看到相關服務,打開微信讀書,就可以直接把當前頁面用語音方式播放出來,特別適合那種不方便拿着手機看屏幕閱讀的場景。
<?xml version="1.0" encoding="utf-8"?>
<!--
  Demo screen for the TTS sample: two editable text samples, each followed by a
  button that speaks it, plus buttons for the cycle test and the second activity.
-->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent">

    <ScrollView
        android:layout_width="match_parent"
        android:layout_height="match_parent">

        <!--
          The direct child of a ScrollView must wrap its content in the scrolling
          dimension; match_parent here prevents the content from being measured
          at its full height.
        -->
        <LinearLayout
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:orientation="vertical">

            <EditText
                android:id="@+id/edit_text1"
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:text="杭州自秦朝設縣治以來已有2200多年的歷史,曾是吳越國和南宋的都城。因風景秀麗,素有「人間天堂」的美譽。杭州得益於京杭運河和通商口岸的便利,以及自身發達的絲綢和糧食產業,歷史上曾是重要的商業集散中心。" />

            <Button
                android:id="@+id/btn_tts1"
                android:layout_width="150dp"
                android:layout_height="60dp"
                android:layout_marginTop="10dp"
                android:text="TTS1" />

            <EditText
                android:id="@+id/edit_text2"
                android:layout_width="match_parent"
                android:layout_height="wrap_content"
                android:text="伊利公開舉報原創始人鄭俊懷:多名高官充當保護傘 北京青年報 2018-10-24 12:01:46 10月24日上午,伊利公司在企業官方網站發出舉報信,公開舉報鄭俊懷等人,聲稱鄭俊懷索要鉅額犯罪所得不成,動用最高檢某原副檢察長等人施壓,長期造謠迫害伊利,多位省部級、廳局級領導均充當鄭俊懷保護傘,人爲抹掉2.4億犯罪事實,運做假減刑,14年來無人敢處理。" />

            <Button
                android:id="@+id/btn_tts2"
                android:layout_width="150dp"
                android:layout_height="60dp"
                android:layout_marginTop="10dp"
                android:text="TTS2" />

            <Button
                android:id="@+id/btn_cycle"
                android:layout_width="150dp"
                android:layout_height="60dp"
                android:layout_marginTop="10dp"
                android:text="Cycle TTS" />

            <Button
                android:id="@+id/btn_second"
                android:layout_width="150dp"
                android:layout_height="60dp"
                android:layout_marginTop="10dp"
                android:text="Second TTS" />

        </LinearLayout>
    </ScrollView>
</RelativeLayout>
public class TtsMainActivity extends AppCompatActivity implements View.OnClickListener,TextToSpeech.OnInitListener { private static final String TAG = TtsMainActivity.class.getSimpleName(); private static final int THREADNUM = 100; // 測試用的線程數目 private EditText mTestEt1; private EditText mTestEt2; private TextToSpeech mTTS; // TTS對象 private XKAudioPolicyManager mXKAudioPolicyManager; private HashMap mParams = null; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); mTestEt1 = (EditText) findViewById(R.id.edit_text1); mTestEt2 = (EditText) findViewById(R.id.edit_text2); findViewById(R.id.btn_tts1).setOnClickListener(this); findViewById(R.id.btn_tts2).setOnClickListener(this); findViewById(R.id.btn_cycle).setOnClickListener(this); findViewById(R.id.btn_second).setOnClickListener(this); init(); } private void init(){ mTTS = new TextToSpeech(this.getApplicationContext(),this); mXKAudioPolicyManager = XKAudioPolicyManager.getInstance(this.getApplication()); mParams = new HashMap(); mParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM, "3"); //設置播放類型(音頻流類型) } @Override public void onInit(int status) { if (status == TextToSpeech.SUCCESS) { int result = mTTS.setLanguage(Locale.ENGLISH); if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) { Toast.makeText(this, "數據丟失或不支持", Toast.LENGTH_SHORT).show(); } } } @Override public void onClick(View v) { int id = v.getId(); switch (id){ case R.id.btn_tts1: TtsPlay1(); break; case R.id.btn_tts2: TtsPlay2(); break; case R.id.btn_second: TtsSecond(); break; case R.id.btn_cycle: TtsCycle(); break; default: break; } } private void TtsPlay1(){ if (mTTS != null && !mTTS.isSpeaking() && mXKAudioPolicyManager.requestAudioSource()) { //mTTS.setOnUtteranceProgressListener(new ttsPlayOne()); String text1 = mTestEt1.getText().toString(); Log.d(TAG, "TtsPlay1-----------播放文本內容:" + text1); //朗讀,注意這裏三個參數的added in API level 4 
四個參數的added in API level 21 mTTS.speak(text1, TextToSpeech.QUEUE_FLUSH, mParams); } } private void TtsPlay2(){ if (mTTS != null && !mTTS.isSpeaking() && mXKAudioPolicyManager.requestAudioSource()) { //mTTS.setOnUtteranceProgressListener(new ttsPlaySecond()); String text2 = mTestEt2.getText().toString(); Log.d(TAG, "TtsPlay2-----------播放文本內容:" + text2); // 設置音調,值越大聲音越尖(女生),值越小則變成男聲,1.0是常規 mTTS.setPitch(0.8f); //設定語速 ,默認1.0正常語速 mTTS.setSpeechRate(1f); //朗讀,注意這裏三個參數的added in API level 4 四個參數的added in API level 21 mTTS.speak(text2, TextToSpeech.QUEUE_FLUSH, mParams); } } private void TtsSecond(){ Intent intent = new Intent(TtsMainActivity.this,TtsSecondAcitivity.class); startActivity(intent); } private void TtsCycle(){ long millis1 = System.currentTimeMillis(); for (int i = 0; i < THREADNUM; i++) { Thread tempThread = new Thread(new MyRunnable(i, THREADNUM)); tempThread.setName("線程" + i); tempThread.start(); } long millis2 = System.currentTimeMillis(); Log.d(TAG, "循環測試發音耗費時間:" + (millis2 - millis1)); } @Override protected void onStart() { super.onStart(); } @Override protected void onStop() { super.onStop(); } @Override protected void onDestroy() { super.onDestroy(); shutDown(); } private void shutDown(){ if(mTTS != null){ mTTS.stop(); mTTS.shutdown(); } if(mXKAudioPolicyManager != null){ mXKAudioPolicyManager.releaseAudioSource(); } } /** * 自定義線程可執行處理 * */ class MyRunnable implements Runnable { private int i; // 第幾個線程 private int threadNum; // 總共建立了幾個線程 public MyRunnable(int i, int threadNum) { this.i = i; this.threadNum = threadNum; } @Override public void run() { runOnUiThread(new Runnable() { @Override public void run() { Log.d(TAG, "在主線程中執行index:" + i + ",線程總數:" + threadNum); if(i % 2 == 0){ Log.d(TAG, "TtsPlay1 index:" + i); TtsPlay1(); } else{ Log.d(TAG, "TtsPlay2 index:" + i); TtsPlay2(); } try { Thread.sleep(10000); } catch (InterruptedException e) { e.printStackTrace(); } } }); } } public class ttsPlayOne extends UtteranceProgressListener{ @Override public 
void onStart(String utteranceId) { Log.d(TAG, "ttsPlayOne-----------onStart"); } @Override public void onDone(String utteranceId) { Log.d(TAG, "ttsPlayOne-----------onDone"); } @Override public void onError(String utteranceId) { Log.d(TAG, "ttsPlayOne-----------onError"); } } public class ttsPlaySecond extends UtteranceProgressListener{ @Override public void onStart(String utteranceId) { Log.d(TAG, "ttsPlaySecond-----------onStart"); } @Override public void onDone(String utteranceId) { Log.d(TAG, "ttsPlaySecond-----------onDone"); } @Override public void onError(String utteranceId) { Log.d(TAG, "ttsPlaySecond-----------onError"); } } }
<!-- Manifest declarations requesting read and write access to external storage. -->
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"></uses-permission> <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"></uses-permission>
### TTS 最佳實踐源碼分析
由於目前我在公司負責開發的產品屬於語音助手類型,自然這類 TTS 發聲的問題和坑平常見得比較多。常見的有以下幾種類型:
### 使用趨勢
隨着物聯網的到來,IoT 設備增多,類似語音助手的相關應用也會增多。因爲語音是一個很好的入口,現在正逐步經歷從「有顯示」到「去顯示」的過程,很多智能設備是不需要屏幕的,只需要能識別語音和播放聲音。因此,隨着這類應用的增加,對 TTS 相關 API 接口的調用頻率肯定也會加大,相信谷歌在這方面也會逐步完善。
Android TTS 的目標就是解決文本轉化爲語音播報的問題。那它到底是怎麼實現的呢?我們從 TextToSpeech 類的構造函數開始分析。
這裏我們以 Android 6.0 版本的源碼分析爲主,主要涉及的相關類和接口文件在源碼中的位置如下:
/**
 * Framework-facing constructor that accepts an explicit package name rather
 * than relying on {@link android.content.Context#getPackageName()}.
 *
 * <p>Stores the caller's context, listener, requested engine and fallback
 * flag, creates empty earcon/utterance maps, and then starts the engine
 * connection via {@code initTts()}. The {@code packageName} argument is not
 * read anywhere in this body.
 *
 * @hide
 */
public TextToSpeech(Context context, OnInitListener listener, String engine,
        String packageName, boolean useFallback) {
    // Caller-supplied configuration.
    this.mContext = context;
    this.mInitListener = listener;
    this.mRequestedEngine = engine;
    this.mUseFallback = useFallback;

    // Per-instance registries start out empty; no progress listener yet.
    this.mEarcons = new HashMap<String, Uri>();
    this.mUtterances = new HashMap<CharSequence, Uri>();
    this.mUtteranceProgressListener = null;

    // The engine helper must exist before initTts() looks up engines.
    this.mEnginesHelper = new TtsEngines(this.mContext);
    initTts();
}
private int initTts() { // Step 1: Try connecting to the engine that was requested. if (mRequestedEngine != null) { if (mEnginesHelper.isEngineInstalled(mRequestedEngine)) { if (connectToEngine(mRequestedEngine)) { mCurrentEngine = mRequestedEngine; return SUCCESS; } else if (!mUseFallback) { mCurrentEngine = null; dispatchOnInit(ERROR); return ERROR; } } else if (!mUseFallback) { Log.i(TAG, "Requested engine not installed: " + mRequestedEngine); mCurrentEngine = null; dispatchOnInit(ERROR); return ERROR; } } // Step 2: Try connecting to the user's default engine. final String defaultEngine = getDefaultEngine(); if (defaultEngine != null && !defaultEngine.equals(mRequestedEngine)) { if (connectToEngine(defaultEngine)) { mCurrentEngine = defaultEngine; return SUCCESS; } } // Step 3: Try connecting to the highest ranked engine in the // system. final String highestRanked = mEnginesHelper.getHighestRankedEngineName(); if (highestRanked != null && !highestRanked.equals(mRequestedEngine) && !highestRanked.equals(defaultEngine)) { if (connectToEngine(highestRanked)) { mCurrentEngine = highestRanked; return SUCCESS; } } // NOTE: The API currently does not allow the caller to query whether // they are actually connected to any engine. This might fail for various // reasons like if the user disables all her TTS engines. mCurrentEngine = null; dispatchOnInit(ERROR); return ERROR; }
/**
 * Issues a bind request to the TTS service of the given engine package.
 *
 * @param engine package name of the engine to bind to
 * @return {@code true} if {@code bindService} accepted the request; the new
 *         connection is then remembered as the one currently connecting
 */
private boolean connectToEngine(String engine) {
    final Connection pending = new Connection();

    final Intent serviceIntent = new Intent(Engine.INTENT_ACTION_TTS_SERVICE);
    serviceIntent.setPackage(engine);

    if (mContext.bindService(serviceIntent, pending, Context.BIND_AUTO_CREATE)) {
        Log.i(TAG, "Sucessfully bound to " + engine);
        mConnectingServiceConnection = pending;
        return true;
    }

    Log.e(TAG, "Failed to bind to " + engine);
    return false;
}
<!-- Manifest entry for PicoService: it answers the android.intent.action.TTS_SERVICE action and points the android.speech.tts meta-data at @xml/tts_engine. -->
<service android:name=".PicoService" android:label="@string/app_name"> <intent-filter> <action android:name="android.intent.action.TTS_SERVICE" /> <category android:name="android.intent.category.DEFAULT" /> </intent-filter> <meta-data android:name="android.speech.tts" android:resource="@xml/tts_engine" /> </service>
/**
 * TTS service for the Pico engine. All behaviour comes from
 * {@code CompatTtsService}; this subclass only names the native library.
 */
public class PicoService extends CompatTtsService {

    private static final String TAG = "PicoService";

    /** File name of the native synthesis library handed to the base class. */
    private static final String SO_FILENAME = "libttspico.so";

    /** @return the file name of the native engine library */
    @Override
    protected String getSoFilename() {
        return SO_FILENAME;
    }
}
@Override public void onCreate() { if (DBG) Log.d(TAG, "onCreate()"); String soFilename = getSoFilename(); if (mNativeSynth != null) { mNativeSynth.stopSync(); mNativeSynth.shutdown(); mNativeSynth = null; } // Load the engineConfig from the plugin if it has any special configuration // to be loaded. By convention, if an engine wants the TTS framework to pass // in any configuration, it must put it into its content provider which has the URI: // content://<packageName>.providers.SettingsProvider // That content provider must provide a Cursor which returns the String that // is to be passed back to the native .so file for the plugin when getString(0) is // called on it. // Note that the TTS framework does not care what this String data is: it is something // that comes from the engine plugin and is consumed only by the engine plugin itself. String engineConfig = ""; Cursor c = getContentResolver().query(Uri.parse("content://" + getPackageName() + ".providers.SettingsProvider"), null, null, null, null); if (c != null){ c.moveToFirst(); engineConfig = c.getString(0); c.close(); } mNativeSynth = new SynthProxy(soFilename, engineConfig); // mNativeSynth is used by TextToSpeechService#onCreate so it must be set prior // to that call. // getContentResolver() is also moved prior to super.onCreate(), and it works // because the super method don't sets a field or value that affects getContentResolver(); // (including the content resolver itself). super.onCreate(); }
/** * Constructor; pass the location of the native TTS .so to use. */ public SynthProxy(String nativeSoLib, String engineConfig) { boolean applyFilter = shouldApplyAudioFilter(nativeSoLib); Log.v(TAG, "About to load "+ nativeSoLib + ", applyFilter=" + applyFilter); mJniData = native_setup(nativeSoLib, engineConfig); if (mJniData == 0) { throw new RuntimeException("Failed to load " + nativeSoLib); } native_setLowShelf(applyFilter, PICO_FILTER_GAIN, PICO_FILTER_LOWSHELF_ATTENUATION, PICO_FILTER_TRANSITION_FREQ, PICO_FILTER_SHELF_SLOPE); }
我們可以看到 engine->funcs->init(engine, __ttsSynthDoneCB, engConfigString); 這句代碼比較關鍵,這個 init 方法的實現在 com_svox_picottsengine.cpp 中,如下:
/* Google Engine API function implementations */

/** init
 *  Allocates Pico memory block and initializes the Pico system.
 *  synthDoneCBPtr - Pointer to callback function which will receive generated samples
 *  config - the engine configuration parameters, here only contains the non-system path
 *      for the lingware location
 *  return tts_result
 *
 *  The lingware path is copied into a heap buffer sized strlen + 1 so that the
 *  terminating NUL written by strcpy fits. Every failure path releases what
 *  was allocated earlier in this call.
*/
tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config )
{
    if (synthDoneCBPtr == NULL) {
        ALOGE("Callback pointer is NULL");
        return TTS_FAILURE;
    }

    // Was the initialization given an alternative path for the lingware location?
    const bool useAltPath = (config != NULL) && (strlen(config) > 0);
    const char *pathSource = useAltPath ? config : PICO_LINGWARE_PATH;

    // Copy the path first, so a failed allocation needs no Pico teardown.
    // +1: room for the NUL terminator that strcpy appends.
    char *pathCopy = (char*)malloc( strlen(pathSource) + 1 );
    if (!pathCopy) {
        ALOGE("Failed to allocate memory for lingware path");
        return TTS_FAILURE;
    }
    strcpy(pathCopy, pathSource);

    picoMemArea = malloc( PICO_MEM_SIZE );
    if (!picoMemArea) {
        ALOGE("Failed to allocate memory for Pico system");
        free( pathCopy );
        return TTS_FAILURE;
    }

    pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
    if (PICO_OK != ret) {
        ALOGE("Failed to initialize Pico system");
        free( picoMemArea );
        picoMemArea = NULL;
        free( pathCopy );
        return TTS_FAILURE;
    }

    picoSynthDoneCBPtr = synthDoneCBPtr;

    picoCurrentLangIndex = -1;

    pico_alt_lingware_path = pathCopy;
    if (useAltPath) {
        ALOGV("Alternative lingware path %s", pico_alt_lingware_path);
    } else {
        ALOGV("Using predefined lingware path %s", pico_alt_lingware_path);
    }

    return TTS_SUCCESS;
}
/**
 * Requests that the given text be spoken.
 *
 * <p>If the text has an entry in {@code mUtterances}, the mapped audio URI is
 * played through the service instead of synthesizing the text.
 *
 * @param text        the text to speak
 * @param queueMode   queuing strategy forwarded to the service
 * @param params      request parameters, converted with {@code getParams}
 * @param utteranceId identifier forwarded to the service for this request
 * @return the service's result code, or {@code ERROR} if the action could not run
 */
public int speak(final CharSequence text, final int queueMode, final Bundle params,
        final String utteranceId) {
    final Action<Integer> speakAction = new Action<Integer>() {
        @Override
        public Integer run(ITextToSpeechService service) throws RemoteException {
            final Uri mappedAudio = mUtterances.get(text);
            if (mappedAudio == null) {
                // No mapped audio for this text: let the engine synthesize it.
                return service.speak(getCallerIdentity(), text, queueMode,
                        getParams(params), utteranceId);
            }
            return service.playAudio(getCallerIdentity(), mappedAudio, queueMode,
                    getParams(params), utteranceId);
        }
    };
    return runAction(speakAction, ERROR, "speak");
}
/**
 * Runs an action against the current service connection while holding
 * {@code mStartLock}.
 *
 * @param action      work to perform against the service
 * @param errorResult value returned when there is no service connection
 * @param method      caller name, used only in the log message
 * @param reconnect   forwarded to the connection's {@code runAction}
 * @param onlyEstablishedConnection forwarded to the connection's {@code runAction}
 * @return the connection's result, or {@code errorResult} when not bound
 */
private <R> R runAction(Action<R> action, R errorResult, String method,
        boolean reconnect, boolean onlyEstablishedConnection) {
    synchronized (mStartLock) {
        if (mServiceConnection != null) {
            return mServiceConnection.runAction(action, errorResult, method,
                    reconnect, onlyEstablishedConnection);
        }
        Log.w(TAG, method + " failed: not bound to TTS engine");
        return errorResult;
    }
}
/**
 * Executes an action on the bound service while holding {@code mStartLock}.
 *
 * @param action      work to perform against {@code mService}
 * @param errorResult value returned on any failure
 * @param method      caller name, used only in log messages
 * @param reconnect   when true, a {@code RemoteException} triggers
 *                    {@code disconnect()} followed by {@code initTts()}
 * @param onlyEstablishedConnection when true, the action is refused unless
 *                    {@code isEstablished()} returns true
 * @return the action's result, or {@code errorResult} on failure
 */
public <R> R runAction(Action<R> action, R errorResult, String method,
        boolean reconnect, boolean onlyEstablishedConnection) {
    synchronized (mStartLock) {
        try {
            // Work out why the call cannot proceed, if at all.
            final String refusal;
            if (mService == null) {
                refusal = " failed: not connected to TTS engine";
            } else if (onlyEstablishedConnection && !isEstablished()) {
                refusal = " failed: TTS engine connection not fully set up";
            } else {
                refusal = null;
            }

            if (refusal != null) {
                Log.w(TAG, method + refusal);
                return errorResult;
            }
            return action.run(mService);
        } catch (RemoteException ex) {
            Log.e(TAG, method + " failed", ex);
            if (reconnect) {
                disconnect();
                initTts();
            }
            return errorResult;
        }
    }
}
/**
 * Queues playback of an audio URI on behalf of a client.
 *
 * @return {@code TextToSpeech.ERROR} if {@code checkNonNull} rejects the
 *         caller, URI or params; otherwise the result of enqueueing the item
 */
@Override
public int playAudio(IBinder caller, Uri audioUri, int queueMode, Bundle params,
        String utteranceId) {
    if (checkNonNull(caller, audioUri, params)) {
        // Capture the identity of the calling process for the speech item.
        final int callerUid = Binder.getCallingUid();
        final int callerPid = Binder.getCallingPid();
        final SpeechItem audioItem = new AudioSpeechItemV1(caller, callerUid, callerPid,
                params, utteranceId, audioUri);
        return mSynthHandler.enqueueSpeechItem(queueMode, audioItem);
    }
    return TextToSpeech.ERROR;
}
接着執行 mSynthHandler.enqueueSpeechItem(queueMode, item),其代碼如下:
/** * Adds a speech item to the queue. * * Called on a service binder thread. */ public int enqueueSpeechItem(int queueMode, final SpeechItem speechItem) { UtteranceProgressDispatcher utterenceProgress = null; if (speechItem instanceof UtteranceProgressDispatcher) { utterenceProgress = (UtteranceProgressDispatcher) speechItem; } if (!speechItem.isValid()) { if (utterenceProgress != null) { utterenceProgress.dispatchOnError( TextToSpeech.ERROR_INVALID_REQUEST); } return TextToSpeech.ERROR; } if (queueMode == TextToSpeech.QUEUE_FLUSH) { stopForApp(speechItem.getCallerIdentity()); } else if (queueMode == TextToSpeech.QUEUE_DESTROY) { stopAll(); } Runnable runnable = new Runnable() { @Override public void run() { if (isFlushed(speechItem)) { speechItem.stop(); } else { setCurrentSpeechItem(speechItem); speechItem.play(); setCurrentSpeechItem(null); } } }; Message msg = Message.obtain(this, runnable); // The obj is used to remove all callbacks from the given app in // stopForApp(String). // // Note that this string is interned, so the == comparison works. msg.obj = speechItem.getCallerIdentity(); if (sendMessage(msg)) { return TextToSpeech.SUCCESS; } else { Log.w(TAG, "SynthThread has quit"); if (utterenceProgress != null) { utterenceProgress.dispatchOnError(TextToSpeech.ERROR_SERVICE); } return TextToSpeech.ERROR; } }
主要是看 speechItem.play() 方法,代碼如下:
/**
 * Plays the speech item. Blocks until playback is finished.
 * Must not be called more than once.
 *
 * Only called on the synthesis thread.
 *
 * @throws IllegalStateException if the item was already started
 */
public void play() {
    final boolean alreadyStarted;
    synchronized (this) {
        // Read and set the started flag atomically.
        alreadyStarted = mStarted;
        mStarted = true;
    }
    if (alreadyStarted) {
        throw new IllegalStateException("play() called twice");
    }
    playImpl();
}

/** Performs the actual playback; invoked once by {@link #play()}. */
protected abstract void playImpl();
@Override protected void playImpl() { AbstractSynthesisCallback synthesisCallback; mEventLogger.onRequestProcessingStart(); synchronized (this) { // stop() might have been called before we enter this // synchronized block. if (isStopped()) { return; } mSynthesisCallback = createSynthesisCallback(); synthesisCallback = mSynthesisCallback; } TextToSpeechService.this.onSynthesizeText(mSynthesisRequest, synthesisCallback); // Fix for case where client called .start() & .error(), but did not called .done() if (synthesisCallback.hasStarted() && !synthesisCallback.hasFinished()) { synthesisCallback.done(); } }
@Override protected void onSynthesizeText(SynthesisRequest request, SynthesisCallback callback) { if (mNativeSynth == null) { callback.error(); return; } // Set language String lang = request.getLanguage(); String country = request.getCountry(); String variant = request.getVariant(); if (mNativeSynth.setLanguage(lang, country, variant) != TextToSpeech.SUCCESS) { Log.e(TAG, "setLanguage(" + lang + "," + country + "," + variant + ") failed"); callback.error(); return; } // Set speech rate int speechRate = request.getSpeechRate(); if (mNativeSynth.setSpeechRate(speechRate) != TextToSpeech.SUCCESS) { Log.e(TAG, "setSpeechRate(" + speechRate + ") failed"); callback.error(); return; } // Set speech int pitch = request.getPitch(); if (mNativeSynth.setPitch(pitch) != TextToSpeech.SUCCESS) { Log.e(TAG, "setPitch(" + pitch + ") failed"); callback.error(); return; } // Synthesize if (mNativeSynth.speak(request, callback) != TextToSpeech.SUCCESS) { callback.error(); return; } }
/**
 * JNI entry point for SynthProxy speak: hands the text to the native engine.
 *
 * env            - JNI environment of the calling thread
 * thiz           - the calling Java object (unused here)
 * jniData        - handle resolved to the per-proxy native storage
 * textJavaString - text to synthesize
 * request        - Java request object; a global reference to it is stored in
 *                  the per-request data passed to the engine
 * return the engine's synthesizeText result, or ANDROID_TTS_FAILURE when the
 *        storage, the engine or the native text is unavailable
 */
static jint
com_android_tts_compat_SynthProxy_speak(JNIEnv *env, jobject thiz, jlong jniData,
        jstring textJavaString, jobject request)
{
    SynthProxyJniStorage* pSynthData = getSynthData(jniData);
    if (pSynthData == NULL) {
        return ANDROID_TTS_FAILURE;
    }

    initializeFilter();

    Mutex::Autolock l(engineMutex);

    android_tts_engine_t *engine = pSynthData->mEngine;
    if (!engine) {
        return ANDROID_TTS_FAILURE;
    }

    // Obtain the native text before allocating per-request state, so a failure
    // here leaves nothing to clean up. GetStringUTFChars must not be given a
    // null string and returns NULL when it fails.
    if (textJavaString == NULL) {
        return ANDROID_TTS_FAILURE;
    }
    const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
    if (textNativeString == NULL) {
        return ANDROID_TTS_FAILURE;
    }

    // NOTE(review): nothing in this function frees pRequestData or deletes the
    // global ref; presumably the synthesis-done callback does, confirm there.
    SynthRequestData *pRequestData = new SynthRequestData;
    pRequestData->jniStorage = pSynthData;
    pRequestData->env = env;
    pRequestData->request = env->NewGlobalRef(request);
    pRequestData->startCalled = false;

    memset(pSynthData->mBuffer, 0, pSynthData->mBufferSize);

    int result = engine->funcs->synthesizeText(engine, textNativeString,
            pSynthData->mBuffer, pSynthData->mBufferSize,
            static_cast<void *>(pRequestData));
    env->ReleaseStringUTFChars(textJavaString, textNativeString);

    return (jint) result;
}
從目前來看,隨着語音成爲更多 IoT 設備的入口,語音 TTS 合成播報方面的技術會越來越成熟,特別是 Android 系統原生的相關接口也會越來越強大。因此,TTS 後續的發展應該是穩步上升的。