
Bypassing Google Voice Search: Implementing Android Speech Recognition with the Google Speech API

2013-07-31 14:41
Original post: http://my.eoe.cn/sisuer/archive/5960.html

I was recently writing a small app and, on a whim, decided to add speech recognition to it. After digging through a lot of material online, I found that most approaches either require Google Voice Search to be installed or rely on a third-party SDK such as iFlytek.

Neither option appealed to me: installing Google Voice Search or applying for an iFlytek key is a hassle either way. Then I came across http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&maxresults=1&lang=zh-CN, which gave me an idea, and what follows is the result.

First, the recording code:

private void startRecording(){
    if (mRecorder == null
            || mRecorder.getState() != AudioRecord.STATE_INITIALIZED){
        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
        mHandler.sendMessage(msg);
        return;
    }

    mRecorder.startRecording();
    if (mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
        textView.setText(R.string.recording);
        new Thread(){
            @Override
            public void run(){
                byte[] tmpBuffer = new byte[mBufferSize / 2];
                while (mRecorder != null
                        && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
                    int numOfRead = mRecorder.read(tmpBuffer, 0, tmpBuffer.length);
                    if (numOfRead < 0){
                        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_RECORDING, 0);
                        mHandler.sendMessage(msg);
                        break;
                    }

                    // Estimate loudness from the 16-bit little-endian samples;
                    // mask the low byte to avoid sign extension, and only walk
                    // the bytes that were actually read.
                    float sum = 0;
                    for (int i = 0; i < numOfRead; i += 2){
                        short t = (short) ((tmpBuffer[i] & 0xff) | (tmpBuffer[i + 1] << 8));
                        sum += Math.abs(t);
                    }
                    float rms = sum / (tmpBuffer.length * 2);
                    Message msg = mHandler.obtainMessage(MSG_RECORD_RECORDING, (int) rms, 0);
                    mHandler.sendMessage(msg);
                    if (mRecordedData.length > mRecordedLength + numOfRead){
                        System.arraycopy(tmpBuffer, 0, mRecordedData, mRecordedLength, numOfRead);
                        mRecordedLength += numOfRead;
                    } else {
                        break;
                    }
                }
                mHandler.sendEmptyMessage(MSG_RECORD_STOPPED);
            }
        }.start();
    } else {
        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
        mHandler.sendMessage(msg);
    }
}
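A caveat on the snippet above: despite the name, rms here is really a scaled mean of the absolute sample values (the divisor even counts bytes rather than samples), so treat it only as a relative loudness gate. If you wanted a true root mean square over the same buffer, a minimal sketch might look like this (computeRms is a hypothetical helper, not part of the original code; tmpBuffer and numOfRead are the variables from the loop above):

// Sketch of a true RMS over the 16-bit little-endian samples that were
// actually read into tmpBuffer (hypothetical helper, not from the original).
private static float computeRms(byte[] tmpBuffer, int numOfRead) {
    if (numOfRead < 2) {
        return 0f;
    }
    int sampleCount = numOfRead / 2; // 2 bytes per 16-bit sample
    double sumOfSquares = 0;
    for (int i = 0; i < sampleCount * 2; i += 2) {
        // Mask the low byte to avoid sign extension before combining.
        short sample = (short) ((tmpBuffer[i] & 0xff) | (tmpBuffer[i + 1] << 8));
        sumOfSquares += (double) sample * sample;
    }
    return (float) Math.sqrt(sumOfSquares / sampleCount);
}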


Since the Google endpoint only accepts a limited set of audio formats, and raw PCM is trivial to wrap into a WAV file, the next step is to convert the recorded data into WAV format.

As shown above, the recorded bytes are stored in mRecordedData, and mRecordedLength tracks the recorded length. Here is the code that converts them to WAV format:

private void createWavHeaderIfNeed(boolean forceCreate){
    if (!forceCreate && wavHeader != null){
        return;
    }
    // sample rate * number of channels * bits per sample / bits per byte
    int avgBytesPerSec = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / 8;
    wavHeader = new byte[]{
            'R','I','F','F',    // chunk id = "RIFF", fixed chars
            0, 0, 0, 0,         // RIFF chunk size = 36 + data length
            'W','A','V','E',    // type = "WAVE"
            /* format sub-chunk */
            'f','m','t',' ',    // sub-chunk id = "fmt "
            16, 0, 0, 0,        // format chunk size = 16 (18 would mean an extension block follows)
            1, 0,               // audio format tag, 0x0001 = PCM
            (byte) mChannels, 0, // number of channels (MONO = 1, STEREO = 2)
            /* 4 bytes, sample rate */
            (byte)(mSampleRate & 0xff),
            (byte)((mSampleRate >> 8) & 0xff),
            (byte)((mSampleRate >> 16) & 0xff),
            (byte)((mSampleRate >> 24) & 0xff),
            /* 4 bytes, average bytes per second */
            (byte)(avgBytesPerSec & 0xff),
            (byte)((avgBytesPerSec >> 8) & 0xff),
            (byte)((avgBytesPerSec >> 16) & 0xff),
            (byte)((avgBytesPerSec >> 24) & 0xff),
            /* 2 bytes, block align = bytes per sample frame
             * (channel 0 low byte | channel 0 high byte | ...) */
            (byte)(DEFAULT_PER_SAMPLE_IN_BIT * mChannels / 8),
            0,
            /* 2 bytes, bits per sample */
            16, 0,
            /* data sub-chunk */
            'd','a','t','a',    // sub-chunk id = "data"
            0, 0, 0, 0          // data size, left 0 because it is unknown yet
    };
}

private void setWavHeaderInt(int offset, int value){
    if (offset < 0 || offset > 40){
        // total header length = 44, int length = 4, so 44 - 4 = 40
        throw new IllegalArgumentException("offset out of range");
    }
    createWavHeaderIfNeed(false);

    wavHeader[offset++] = (byte)(value & 0xff);
    wavHeader[offset++] = (byte)((value >> 8) & 0xff);
    wavHeader[offset++] = (byte)((value >> 16) & 0xff);
    wavHeader[offset] = (byte)((value >> 24) & 0xff);
}

private byte[] getWavData(){
    setWavHeaderInt(4, 36 + mRecordedLength);   // RIFF chunk size
    setWavHeaderInt(40, mRecordedLength);       // data sub-chunk size
    byte[] wavData = new byte[44 + mRecordedLength];
    System.arraycopy(wavHeader, 0, wavData, 0, wavHeader.length);
    System.arraycopy(mRecordedData, 0, wavData, wavHeader.length, mRecordedLength);
    return wavData;
}
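Hand-packing the little-endian fields works, but if you would rather not do the bit shifting yourself, java.nio.ByteBuffer can produce the same 44-byte header. A minimal sketch (buildWavHeader is a hypothetical helper, not from the original code; its parameters mirror mSampleRate, mChannels and mRecordedLength above, and 16-bit PCM is assumed):

// Sketch: the same canonical 44-byte WAV header built with ByteBuffer
// in little-endian order (hypothetical helper, not from the original).
private static byte[] buildWavHeader(int sampleRate, short channels, int dataLength) {
    final short bitsPerSample = 16;
    final short blockAlign = (short) (channels * bitsPerSample / 8);
    final int byteRate = sampleRate * blockAlign;
    java.nio.ByteBuffer buf = java.nio.ByteBuffer.allocate(44)
            .order(java.nio.ByteOrder.LITTLE_ENDIAN);
    buf.put(new byte[]{'R', 'I', 'F', 'F'}).putInt(36 + dataLength)
       .put(new byte[]{'W', 'A', 'V', 'E'})
       .put(new byte[]{'f', 'm', 't', ' '}).putInt(16) // format chunk size
       .putShort((short) 1)                            // format tag 1 = PCM
       .putShort(channels)
       .putInt(sampleRate)
       .putInt(byteRate)
       .putShort(blockAlign)
       .putShort(bitsPerSample)
       .put(new byte[]{'d', 'a', 't', 'a'}).putInt(dataLength);
    return buf.array();
}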


  With getWavData() above we can obtain the recording as WAV data. The next step is to submit it to the URL mentioned earlier and wait for the response. This is just a simple HTTP POST; the code follows:

private HttpURLConnection getConnection(){
    HttpURLConnection connection = null;
    try {
        URL httpUrl = new URL(GOOGLE_VOICE_API_URL + mLang);
        connection = (HttpURLConnection) httpUrl.openConnection();
        connection.setConnectTimeout(DEFAULT_CONNECT_TIMEOUT);
        connection.setReadTimeout(DEFAULT_READ_TIMEOUT);
        connection.setRequestMethod("POST");
        connection.setDoInput(true);
        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setRequestProperty("Content-Type", CONTENT_TYPE_WAV);
    } catch (MalformedURLException ex){
        JLog.e(TAG, "getConnection(); invalid URL format", ex);
    } catch (ProtocolException ex){
        JLog.e(TAG, "getConnection(); unsupported protocol", ex);
    } catch (IOException ex){
        JLog.e(TAG, "getConnection(); IO error while opening connection", ex);
    }
    return connection;
}

private void startWebRecognizer(final byte[] wavData){
    textView.setText(R.string.analyzing);
    final HttpURLConnection connection = getConnection();
    if (connection == null){
        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
        mHandler.sendMessage(msg);
    } else {
        new Thread(){
            @Override
            public void run(){
                try {
                    DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
                    dos.write(wavData);
                    dos.flush();
                    dos.close();

                    InputStreamReader inputStreamReader = new InputStreamReader(connection.getInputStream(),
                            Charset.forName("utf-8"));
                    BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
                    StringBuilder sb = new StringBuilder();
                    String tmpStr = null;
                    while ((tmpStr = bufferedReader.readLine()) != null){
                        sb.append(tmpStr);
                    }
                    Message msg = mHandler.obtainMessage(MSG_DECODE_DATA, sb.toString());
                    mHandler.sendMessage(msg);
                } catch (IOException ex){
                    Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
                    mHandler.sendMessage(msg);
                }
            }
        }.start();
    }
}


  OK, now that we have the response data, the next step is to parse it. First, a word about the format Google returns; it is JSON like this:

{
    "status": 0,    /* result code: 0 = success, 4 = no speech, 5 = no match */
    "id": "c421dee91abe31d9b8457f2a80ebca91-1",    /* recognition id */
    "hypotheses":    /* the hypotheses, i.e. the results */
    [
        {
            "utterance": "下午好",      /* the recognized text ("good afternoon") */
            "confidence": 0.2507637    /* confidence, i.e. how accurate the result is */
        }
    ]
}


  A note here: the number of results returned is governed by the maxresults parameter in the URL above; with maxresults=2 you would get two entries. The results are ordered by confidence from high to low, and the theoretical maximum confidence is 1.
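For illustration, with maxresults=2 a response might look like the following (the second hypothesis and both confidence values are invented for this example):

{
    "status": 0,
    "id": "c421dee91abe31d9b8457f2a80ebca91-1",
    "hypotheses":
    [
        { "utterance": "下午好", "confidence": 0.81 },
        { "utterance": "下五号", "confidence": 0.12 }
    ]
}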

Without further ado, let's parse the result:

private void startParseJson(String jsonString){
    try {
        JSONObject jsonObject = new JSONObject(jsonString);
        int status = jsonObject.getInt("status");
        if (status == 0){
            JSONArray hypotheses = jsonObject.getJSONArray("hypotheses");
            if (hypotheses != null && hypotheses.length() > 0){
                JSONObject hypot = hypotheses.optJSONObject(0);
                String speechText = hypot.getString("utterance");
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NONE, 0, speechText);
                mHandler.sendMessage(msg);
            }
        } else if (status == 4){
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_SPEECH, 0);
            mHandler.sendMessage(msg);
        } else if (status == 5){
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_MATCH, 0);
            mHandler.sendMessage(msg);
        }
    } catch (JSONException ex){
        JLog.e(TAG, "Decode JSON error", ex);
        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_DECODING, 0);
        mHandler.sendMessage(msg);
    }
}


  With that we have completed the speech-to-text flow, which is what is usually meant by speech recognition. Here is the complete code of the activity:

package com.jecofang.catebutler.activities;


import android.content.Intent;
import android.graphics.drawable.AnimationDrawable;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.view.View;
import android.widget.ImageView;
import android.widget.TextView;
import com.jecofang.catebutler.R;
import com.jecofang.catebutler.base.BaseActivity;
import com.jecofang.catebutler.common.JLog;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.nio.charset.Charset;

/**
* ***************************************
* File Name : SpeechRecognitionActivity
* Author : Jeco Fang
* Email : jeco.fang@163.com
* Create on : 13-7-19
* All rights reserved 2013 - 2013
* ****************************************
*/
public class SpeechRecognitionActivity extends BaseActivity {
    private static final String TAG = "SpeechRecognitionActivity";
    /* Recording params */
    public static final String AUDIO_SOURCE = "AudioSource";
    private static final int DEFAULT_AUDIO_SOURCE = MediaRecorder.AudioSource.VOICE_RECOGNITION;
    public static final String SAMPLE_RATE = "SampleRate";
    private static final int DEFAULT_SAMPLE_RATE = 16000;
    private static final int DEFAULT_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;
    private static final short DEFAULT_PER_SAMPLE_IN_BYTES = 2;
    private static final short DEFAULT_PER_SAMPLE_IN_BIT = 16;
    public static final String CHANNELS = "Channels";
    private static final short DEFAULT_CHANNELS = 1; // number of channels (MONO = 1, STEREO = 2)

    /* Web API params */
    public static final String LANGUAGE = "Language";
    private static final String DEFAULT_LANGUAGE = "zh-CN";
    private static final String GOOGLE_VOICE_API_URL =
            "http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&maxresults=1&lang=";
    private static final String USER_AGENT = "Mozilla/5.0";
    private static final int DEFAULT_CONNECT_TIMEOUT = 10 * 1000; // 10 sec
    private static final int DEFAULT_READ_TIMEOUT = 20 * 1000; // 20 sec
    private static final String CONTENT_TYPE_WAV = "audio/L16;rate=16000";

    /* Message types */
    private static final int MSG_PREPARE_RECORDER = 1;
    private static final int MSG_START_RECORDING = 2;
    private static final int MSG_RECORD_RECORDING = 3;
    private static final int MSG_STOP_RECORDING = 4;
    private static final int MSG_RECORD_STOPPED = 5;
    private static final int MSG_DECODE_DATA = 6;
    private static final int MSG_ERROR = 7;

    /* Errors */
    public static final int ERR_NONE = 0;
    public static final int ERR_UNKNOWN = -1;
    public static final int ERR_UN_SUPPORT_PARAMS = -2;
    public static final int ERR_ILLEGAL_STATE = -3;
    public static final int ERR_RECORDING = -4;
    public static final int ERR_NETWORK = -5;
    public static final int ERR_NO_SPEECH = -6;
    public static final int ERR_NO_MATCH = -7;
    public static final int ERR_DECODING = -8;

    private int mSampleRate;
    private short mChannels;
    private int mAudioSource;

    private AudioRecord mRecorder;
    private int mBufferSize;
    private int mRecordedLength;
    private byte[] mRecordedData;
    private byte[] wavHeader;

    private enum State {
        IDLE,
        BUSY
    }

    private String mLang;

    private Handler mHandler = new InternalHandler();
    private State mState;

    private ImageView imageView;
    private TextView textView;

    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_speech_recognition);

        imageView = (ImageView) findViewById(R.id.iv_speaking);
        textView = (TextView) findViewById(R.id.tv_result);
        mState = State.IDLE;
    }

    @Override
    public void onStart(){
        super.onStart();
        JLog.d("onStart");
        if (mState == State.IDLE){
            Intent intent = getIntent();
            mAudioSource = intent.getIntExtra(AUDIO_SOURCE, DEFAULT_AUDIO_SOURCE);
            mSampleRate = intent.getIntExtra(SAMPLE_RATE, DEFAULT_SAMPLE_RATE);
            mChannels = intent.getShortExtra(CHANNELS, DEFAULT_CHANNELS);
            mLang = intent.getStringExtra(LANGUAGE);
            if (mLang == null || mLang.trim().length() == 0){
                mLang = DEFAULT_LANGUAGE;
            }
            if (!isNetworkAvailable()){
                // Use the (what, arg1, arg2) overload so the error code lands
                // in msg.arg1, which is what the MSG_ERROR handler reads.
                Message message = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
                mHandler.sendMessage(message);
            } else {
                mHandler.sendEmptyMessageDelayed(MSG_PREPARE_RECORDER, 500);
            }
        }
    }

    @Override
    public void onStop(){
        super.onStop();
        JLog.d("onStop");
    }

    @Override
    public void onPause(){
        super.onPause();
        JLog.d("onPause");
    }

    @Override
    public void onResume(){
        super.onResume();
        JLog.d("onResume");
    }

    private class InternalHandler extends Handler {
        private long lastTalkTime;
        private long startTime;
        AnimationDrawable animationDrawable;

        @Override
        public void handleMessage(Message msg){
            switch (msg.what){
                case MSG_PREPARE_RECORDER:
                    mState = State.BUSY;
                    JLog.d("Prepare recorder");
                    prepareRecorder();
                    break;
                case MSG_START_RECORDING:
                    startTime = System.currentTimeMillis();
                    lastTalkTime = 0;
                    JLog.d("Start recording");
                    startRecording();
                    textView.setText(R.string.speech);
                    break;
                case MSG_RECORD_RECORDING:
                    // If no speech is detected within 5 seconds of starting, stop.
                    // Once speech has started, stop after 3 seconds of silence.
                    long currentTime = System.currentTimeMillis();
                    int volume = msg.arg1;
                    JLog.d(TAG, "Record recording. Volume = %d", volume);
                    if (lastTalkTime == 0){
                        if (volume >= 30){
                            lastTalkTime = currentTime;
                            startAnimationIfNeed(animationDrawable);
                        } else {
                            stopAnimation(animationDrawable);
                            if (currentTime - startTime >= 5 * 1000){
                                mHandler.sendEmptyMessage(MSG_STOP_RECORDING);
                            }
                        }
                    } else {
                        if (volume >= 30){
                            lastTalkTime = currentTime;
                            startAnimationIfNeed(animationDrawable);
                        } else {
                            stopAnimation(animationDrawable);
                            if (currentTime - lastTalkTime >= 3 * 1000){
                                mHandler.sendEmptyMessage(MSG_STOP_RECORDING);
                            }
                        }
                    }
                    break;
                case MSG_STOP_RECORDING:
                    JLog.d("Stop recording");
                    stopAnimation(animationDrawable);
                    stopRecording();
                    break;
                case MSG_RECORD_STOPPED:
                    JLog.d("Recorder stopped, try to get remote data");
                    byte[] wavData = getWavData();
                    startWebRecognizer(wavData);

                    if (mRecorder != null){
                        mRecorder.release();
                        mRecorder = null;
                    }
                    break;
                case MSG_DECODE_DATA:
                    String data = "";
                    if (msg.obj != null){
                        data = msg.obj.toString();
                    }
                    JLog.d("Try to parse data: " + data);
                    if (data.trim().length() > 0){
                        startParseJson(data.trim());
                    } else {
                        Message message = mHandler.obtainMessage(MSG_ERROR, ERR_UNKNOWN, 0);
                        mHandler.sendMessage(message);
                    }
                    break;
                case MSG_ERROR:
                    mState = State.IDLE;
                    if (mRecorder != null){
                        mRecorder.release();
                        mRecorder = null;
                    }
                    Intent intent = new Intent();
                    intent.putExtra(SPEECH_RESULT_STATUS, msg.arg1);
                    if (msg.obj != null){
                        JLog.d("Error: " + msg.arg1 + "; value: " + msg.obj);
                        intent.putExtra(SPEECH_RESULT_VALUE, msg.obj.toString());
                    }
                    JLog.d("Error: " + msg.arg1);
                    setResult(RESULT_OK, intent);
                    finish();
                    break;
                default:
                    break;
            }
        }
    }

    private void prepareRecorder(){
        int minBufferSize = AudioRecord.getMinBufferSize(mSampleRate,
                AudioFormat.CHANNEL_IN_MONO, DEFAULT_AUDIO_ENCODING);
        if (minBufferSize == AudioRecord.ERROR_BAD_VALUE){
            JLog.e(TAG, "Params are not supported by hardware.\n"
                    + "sample rate: %d; channel: %2x; encoding: %2x",
                    mSampleRate,
                    AudioFormat.CHANNEL_IN_MONO,
                    DEFAULT_AUDIO_ENCODING);
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_UN_SUPPORT_PARAMS, 0);
            mHandler.sendMessage(msg);
            return;
        } else if (minBufferSize == AudioRecord.ERROR){
            JLog.w(TAG, "Unable to query hardware for output properties");
            // Fall back to a 120 ms buffer. The multiplication must come
            // before the division: (120 / 1000) in integer math is 0.
            minBufferSize = mSampleRate * 120 / 1000 * DEFAULT_PER_SAMPLE_IN_BYTES * mChannels;
        }
        mBufferSize = minBufferSize * 2;

        mRecorder = new AudioRecord(mAudioSource, mSampleRate,
                AudioFormat.CHANNEL_IN_MONO, DEFAULT_AUDIO_ENCODING, mBufferSize);
        if (mRecorder.getState() != AudioRecord.STATE_INITIALIZED){
            JLog.e(TAG, "AudioRecord initialize failed");
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
            return;
        }

        mRecordedLength = 0;
        // Pre-allocate enough space for up to 35 seconds of audio.
        int maxRecordLength = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BYTES * 35;
        mRecordedData = new byte[maxRecordLength];
        Message msg = mHandler.obtainMessage(MSG_START_RECORDING);
        mHandler.sendMessage(msg);
    }

    private void startRecording(){
        if (mRecorder == null
                || mRecorder.getState() != AudioRecord.STATE_INITIALIZED){
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
            return;
        }

        mRecorder.startRecording();
        if (mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
            textView.setText(R.string.recording);
            new Thread(){
                @Override
                public void run(){
                    byte[] tmpBuffer = new byte[mBufferSize / 2];
                    while (mRecorder != null
                            && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
                        int numOfRead = mRecorder.read(tmpBuffer, 0, tmpBuffer.length);
                        if (numOfRead < 0){
                            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_RECORDING, 0);
                            mHandler.sendMessage(msg);
                            break;
                        }

                        // Estimate loudness from the 16-bit little-endian samples;
                        // mask the low byte to avoid sign extension, and only walk
                        // the bytes that were actually read.
                        float sum = 0;
                        for (int i = 0; i < numOfRead; i += 2){
                            short t = (short) ((tmpBuffer[i] & 0xff) | (tmpBuffer[i + 1] << 8));
                            sum += Math.abs(t);
                        }
                        float rms = sum / (tmpBuffer.length * 2);
                        Message msg = mHandler.obtainMessage(MSG_RECORD_RECORDING, (int) rms, 0);
                        mHandler.sendMessage(msg);
                        if (mRecordedData.length > mRecordedLength + numOfRead){
                            System.arraycopy(tmpBuffer, 0, mRecordedData, mRecordedLength, numOfRead);
                            mRecordedLength += numOfRead;
                        } else {
                            break;
                        }
                    }
                    mHandler.sendEmptyMessage(MSG_RECORD_STOPPED);
                }
            }.start();
        } else {
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_ILLEGAL_STATE, 0);
            mHandler.sendMessage(msg);
        }
    }

    private void stopRecording(){
        if (mRecorder != null
                && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING){
            mRecorder.stop();
        }
    }

    private void createWavHeaderIfNeed(boolean forceCreate){
        if (!forceCreate && wavHeader != null){
            return;
        }
        // sample rate * number of channels * bits per sample / bits per byte
        int avgBytesPerSec = mSampleRate * mChannels * DEFAULT_PER_SAMPLE_IN_BIT / 8;
        wavHeader = new byte[]{
                'R','I','F','F',    // chunk id = "RIFF", fixed chars
                0, 0, 0, 0,         // RIFF chunk size = 36 + data length
                'W','A','V','E',    // type = "WAVE"
                /* format sub-chunk */
                'f','m','t',' ',    // sub-chunk id = "fmt "
                16, 0, 0, 0,        // format chunk size = 16 (18 would mean an extension block follows)
                1, 0,               // audio format tag, 0x0001 = PCM
                (byte) mChannels, 0, // number of channels (MONO = 1, STEREO = 2)
                /* 4 bytes, sample rate */
                (byte)(mSampleRate & 0xff),
                (byte)((mSampleRate >> 8) & 0xff),
                (byte)((mSampleRate >> 16) & 0xff),
                (byte)((mSampleRate >> 24) & 0xff),
                /* 4 bytes, average bytes per second */
                (byte)(avgBytesPerSec & 0xff),
                (byte)((avgBytesPerSec >> 8) & 0xff),
                (byte)((avgBytesPerSec >> 16) & 0xff),
                (byte)((avgBytesPerSec >> 24) & 0xff),
                /* 2 bytes, block align = bytes per sample frame
                 * (channel 0 low byte | channel 0 high byte | ...) */
                (byte)(DEFAULT_PER_SAMPLE_IN_BIT * mChannels / 8),
                0,
                /* 2 bytes, bits per sample */
                16, 0,
                /* data sub-chunk */
                'd','a','t','a',    // sub-chunk id = "data"
                0, 0, 0, 0          // data size, left 0 because it is unknown yet
        };
    }

    private void setWavHeaderInt(int offset, int value){
        if (offset < 0 || offset > 40){
            // total header length = 44, int length = 4, so 44 - 4 = 40
            throw new IllegalArgumentException("offset out of range");
        }
        createWavHeaderIfNeed(false);

        wavHeader[offset++] = (byte)(value & 0xff);
        wavHeader[offset++] = (byte)((value >> 8) & 0xff);
        wavHeader[offset++] = (byte)((value >> 16) & 0xff);
        wavHeader[offset] = (byte)((value >> 24) & 0xff);
    }

    private byte[] getWavData(){
        setWavHeaderInt(4, 36 + mRecordedLength);   // RIFF chunk size
        setWavHeaderInt(40, mRecordedLength);       // data sub-chunk size
        byte[] wavData = new byte[44 + mRecordedLength];
        System.arraycopy(wavHeader, 0, wavData, 0, wavHeader.length);
        System.arraycopy(mRecordedData, 0, wavData, wavHeader.length, mRecordedLength);
        return wavData;
    }

    private HttpURLConnection getConnection(){
        HttpURLConnection connection = null;
        try {
            URL httpUrl = new URL(GOOGLE_VOICE_API_URL + mLang);
            connection = (HttpURLConnection) httpUrl.openConnection();
            connection.setConnectTimeout(DEFAULT_CONNECT_TIMEOUT);
            connection.setReadTimeout(DEFAULT_READ_TIMEOUT);
            connection.setRequestMethod("POST");
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setRequestProperty("Content-Type", CONTENT_TYPE_WAV);
        } catch (MalformedURLException ex){
            JLog.e(TAG, "getConnection(); invalid URL format", ex);
        } catch (ProtocolException ex){
            JLog.e(TAG, "getConnection(); unsupported protocol", ex);
        } catch (IOException ex){
            JLog.e(TAG, "getConnection(); IO error while opening connection", ex);
        }
        return connection;
    }

    private void startWebRecognizer(final byte[] wavData){
        textView.setText(R.string.analyzing);
        final HttpURLConnection connection = getConnection();
        if (connection == null){
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
            mHandler.sendMessage(msg);
        } else {
            new Thread(){
                @Override
                public void run(){
                    try {
                        DataOutputStream dos = new DataOutputStream(connection.getOutputStream());
                        dos.write(wavData);
                        dos.flush();
                        dos.close();

                        InputStreamReader inputStreamReader = new InputStreamReader(connection.getInputStream(),
                                Charset.forName("utf-8"));
                        BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
                        StringBuilder sb = new StringBuilder();
                        String tmpStr = null;
                        while ((tmpStr = bufferedReader.readLine()) != null){
                            sb.append(tmpStr);
                        }
                        Message msg = mHandler.obtainMessage(MSG_DECODE_DATA, sb.toString());
                        mHandler.sendMessage(msg);
                    } catch (IOException ex){
                        Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NETWORK, 0);
                        mHandler.sendMessage(msg);
                    }
                }
            }.start();
        }
    }

    private void startParseJson(String jsonString){
        try {
            JSONObject jsonObject = new JSONObject(jsonString);
            int status = jsonObject.getInt("status");
            if (status == 0){
                JSONArray hypotheses = jsonObject.getJSONArray("hypotheses");
                if (hypotheses != null && hypotheses.length() > 0){
                    JSONObject hypot = hypotheses.optJSONObject(0);
                    String speechText = hypot.getString("utterance");
                    Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NONE, 0, speechText);
                    mHandler.sendMessage(msg);
                }
            } else if (status == 4){
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_SPEECH, 0);
                mHandler.sendMessage(msg);
            } else if (status == 5){
                Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_NO_MATCH, 0);
                mHandler.sendMessage(msg);
            }
        } catch (JSONException ex){
            JLog.e(TAG, "Decode JSON error", ex);
            Message msg = mHandler.obtainMessage(MSG_ERROR, ERR_DECODING, 0);
            mHandler.sendMessage(msg);
        }
    }

    private void startAnimationIfNeed(AnimationDrawable animationDrawable){
        imageView.setVisibility(View.VISIBLE);
        if (animationDrawable == null){
            imageView.setBackgroundResource(R.anim.speak_view);
            // Note: this assigns only the local parameter, so the caller's
            // field stays null and the drawable is re-resolved on each call.
            animationDrawable = (AnimationDrawable) imageView.getBackground();
        }

        if (animationDrawable != null && !animationDrawable.isRunning()){
            animationDrawable.start();
        }
    }

    private void stopAnimation(AnimationDrawable animationDrawable){
        imageView.setVisibility(View.INVISIBLE);
        if (animationDrawable != null && animationDrawable.isRunning()){
            animationDrawable.stop();
        }
    }
}


One thing I should mention: the JLog.x calls are my own thin wrapper around Log, mainly so the log level can be controlled in one place. BaseActivity wraps some common activity helpers plus a few custom constants; only these constants are used here:

protected static final int GET_SPEECH_RESULT = 1;

protected static final String SPEECH_RESULT_STATUS = "speechResultStatus";
protected static final String SPEECH_RESULT_VALUE = "speechResultValue";
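The post does not include JLog itself. Judging from the call sites above (d, w, e and i, some taking String.format-style arguments), a minimal sketch of such a wrapper could look like this (the default tag and the level field are my assumptions, not the original class):

// Hypothetical minimal JLog: a thin wrapper over android.util.Log that
// gates all output on a single level and supports format arguments.
public final class JLog {
    private static final String DEFAULT_TAG = "CateButler"; // assumed default tag
    public static int level = android.util.Log.DEBUG;       // unified level switch

    public static void d(String msg) { log(android.util.Log.DEBUG, DEFAULT_TAG, msg, null); }
    public static void d(String tag, String fmt, Object... args) {
        log(android.util.Log.DEBUG, tag, String.format(fmt, args), null);
    }
    public static void i(String msg) { log(android.util.Log.INFO, DEFAULT_TAG, msg, null); }
    public static void w(String tag, String msg) { log(android.util.Log.WARN, tag, msg, null); }
    public static void e(String tag, String msg, Throwable tr) {
        log(android.util.Log.ERROR, tag, msg, tr);
    }
    public static void e(String tag, String fmt, Object... args) {
        log(android.util.Log.ERROR, tag, String.format(fmt, args), null);
    }

    private static void log(int priority, String tag, String msg, Throwable tr) {
        if (priority < level) return; // single point of level control
        if (tr != null) msg += '\n' + android.util.Log.getStackTraceString(tr);
        android.util.Log.println(priority, tag, msg);
    }
}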


The layout file:

<?xml version="1.0" encoding="utf-8"?>
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="fill_parent"
    android:layout_height="fill_parent"
    android:background="#90000000">

    <RelativeLayout
        android:layout_width="fill_parent"
        android:layout_height="wrap_content"
        android:layout_centerInParent="true">

        <LinearLayout
            android:layout_width="240dp"
            android:layout_height="wrap_content"
            android:orientation="vertical"
            android:layout_centerHorizontal="true">

            <RelativeLayout
                android:id="@+id/image_layout"
                android:layout_height="230dp"
                android:layout_width="230dp"
                android:layout_centerInParent="true">

                <ImageView
                    android:id="@+id/iv_speaking"
                    android:layout_height="wrap_content"
                    android:layout_width="wrap_content"
                    android:layout_centerInParent="true" />

                <ImageView
                    android:layout_height="wrap_content"
                    android:layout_width="wrap_content"
                    android:layout_centerInParent="true"
                    android:background="@drawable/ic_speech" />
            </RelativeLayout>

            <TextView
                android:id="@+id/tv_result"
                android:layout_height="wrap_content"
                android:layout_width="wrap_content"
                android:textColor="#FFFFFFFF"
                android:textSize="14sp"
                android:singleLine="true"
                android:ellipsize="marquee"
                android:marqueeRepeatLimit="marquee_forever"
                android:layout_marginTop="40dip"
                android:layout_centerInParent="true" />
        </LinearLayout>
    </RelativeLayout>
</RelativeLayout>

 The whole layout uses #90000000 as its background, i.e. semi-transparent black.

The speak animation:

<?xml version="1.0" encoding="utf-8"?>
<animation-list xmlns:android="http://schemas.android.com/apk/res/android"
    android:oneshot="false">
    <item android:duration="150" android:drawable="@drawable/mic_1" />
    <item android:duration="150" android:drawable="@drawable/mic_2" />
    <item android:duration="150" android:drawable="@drawable/mic_3" />
    <item android:duration="150" android:drawable="@drawable/mic_4" />
</animation-list>

  It is really just a few semi-transparent circles growing from small to large.

Invoking it is straightforward:

ib_Speak = (ImageButton) findViewById(R.id.main_bottom_bar_ib_speak);

ib_Speak.setOnClickListener(new View.OnClickListener() {
    @Override
    public void onClick(View view) {
        Intent intent = new Intent(MainActivity.this, SpeechRecognitionActivity.class);
        startActivityForResult(intent, GET_SPEECH_RESULT);
        //Intent intent = new Intent(MainActivity.this, Record.class);
        //startActivity(intent);
    }
});


Getting the result back:

@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data){
    if (requestCode == GET_SPEECH_RESULT){
        if (resultCode == RESULT_CANCELED){
            // do nothing for now
        } else if (resultCode == RESULT_OK){
            JLog.i("status: " + data.getIntExtra(SPEECH_RESULT_STATUS, 0));
            switch (data.getIntExtra(SPEECH_RESULT_STATUS, 0)){
                case SpeechRecognitionActivity.ERR_NONE:
                    String text = data.getStringExtra(SPEECH_RESULT_VALUE);
                    if (text != null && text.trim().length() > 0){
                        submitText(text);
                    }
                    break;
                default:
                    Toast.makeText(this, R.string.error, Toast.LENGTH_SHORT).show();
                    break;
            }
        }
    }
}

                                            