diff --git a/README.md b/README.md index f6f90afa4db320f6dfe065e373f7a55aa9c218e1..8fd92dcbd68f2bcf78efe4a5cee0443d2abbad80 100644 --- a/README.md +++ b/README.md @@ -56,124 +56,130 @@ Demo中使用了SDK内置的默认实时语音识别服务的外网访问URL, **示例** ```java -import java.io.InputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; import com.alibaba.nls.client.protocol.InputFormatEnum; import com.alibaba.nls.client.protocol.NlsClient; import com.alibaba.nls.client.protocol.SampleRateEnum; -import com.alibaba.nls.client.protocol.asr.SpeechTranscriber; -import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener; -import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse; +import com.alibaba.nls.client.protocol.asr.SpeechRecognizer; +import com.alibaba.nls.client.protocol.asr.SpeechRecognizerListener; +import com.alibaba.nls.client.protocol.asr.SpeechRecognizerResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * 实时语音识别Demo + * 此示例演示了 + * ASR一句话识别API调用 + * 动态获取token + * 通过本地文件模拟实时流发送 + * 识别耗时计算 + * (仅作演示,需用户根据实际情况实现) */ -public class SpeechTranscriberDemo { +public class SpeechRecognizerDemo { + private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class); private String appKey; - private String accessToken; NlsClient client; - public SpeechTranscriberDemo(String appKey, String token) { + public SpeechRecognizerDemo(String appKey, String id, String secret, String url) { this.appKey = appKey; - this.accessToken = token; - //创建NlsClient实例,应用全局创建一个即可,用户指定服务地址 - client = new NlsClient(token, accessToken); - } - public SpeechTranscriberDemo(String appKey, String token, String url) { - this.appKey = appKey; - this.accessToken = token; - //创建NlsClient实例,应用全局创建一个即可,用户指定服务地址 - client = new NlsClient(url, accessToken); + //TODO 重要提示 创建NlsClient实例,应用全局创建一个即可,生命周期可和整个应用保持一致,默认服务地址为阿里云线上服务地址 + //TODO 这里简单演示了获取token 的代码,该token会过期,实际使用时注意在accessToken.getExpireTime()过期前再次获取token + AccessToken accessToken = new AccessToken(id, secret); + try { + accessToken.apply(); + System.out.println("get token: " + accessToken.getToken() + ", expire time: " + accessToken.getExpireTime()); + // TODO 创建NlsClient实例,应用全局创建一个即可 + if(url.isEmpty()) { + client = new NlsClient(accessToken.getToken()); + }else { + client = new NlsClient(url, accessToken.getToken()); + } + } catch (IOException e) { + e.printStackTrace(); + } } - private static SpeechTranscriberListener getTranscriberListener() { - SpeechTranscriberListener listener = new SpeechTranscriberListener() { + private static SpeechRecognizerListener getRecognizerListener(int myOrder, String userParam) { + SpeechRecognizerListener listener = new SpeechRecognizerListener() { //识别出中间结果.服务端识别出一个字或词时会返回此消息.仅当setEnableIntermediateResult(true)时,才会有此类消息返回 @Override - public void onTranscriptionResultChange(SpeechTranscriberResponse response) { - System.out.println("task_id: " + response.getTaskId() + - ", name: " + response.getName() + - //状态码 20000000 表示正常识别 - ", status: " + response.getStatus() + - //句子编号,从1开始递增 - ", index: " + response.getTransSentenceIndex() + - //当前的识别结果 - ", result: " + response.getTransSentenceText() + - //当前已处理的音频时长,单位是毫秒 - ", time: " + response.getTransSentenceTime()); - } - @Override - public void onTranscriberStart(SpeechTranscriberResponse response) { - System.out.println("task_id: " + response.getTaskId() + - ", name: " + response.getName() + - ", status: " + response.getStatus()); + public void onRecognitionResultChanged(SpeechRecognizerResponse response) { + //事件名称 RecognitionResultChanged、 状态码(20000000 表示识别成功)、语音识别文本 + System.out.println("name: " + response.getName() + ", status: " + response.getStatus() + ", result: " + response.getRecognizedText()); } + //识别完毕 @Override - public void onSentenceBegin(SpeechTranscriberResponse response) { - System.out.println("task_id: " + response.getTaskId() + - ", name: " + response.getName() + - ", status: " + response.getStatus()); - } - //识别出一句话.服务端会智能断句,当识别到一句话结束时会返回此消息 - @Override - public void onSentenceEnd(SpeechTranscriberResponse response) { - System.out.println("task_id: " + response.getTaskId() + - ", name: " + response.getName() + - //状态码 20000000 表示正常识别 - ", status: " + response.getStatus() + - //句子编号,从1开始递增 - ", index: " + response.getTransSentenceIndex() + - //当前的识别结果 - ", result: " + response.getTransSentenceText() + - //置信度 - ", confidence: " + response.getConfidence() + - //开始时间 - ", begin_time: " + response.getSentenceBeginTime() + - //当前已处理的音频时长,单位是毫秒 - ", time: " + response.getTransSentenceTime()); + public void onRecognitionCompleted(SpeechRecognizerResponse response) { + //事件名称 RecognitionCompleted, 状态码 20000000 表示识别成功, getRecognizedText是识别结果文本 + System.out.println("name: " + response.getName() + ", status: " + response.getStatus() + ", result: " + response.getRecognizedText()); } - //识别完毕 @Override - public void onTranscriptionComplete(SpeechTranscriberResponse response) { - System.out.println("task_id: " + response.getTaskId() + - ", name: " + response.getName() + - ", status: " + response.getStatus()); + public void onStarted(SpeechRecognizerResponse response) { + System.out.println("myOrder: " + myOrder + "; myParam: " + userParam + "; task_id: " + response.getTaskId()); } @Override - public void onFail(SpeechTranscriberResponse response) { - System.out.println( - "task_id: " + response.getTaskId() + - //状态码 20000000 表示识别成功 - ", status: " + response.getStatus() + - //错误信息 - ", status_text: " + response.getStatusText()); + public void onFail(SpeechRecognizerResponse response) { + // TODO 重要提示: task_id很重要,是调用方和服务端通信的唯一ID标识,当遇到问题时,需要提供此task_id以便排查 + System.out.println("task_id: " + response.getTaskId() + ", status: " + response.getStatus() + ", status_text: " + response.getStatusText()); } }; return listener; } - public void process(InputStream ins) { - SpeechTranscriber transcriber = null; + /// 根据二进制数据大小计算对应的同等语音长度 + /// sampleRate 仅支持8000或16000 + public static int getSleepDelta(int dataSize, int sampleRate) { + // 仅支持16位采样 + int sampleBytes = 16; + // 仅支持单通道 + int soundChannel = 1; + return (dataSize * 10 * 8000) / (160 * sampleRate); + } + public void process(String filepath, int sampleRate) { + SpeechRecognizer recognizer = null; try { - //创建实例,建立连接 - transcriber = new SpeechTranscriber(client, getTranscriberListener()); - transcriber.setAppKey(appKey); - //输入音频编码方式 - transcriber.setFormat(InputFormatEnum.PCM); - //输入音频采样率 - transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K); - //是否返回中间识别结果 - transcriber.setEnableIntermediateResult(false); - //是否生成并返回标点符号 - transcriber.setEnablePunctuation(true); - //是否将返回结果规整化,比如将一百返回为100 - transcriber.setEnableITN(false); + // 传递用户自定义参数 + String myParam = "user-param"; + int myOrder = 1234; + SpeechRecognizerListener listener = getRecognizerListener(myOrder, myParam); + recognizer = new SpeechRecognizer(client, listener); + recognizer.setAppKey(appKey); + //设置音频编码格式 TODO 如果是opus文件,请设置为 InputFormatEnum.OPUS + recognizer.setFormat(InputFormatEnum.PCM); + //设置音频采样率 + if(sampleRate == 16000) { + recognizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K); + } else if(sampleRate == 8000) { + recognizer.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K); + } + //设置是否返回中间识别结果 + recognizer.setEnableIntermediateResult(true); //此方法将以上参数设置序列化为json发送给服务端,并等待服务端确认 - transcriber.start(); - //语音数据来自声音文件用此方法,控制发送速率;若语音来自实时录音,不需控制发送速率直接调用 transcriber.sent(ins)即可 - transcriber.send(ins, 3200, 100); + long now = System.currentTimeMillis(); + recognizer.start(); + logger.info("ASR start latency : " + (System.currentTimeMillis() - now) + " ms"); + File file = new File(filepath); + FileInputStream fis = new FileInputStream(file); + byte[] b = new byte[3200]; + int len; + while ((len = fis.read(b)) > 0) { + logger.info("send data pack length: " + len); + recognizer.send(b); + // TODO 重要提示:这里是用读取本地文件的形式模拟实时获取语音流并发送的,因为read很快,所以这里需要sleep + // TODO 如果是真正的实时获取语音,则无需sleep, 如果是8k采样率语音,第二个参数改为8000 + int deltaSleep = getSleepDelta(len, sampleRate); + Thread.sleep(deltaSleep); + } //通知服务端语音数据发送完毕,等待服务端处理完成 - transcriber.stop(); + now = System.currentTimeMillis(); + // TODO 计算实际延迟: stop返回之后一般即是识别结果返回时间 + logger.info("ASR wait for complete"); + recognizer.stop(); + logger.info("ASR stop latency : " + (System.currentTimeMillis() - now) + " ms"); + fis.close(); } catch (Exception e) { System.err.println(e.getMessage()); } finally { - if (null != transcriber) { - transcriber.close(); + //关闭连接 + if (null != recognizer) { + recognizer.close(); } } } @@ -181,31 +187,27 @@ public class SpeechTranscriberDemo { client.shutdown(); } public static void main(String[] args) throws Exception { - String appKey = null; - String token = null; - String url = null; - SpeechTranscriberDemo demo =null; - if (args.length == 2) { - appKey = args[0]; - token = args[1]; - //default url is wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1 - demo = new SpeechTranscriberDemo(appKey, token); - }else if(args.length == 3){ - appKey = args[0]; - token = args[1]; - url = args[2]; - demo = new SpeechTranscriberDemo(appKey, token, url); - }else{ - System.err.println("SpeechTranscriberDemo need params(url is optional): " + - " []"); + String appKey = null; // "填写你的appkey"; + String id = null; // "填写你在阿里云网站上的AccessKeyId"; + String secret = null; // "填写你在阿里云网站上的AccessKeySecret"; + String url = ""; // 默认即可,默认值:wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1 + if (args.length == 3) { + appKey = args[0]; + id = args[1]; + secret = args[2]; + } else if (args.length == 4) { + appKey = args[0]; + id = args[1]; + secret = args[2]; + url = args[3]; + } else { + System.err.println("run error, need params(url is optional): " + " [url]"); System.exit(-1); } - InputStream ins = SpeechTranscriberDemo.class.getResourceAsStream("/nls-sample-16k.wav"); - if (null == ins) { - System.err.println("open the audio file failed!"); - return; - } - demo.process(ins); + SpeechRecognizerDemo demo = new SpeechRecognizerDemo(appKey, id, secret, url); + // TODO 重要提示: 这里用一个本地文件来模拟发送实时流数据,实际使用时,用户可以从某处实时采集或接收语音流并发送到ASR服务端 + demo.process("./nls-sample-16k.wav", 16000); + //demo.process("./nls-sample.opus", 16000); demo.shutdown(); } }