Commit a15eedd8 authored by znn1980

Add an error callback method for speech transcription

Parent aef8029e
package com.ifacebox.speech;
public interface AudioDataCallback {
void setText(boolean isFinal, String text);
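// onText delivers partial and final transcription results; onError reports
// transcription failures (for example gRPC stream errors) back to the caller.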
void onText(boolean isFinal, String text);
void onError(Throwable t);
}
package com.ifacebox.speech;
import java.io.IOException;
import java.io.InputStream;
import com.google.protobuf.ByteString;
import com.yitutech.speech.AudioConfig;
import com.yitutech.speech.SpeechConfig;
@@ -14,124 +11,79 @@ import com.yitutech.speech.StreamingSpeechResult;
import com.yitutech.speech.StreamingSpeechStatus;
import com.yitutech.speech.StreamingTranscription;
import com.yitutech.speech.SpeechRecognitionGrpc.SpeechRecognitionStub;
import io.grpc.ForwardingClientCall.SimpleForwardingClientCall;
import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener;
import io.grpc.CallOptions;
import io.grpc.Channel;
import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.ClientInterceptors;
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.grpc.stub.StreamObserver;
public class AudioSpeechServer {
private static final Metadata.Key<String> X_DEV_ID = Metadata.Key.of("x-dev-id", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_REQUEST_SEND_TIMESTAMP = Metadata.Key.of("x-request-send-timestamp", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_SIGNATURE = Metadata.Key.of("x-signature", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_API_KEY = Metadata.Key.of("x-api-key", Metadata.ASCII_STRING_MARSHALLER);
private ManagedChannel channel;
private SpeechRecognitionStub stub;
private StreamObserver<StreamingSpeechRequest> requestObserver;
private AudioDataCallback callback;
private AudioSpeechConfig config;
private volatile boolean isRunning = false;
public AudioSpeechServer(AudioSpeechConfig config, AudioDataCallback callback) {
this.config = config;
this.callback = callback;
}
public boolean isRunning() {
return isRunning;
}
public void setAudioSpeechConfig(AudioSpeechConfig config) {
this.config = config;
}
public AudioSpeechConfig getAudioSpeechConfig() {
return config;
}
private ManagedChannel managedChannel;
private SpeechRecognitionStub speechRecognitionStub;
private StreamObserver<StreamingSpeechRequest> streamObserverRequest;
private AudioDataCallback audioDataCallback;
private AudioSpeechConfig audioSpeechConfig;
public void start() {
isRunning = true;
channel = ManagedChannelBuilder.forAddress(config.getIp(), config.getPort()).usePlaintext().build();
stub = SpeechRecognitionGrpc.newStub(ClientInterceptors.intercept(channel, new ClientInterceptor() {
@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
return new SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) {
@Override
public void start(Listener<RespT> responseListener, Metadata headers) {
long ts = config.getRequestSendTimestamp();
headers.put(X_DEV_ID, config.getDevId());
headers.put(X_REQUEST_SEND_TIMESTAMP, String.valueOf(ts));
headers.put(X_SIGNATURE, config.getSignature(ts));
headers.put(X_API_KEY, config.getApiKey(ts));
System.err.println("验证信息:" + headers.toString());
super.start(new SimpleForwardingClientCallListener<RespT>(responseListener) {
@Override
public void onHeaders(Metadata headers) {
super.onHeaders(headers);
}
}, headers);
}
};
}
}));
requestObserver = stub.recognizeStream(new StreamObserver<StreamingSpeechResponse>() {
@Override
public void onNext(StreamingSpeechResponse response) {
StreamingSpeechStatus status = response.getStatus();
System.err.println("当前音频处理进行到的时间点(音频开始时间为0):" + status.getProcessedTimestamp());
StreamingSpeechResult result = response.getResult();
System.err.println("此识别结果是否为最终结果:" + result.getIsFinal());
StreamingTranscription transcription = result.getBestTranscription();
System.err.println("转写结果:" + transcription.getTranscribedText());
if (callback != null && !transcription.getTranscribedText().isEmpty()) {
callback.setText(result.getIsFinal(), transcription.getTranscribedText());
}
}
public AudioSpeechServer(AudioSpeechConfig audioSpeechConfig, AudioDataCallback audioDataCallback) {
this.audioSpeechConfig = audioSpeechConfig;
this.audioDataCallback = audioDataCallback;
}
@Override
public void onError(Throwable t) {
System.err.println("异常:" + t);
}
public void start() {
System.out.println("依图实时语音转写服务启动中...");
managedChannel = ManagedChannelBuilder.forAddress(audioSpeechConfig.getIp(), audioSpeechConfig.getPort()).usePlaintext().build();
speechRecognitionStub = SpeechRecognitionGrpc.newStub(ClientInterceptors.intercept(managedChannel, new SpeechClientInterceptor(audioSpeechConfig)));
streamObserverRequest = speechRecognitionStub.recognizeStream(new StreamObserver<StreamingSpeechResponse>() {
@Override
public void onNext(StreamingSpeechResponse response) {
StreamingSpeechStatus status = response.getStatus();
System.err.println("当前音频处理进行到的时间点(音频开始时间为0):" + status.getProcessedTimestamp());
StreamingSpeechResult result = response.getResult();
System.err.println("此识别结果是否为最终结果:" + result.getIsFinal());
StreamingTranscription transcription = result.getBestTranscription();
System.err.println("转写结果:" + transcription.getTranscribedText());
if (audioDataCallback != null && !transcription.getTranscribedText().isEmpty()) {
audioDataCallback.onText(result.getIsFinal(), transcription.getTranscribedText());
}
}
@Override
public void onCompleted() {
System.err.println("完成:");
}
});
requestObserver.onNext(StreamingSpeechRequest.newBuilder().setStreamingSpeechConfig(getConfig()).build());
}
@Override
public void onError(Throwable t) {
System.err.println("异常:" + t);
if (audioDataCallback != null) {
audioDataCallback.onError(t);
}
}
public void stop() {
isRunning = false;
channel.shutdown();
}
@Override
public void onCompleted() {
System.err.println("完成:");
}
});
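// The first request on the stream carries only the StreamingSpeechConfig; the audio
// data follows in later requests sent through setAudioData(...).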
streamObserverRequest.onNext(StreamingSpeechRequest.newBuilder().setStreamingSpeechConfig(getStreamingSpeechConfig()).build());
System.out.println("依图实时语音转写服务启动完成");
}
public void setAudioData(byte[] bytes, int offset, int size) {
requestObserver.onNext(StreamingSpeechRequest.newBuilder().setAudioData(ByteString.copyFrom(bytes, offset, size)).build());
}
public void stop() {
System.out.println("依图实时语音转写服务停止中...");
managedChannel.shutdown();
System.out.println("依图实时语音转写服务停止完成");
}
public void setAudioData(InputStream is) throws IOException {
requestObserver.onNext(StreamingSpeechRequest.newBuilder().setAudioData(ByteString.readFrom(is)).build());
}
public void setAudioData(byte[] bytes, int offset, int size) {
streamObserverRequest.onNext(StreamingSpeechRequest.newBuilder().setAudioData(ByteString.copyFrom(bytes, offset, size)).build());
}
public StreamingSpeechConfig getConfig() {
SpeechConfig.Builder speechConfig = SpeechConfig.newBuilder();
speechConfig.setLang(SpeechConfig.Language.MANDARIN);
speechConfig.setScene(SpeechConfig.Scene.GENERALSCENE);
speechConfig.setRecognizeType(SpeechConfig.RecognizeType.ALL);
AudioConfig.Builder audioConfig = AudioConfig.newBuilder();
audioConfig.setAue(AudioConfig.AudioEncoding.PCM);
audioConfig.setSampleRate(config.getSampleRate());
StreamingSpeechConfig.Builder streamingSpeechConfig = StreamingSpeechConfig.newBuilder();
streamingSpeechConfig.setSpeechConfig(speechConfig.build());
streamingSpeechConfig.setAudioConfig(audioConfig.build());
return streamingSpeechConfig.build();
}
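// Builds the streaming recognition config: Mandarin, general scene, recognize type ALL,
// and PCM audio at the sample rate taken from AudioSpeechConfig.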
public StreamingSpeechConfig getStreamingSpeechConfig() {
SpeechConfig.Builder speechConfig = SpeechConfig.newBuilder();
speechConfig.setLang(SpeechConfig.Language.MANDARIN);
speechConfig.setScene(SpeechConfig.Scene.GENERALSCENE);
speechConfig.setRecognizeType(SpeechConfig.RecognizeType.ALL);
AudioConfig.Builder audioConfig = AudioConfig.newBuilder();
audioConfig.setAue(AudioConfig.AudioEncoding.PCM);
audioConfig.setSampleRate(audioSpeechConfig.getSampleRate());
StreamingSpeechConfig.Builder streamingSpeechConfig = StreamingSpeechConfig.newBuilder();
streamingSpeechConfig.setSpeechConfig(speechConfig.build());
streamingSpeechConfig.setAudioConfig(audioConfig.build());
return streamingSpeechConfig.build();
}
}
@@ -2,16 +2,19 @@ package com.ifacebox.speech;
public class AudioSpeechTest {
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
AudioSpeechServer server = new AudioSpeechServer(new AudioSpeechConfig(), new AudioDataCallback() {
@Override
public void setText(boolean isFinal, String text) {
System.out.println(text);
}
});
server.start();
new AudioSpeechTransfer(server).start();
}
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
new AudioSpeechTransfer(new AudioSpeechConfig(), new AudioDataCallback() {
@Override
public void onText(boolean isFinal, String text) {
System.out.println(text);
}
@Override
public void onError(Throwable t) {
System.err.println(t);
}
}).start();
}
}
@@ -10,95 +10,100 @@ import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
public class AudioSpeechTransfer implements Runnable {
private static final int AUDIO_SPEECH_TIME = 1000;
private volatile boolean isRunning = false;
private AudioFormat audioFormat = null;
private TargetDataLine targetDataLine = null;
private Thread transferThread = null;
private AudioSpeechServer server = null;
private static final int AUDIO_SPEECH_TIME = 1000;
private volatile boolean isRunning = false;
private TargetDataLine targetDataLine = null;
private Thread transferThread = null;
private AudioSpeechServer audioSpeechServer;
private AudioFormat audioFormat;
public AudioSpeechTransfer(AudioSpeechServer server) {
this.server = server;
this.audioFormat = new AudioFormat(server.getAudioSpeechConfig().getSampleRate(), 16, 1, true, false);
}
public AudioSpeechTransfer(AudioSpeechConfig audioSpeechConfig, AudioDataCallback audioDataCallback) {
this.audioSpeechServer = new AudioSpeechServer(audioSpeechConfig, audioDataCallback);
this.audioFormat = new AudioFormat(audioSpeechConfig.getSampleRate(), 16, 1, true, false);
}
public void start() {
System.out.println("开始录音...");
isRunning = true;
transferThread = new Thread(this);
transferThread.start();
}
public void start() {
System.out.println("开始录音...");
isRunning = true;
transferThread = new Thread(this);
transferThread.start();
}
public void stop() {
System.out.println("停止录音...");
isRunning = false;
audioFormat = null;
if (targetDataLine != null) {
targetDataLine.close();
targetDataLine = null;
}
if (transferThread != null) {
transferThread.interrupt();
transferThread = null;
}
}
public void stop() {
System.out.println("停止录音...");
isRunning = false;
audioFormat = null;
if (targetDataLine != null) {
targetDataLine.close();
targetDataLine = null;
}
if (transferThread != null) {
transferThread.interrupt();
transferThread = null;
}
if (audioSpeechServer != null) {
audioSpeechServer.stop();
audioSpeechServer = null;
}
}
@Override
public void run() {
byte[] bytes = new byte[1024];
ByteBuffer buffer = new ByteBuffer();
try {
targetDataLine = (TargetDataLine) AudioSystem.getLine(new DataLine.Info(TargetDataLine.class, audioFormat));
targetDataLine.open(audioFormat);
targetDataLine.start();
long ms = System.currentTimeMillis();
while (isRunning) {
int len = new AudioInputStream(targetDataLine).read(bytes);
if (Math.abs(System.currentTimeMillis() - ms) < AUDIO_SPEECH_TIME) {
byte[] data = new byte[len];
System.arraycopy(bytes, 0, data, 0, len);
buffer.put(data);
} else {
byte[] data = buffer.array();
System.out.println("间隔[" + AUDIO_SPEECH_TIME + "]毫秒发送数据:" + data.length);
server.setAudioData(data, 0, data.length);
buffer.clear();
ms = System.currentTimeMillis();
}
}
} catch (Exception e) {
e.printStackTrace();
stop();
}
}
@Override
public void run() {
byte[] bytes = new byte[1024];
ByteBuffer buffer = new ByteBuffer();
audioSpeechServer.start();
try {
targetDataLine = (TargetDataLine) AudioSystem.getLine(new DataLine.Info(TargetDataLine.class, audioFormat));
targetDataLine.open(audioFormat);
targetDataLine.start();
long ms = System.currentTimeMillis();
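// Pace the upload: buffer the captured audio and flush it roughly every AUDIO_SPEECH_TIME
// (1000 ms). As the AudioSpeechUI class comment notes, each connection should send about
// one second of audio per second; data sent faster than that may be dropped by the server.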
while (isRunning) {
int len = new AudioInputStream(targetDataLine).read(bytes);
if (len > 0) {
byte[] data = new byte[len];
System.arraycopy(bytes, 0, data, 0, len);
buffer.put(data);
}
if (Math.abs(System.currentTimeMillis() - ms) >= AUDIO_SPEECH_TIME) {
byte[] data = buffer.array();
System.out.println("间隔[" + AUDIO_SPEECH_TIME + "]毫秒发送数据:" + data.length);
audioSpeechServer.setAudioData(data, 0, data.length);
buffer.clear();
ms = System.currentTimeMillis();
}
}
} catch (Exception e) {
e.printStackTrace();
stop();
}
}
static class ByteBuffer {
private List<byte[]> buffer;
private int length;
static class ByteBuffer {
private List<byte[]> buffer;
private int length;
public ByteBuffer() {
buffer = new ArrayList<byte[]>();
length = 0;
}
public ByteBuffer() {
buffer = new ArrayList<byte[]>();
length = 0;
}
public void put(byte[] bytes) {
length = length + bytes.length;
buffer.add(bytes);
}
public void put(byte[] bytes) {
length = length + bytes.length;
buffer.add(bytes);
}
public void clear() {
length = 0;
buffer.clear();
}
public void clear() {
length = 0;
buffer.clear();
}
public byte[] array() {
byte[] bytes = new byte[length];
// Copy each chunk at its running offset; chunks may have different lengths.
int offset = 0;
for (int i = 0; i < buffer.size(); i++) {
byte[] data = buffer.get(i);
System.arraycopy(data, 0, bytes, offset, data.length);
offset += data.length;
}
return bytes;
}
}
public byte[] array() {
byte[] bytes = new byte[length];
// Copy each chunk at its running offset; chunks may have different lengths.
int offset = 0;
for (int i = 0; i < buffer.size(); i++) {
byte[] data = buffer.get(i);
System.arraycopy(data, 0, bytes, offset, data.length);
offset += data.length;
}
return bytes;
}
}
}
......@@ -6,19 +6,7 @@ import java.awt.Image;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import javax.swing.Box;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JComponent;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTabbedPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.UIManager;
import javax.swing.*;
/**
* <pre>
@@ -29,7 +17,7 @@ import javax.swing.UIManager;
* 连接路数 1路代表单个连接每秒发送1秒时长的音频(如果发送过快,超出的数据会被服务端丢弃,造成回显结果异常)
* </pre>
*/
public class AudioSpeechUI extends JFrame implements ActionListener {
public class AudioSpeechUI extends JFrame implements ActionListener, AudioDataCallback {
private static final long serialVersionUID = 1L;
public static final Image SPEECH = new ImageIcon(AudioSpeechUI.class.getResource("/speech.png")).getImage().getScaledInstance(32, 32, Image.SCALE_SMOOTH);
private JTabbedPane jTabbedPane;
@@ -50,35 +38,22 @@ public class AudioSpeechUI extends JFrame implements ActionListener {
private JTextField jTextFieldDevKey;
private JTextField jTextFieldSampleRate;
private JButton jButtonConfigSave;
private JButton jButtonConfigStart;
private JButton jButtonConfigStop;
private AudioSpeechTransfer audioSpeechTransfer;
private AudioSpeechServer server;
private AudioSpeechConfig config;
private AudioSpeechConfig audioSpeechConfig;
public AudioSpeechUI() {
config = new AudioSpeechConfig();
audioSpeechConfig = new AudioSpeechConfig();
super.setTitle("实时语音转写");
super.setIconImage(SPEECH);
super.setDefaultCloseOperation(EXIT_ON_CLOSE);
super.add(jTabbedPane = new JTabbedPane());
this.init();
this.initComponents();
super.pack();
super.setLocationRelativeTo(null);
super.setResizable(false);
server = new AudioSpeechServer(config, new AudioDataCallback() {
@Override
public void setText(boolean isFinal, String text) {
jLabelSpeech.setText(text);
if (isFinal) {
jTextArea.append(text + "\r\n");
jTextArea.setCaretPosition(jTextArea.getText().length());
}
}
});
}
public void init() {
public void initComponents() {
jTabbedPane.add("语音转写", jPanelSpeech = new JPanel());
jTabbedPane.add("服务设置", jPanelConfig = new JPanel());
jPanelSpeech.setLayout(new BorderLayout());
@@ -111,30 +86,21 @@ public class AudioSpeechUI extends JFrame implements ActionListener {
jPanelConfigTools.setLayout(new FlowLayout());
jPanelConfigTools.add(jButtonConfigSave = new JButton("保存"));
jButtonConfigSave.addActionListener(this);
jPanelConfigTools.add(jButtonConfigStart = new JButton("启动"));
jButtonConfigStart.addActionListener(this);
jPanelConfigTools.add(jButtonConfigStop = new JButton("停止"));
jButtonConfigStop.addActionListener(this);
jButtonConfigStop.setEnabled(false);
jTextFieldIp.setText(config.getIp());
jTextFieldPort.setText(String.valueOf(config.getPort()));
jTextFieldDevId.setText(config.getDevId());
jTextFieldDevKey.setText(config.getDevKey());
jTextFieldSampleRate.setText(String.valueOf(config.getSampleRate()));
jTextFieldIp.setText(audioSpeechConfig.getIp());
jTextFieldPort.setText(String.valueOf(audioSpeechConfig.getPort()));
jTextFieldDevId.setText(audioSpeechConfig.getDevId());
jTextFieldDevKey.setText(audioSpeechConfig.getDevKey());
jTextFieldSampleRate.setText(String.valueOf(audioSpeechConfig.getSampleRate()));
}
@Override
public void actionPerformed(ActionEvent e) {
if (e.getSource() == jButtonSpeechStart) {
if (!server.isRunning()) {
JOptionPane.showMessageDialog(this, "请先在服务设置中启动服务!");
} else {
jButtonSpeechStart.setEnabled(false);
jButtonSpeechStop.setEnabled(true);
audioSpeechTransfer = new AudioSpeechTransfer(server);
audioSpeechTransfer.start();
}
jButtonSpeechStart.setEnabled(false);
jButtonSpeechStop.setEnabled(true);
audioSpeechTransfer = new AudioSpeechTransfer(audioSpeechConfig, this);
audioSpeechTransfer.start();
}
if (e.getSource() == jButtonSpeechStop) {
jButtonSpeechStart.setEnabled(true);
@@ -146,37 +112,34 @@ public class AudioSpeechUI extends JFrame implements ActionListener {
}
if (e.getSource() == jButtonConfigSave) {
try {
config.setIp(jTextFieldIp.getText());
config.setPort(Integer.parseInt(jTextFieldPort.getText()));
config.setDevId(jTextFieldDevId.getText());
config.setDevKey(jTextFieldDevKey.getText());
config.setSampleRate(Integer.parseInt(jTextFieldSampleRate.getText()));
server.setAudioSpeechConfig(config);
audioSpeechConfig.setIp(jTextFieldIp.getText());
audioSpeechConfig.setPort(Integer.parseInt(jTextFieldPort.getText()));
audioSpeechConfig.setDevId(jTextFieldDevId.getText());
audioSpeechConfig.setDevKey(jTextFieldDevKey.getText());
audioSpeechConfig.setSampleRate(Integer.parseInt(jTextFieldSampleRate.getText()));
} catch (Exception ex) {
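// Swallow invalid numeric input from the config fields instead of surfacing a parse error.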
}
}
if (e.getSource() == jButtonConfigStart) {
jButtonConfigStart.setEnabled(false);
new Thread(new Runnable() {
@Override
public void run() {
server.start();
jButtonConfigStop.setEnabled(true);
}
}).start();
}
if (e.getSource() == jButtonConfigStop) {
jButtonConfigStop.setEnabled(false);
new Thread(new Runnable() {
@Override
public void run() {
server.stop();
jButtonConfigStart.setEnabled(true);
}
}).start();
}
@Override
public void onText(final boolean isFinal, final String text) {
// Recognition results arrive on a gRPC thread, so hand the Swing updates to the EDT.
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
jLabelSpeech.setText(text);
if (isFinal) {
jTextArea.append(text + "\r\n");
jTextArea.setCaretPosition(jTextArea.getText().length());
}
}
});
}
@Override
public void onError(final Throwable t) {
// Errors also arrive on a gRPC thread; reset the controls on the EDT and guard against a stopped transfer.
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
jButtonSpeechStart.setEnabled(true);
jButtonSpeechStop.setEnabled(false);
if (audioSpeechTransfer != null) {
audioSpeechTransfer.stop();
audioSpeechTransfer = null;
}
JOptionPane.showMessageDialog(AudioSpeechUI.this, "语音转写异常【" + t.getMessage() + "】");
}
});
}
public Box addFieldBox(String label, JComponent field) {
Box box = Box.createHorizontalBox();
box.add(new JLabel(label));
......
package com.ifacebox.speech;
import io.grpc.*;
public class SpeechClientInterceptor implements ClientInterceptor {
private static final Metadata.Key<String> X_DEV_ID = Metadata.Key.of("x-dev-id", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_REQUEST_SEND_TIMESTAMP = Metadata.Key.of("x-request-send-timestamp", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_SIGNATURE = Metadata.Key.of("x-signature", Metadata.ASCII_STRING_MARSHALLER);
private static final Metadata.Key<String> X_API_KEY = Metadata.Key.of("x-api-key", Metadata.ASCII_STRING_MARSHALLER);
private AudioSpeechConfig audioSpeechConfig;
public SpeechClientInterceptor(AudioSpeechConfig audioSpeechConfig) {
this.audioSpeechConfig = audioSpeechConfig;
}
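// Adds the authentication headers (x-dev-id, x-request-send-timestamp, x-signature,
// x-api-key) from AudioSpeechConfig to every outgoing gRPC call before it starts.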
@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
return new ForwardingClientCall.SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) {
@Override
public void start(Listener<RespT> responseListener, Metadata headers) {
long ts = audioSpeechConfig.getRequestSendTimestamp();
headers.put(X_DEV_ID, audioSpeechConfig.getDevId());
headers.put(X_REQUEST_SEND_TIMESTAMP, String.valueOf(ts));
headers.put(X_SIGNATURE, audioSpeechConfig.getSignature(ts));
headers.put(X_API_KEY, audioSpeechConfig.getApiKey(ts));
System.err.println("验证信息:" + headers.toString());
super.start(new ForwardingClientCallListener.SimpleForwardingClientCallListener<RespT>(responseListener) {
@Override
public void onHeaders(Metadata headers) {
super.onHeaders(headers);
}
}, headers);
}
};
}
}