调用火山云的语音生成TTS和语音识别STT
首先需要去火山云的控制台开通TTS和STT服务:语音技术 (volcengine.com)。
火山这里都提供了免费的额度可以使用
我这里是使用了java来调用API
目前我还了解到阿里的开源项目SenseVoice(STT)和CosyVoice(TTS)非常的不错,但是都是使用Python开发的。可以做到说话情绪的识别,感兴趣可以去github上了解一下。
TTS(首先需要导入它给的类)
package com.erroright.backend_server_java.pojo.util;import java.util.UUID;public class TtsRequest {public static final String APP_ID = "控制台的APPID";public static final String CLUSTER = "";public static final String Token = "";public static final String VoiceType = "BV001_streaming";//生成声音的选择(如果生成语音报错,就是你没开通这个音色的权限)public static final String Emotion = "angry";//语气public TtsRequest() {}public TtsRequest(String text) {this.request.text = text;}private App app = new App();private User user = new User();private Audio audio = new Audio();private Request request = new Request();public App getApp() {return app;}public void setApp(App app) {this.app = app;}public User getUser() {return user;}public void setUser(User user) {this.user = user;}public Audio getAudio() {return audio;}public void setAudio(Audio audio) {this.audio = audio;}public Request getRequest() {return request;}public void setRequest(Request request) {this.request = request;}public class App {private String appid = APP_ID;private String token = Token; // 目前未生效,填写默认值:access_tokenprivate String cluster = CLUSTER;public String getAppid() {return appid;}public void setAppid(String appid) {this.appid = appid;}public String getToken() {return token;}public void setToken(String token) {this.token = token;}public String getCluster() {return cluster;}public void setCluster(String cluster) {this.cluster = cluster;}}public class User {private String uid = "388808087185088"; // 目前未生效,填写一个默认值就可以public String getUid() {return uid;}public void setUid(String uid) {this.uid = uid;}}public class Audio {private String voice_type = VoiceType;private String encoding = "wav";private float speed_ratio = 1.0F;private float volume_ratio = 10;private float pitch_ratio = 10;private String emotion = Emotion;public String getVoice_type() {return voice_type;}public void setVoice_type(String voice_type) {this.voice_type = voice_type;}public String getEncoding() {return encoding;}public void setEncoding(String encoding) {this.encoding = 
encoding;}public float getSpeedRatio() {return speed_ratio;}public void setSpeedRatio(int speed_ratio) {this.speed_ratio = speed_ratio;}public float getVolumeRatio() {return volume_ratio;}public void setVolumeRatio(int volume_ratio) {this.volume_ratio = volume_ratio;}public float getPitchRatio() {return pitch_ratio;}public void setPitchRatio(int pitch_ratio) {this.pitch_ratio = pitch_ratio;}public String getEmotion() {return emotion;}public void setEmotion(int emotion) {this.emotion = String.valueOf(emotion);}}public class Request {private String reqid = UUID.randomUUID().toString();private String text;private String text_type = "plain";private String operation = "query";public String getReqid() {return reqid;}public void setReqid(String reqid) {this.reqid = reqid;}public String getText() {return text;}public void setText(String text) {this.text = text;}public String getText_type() {return text_type;}public void setText_type(String text_type) {this.text_type = text_type;}public String getOperation() {return operation;}public void setOperation(String operation) {this.operation = operation;}}
}
调用代码
package com.erroright.backend_server_java.util;import com.alibaba.fastjson.JSON;
import com.erroright.backend_server_java.pojo.util.TtsRequest;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.springframework.stereotype.Component;import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Base64;@Component
@Slf4j
public class TtsHttpClient {public static final String API_URL = "https://openspeech.bytedance.com/api/v1/tts";public static final String ACCESS_TOKEN = "填入火山云开通项目的Token";public static byte[] getTts(String content) throws IOException {log.info("TTS生成:"+content);TtsRequest ttsRequest = new TtsRequest(content);String json= JSON.toJSONString(ttsRequest);OkHttpClient client = new OkHttpClient();RequestBody body = RequestBody.create(json, MediaType.get("application/json; charset=utf-8"));Request request = new Request.Builder().url(API_URL).post(body).header("Authorization", "Bearer; " + ACCESS_TOKEN).build();try (Response response = client.newCall(request).execute()) {String TtsRresponse=response.body().string();// 提取 "data" 字段的值String data = TtsRresponse.split("\"data\":\"")[1].split("\"")[0];//保存生成的文件try (FileOutputStream fos = new FileOutputStream("output.wav")) {fos.write(Base64.getDecoder().decode(data));}// 解码 Base64 数据return Base64.getDecoder().decode(data);}}}
STT(导入类,在官方文档中是三个类,为了在springBoot中封装,分开了一个)
package com.erroright.backend_server_java.pojo.util;import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.handshake.ServerHandshake;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;class AsrParams {private App app;private User user;private Request request;private Audio audio;public AsrParams(App app, User user, Request request, Audio audio) {this.app = app;this.user = user;this.request = request;this.audio = audio;}public App getApp() {return app;}public void setApp(App app) {this.app = app;}public User getUser() {return user;}public void setUser(User user) {this.user = user;}public Request getRequest() {return request;}public void setRequest(Request request) {this.request = request;}public Audio getAudio() {return audio;}public void setAudio(Audio audio) {this.audio = audio;}public static class App {private String appid;private String cluster;private String token;public App(String appid, String cluster, String token) {this.appid = appid;this.cluster = cluster;this.token = token;}public String getAppid() {return appid;}public void setAppid(String appid) {this.appid = appid;}public String getCluster() {return cluster;}public void setCluster(String cluster) {this.cluster = cluster;}public String getToken() {return token;}public void setToken(String token) {this.token = token;}}public static class User {private String uid;public User(String uid) {this.uid = uid;}public String getUid() {return uid;}public void setUid(String uid) {this.uid = uid;}}public static class Request {private String reqid;private String workflow;private int nbest;private boolean show_utterances;private String result_type;private int sequence;public Request(String reqid, String workflow, int nbest, boolean show_utterances, String result_type, int sequence) {this.reqid = reqid;this.workflow = workflow;this.nbest = nbest;this.show_utterances = show_utterances;this.result_type = result_type;this.sequence = sequence;}public String getReqid() {return reqid;}public void setReqid(String reqid) {this.reqid = reqid;}public String getWorkflow() {return workflow;}public void setWorkflow(String workflow) {this.workflow = workflow;}public int 
getNbest() {return nbest;}public void setNbest(int nbest) {this.nbest = nbest;}public boolean isShow_utterances() {return show_utterances;}public void setShow_utterances(boolean show_utterances) {this.show_utterances = show_utterances;}public String getResult_type() {return result_type;}public void setResult_type(String result_type) {this.result_type = result_type;}public int getSequence() {return sequence;}public void setSequence(int sequence) {this.sequence = sequence;}}public static class Audio {private String format;private String codec;private int rate;private int bits;private int channels;public Audio(String format, String codec, int rate, int bits, int channels) {this.format = format;this.codec = codec;this.rate = rate;this.bits = bits;this.channels = channels;}public String getFormat() {return format;}public void setFormat(String format) {this.format = format;}public String getCodec() {return codec;}public void setCodec(String codec) {this.codec = codec;}public int getRate() {return rate;}public void setRate(int rate) {this.rate = rate;}public int getBits() {return bits;}public void setBits(int bits) {this.bits = bits;}public int getChannels() {return channels;}public void setChannels(int channels) {this.channels = channels;}}
}public class AsrClient extends WebSocketClient {private static final String URL = "wss://openspeech.bytedance.com/api/v2/asr";private static final Logger logger = LoggerFactory.getLogger(WebSocketClient.class);private String appid;private String token;private String sk;private String cluster;private String workflow = "audio_in,resample,partition,vad,fe,decode,nlu_punctuate";private String uid = "usesr_id";private int nhest = 1;private boolean show_utterances = true;private String result_type = "full";private String format = "wav";private String codec = "raw";private int sample_rate = 16000;private int channels = 1;private int bits = 16;private AuthType authType = AuthType.TOKEN;private byte[] params_msg = null;private AsrResponse asr_response;private CountDownLatch recv_latch = null;private int recv_timeout = 5;private boolean recv_suc = true;public static AsrClient build() throws URISyntaxException {URI uri = new URI(URL);return new AsrClient(uri);}// TODO 接受一个 listener 监听消息, onOpen, onMessage, onError, onCompleteprivate AsrClient(URI uri) {super(uri);}public static class ProtocolVersion {static public int PROTOCOL_VERSION = 0b0001;}public static class MessageType {static public int FULL_CLIENT_REQUEST = 0b0001;static public int AUDIO_ONLY_CLIENT_REQUEST = 0b0010;static public int FULL_SERVER_RESPONSE = 0b1001;static public int SERVER_ACK = 0b1011;static public int ERROR_MESSAGE_FROM_SERVER = 0b1111;}public static class MessageTypeFlag {static public int NO_SEQUENCE_NUMBER = 0b0000;static public int POSITIVE_SEQUENCE_CLIENT_ASSGIN = 0b0001;static public int NEGATIVE_SEQUENCE_SERVER_ASSGIN = 0b0010;static public int NEGATIVE_SEQUENCE_CLIENT_ASSGIN = 0b0011;}public static class MessageSerial {public int NO_SERIAL = 0b0000;public static int JSON = 0b0001;public int CUSTOM_SERIAL = 0b1111;}public static class MessageCompress {public int NO_COMPRESS = 0b0000;public static int GZIP = 0b0001;public int CUSTOM_COMPRESS = 0b1111;}public enum AuthType 
{TOKEN,SIGNATURE;}@Overridepublic void onOpen(ServerHandshake serverHandshake) {logger.info("asr client onOpen");}@Overridepublic void onMessage(String s) {logger.info("onMessage String, should be onMessage(ByteBuffer) called");
// try {
// if (parse_response(s) != 0) {
// logger.error("error happends to close connection");
// close();
// }
// } catch (IOException e) {
// e.printStackTrace();
// }}@Overridepublic void onMessage(ByteBuffer bytes) {try {if (parse_response(bytes) != 0) {recv_suc = false;logger.error("error happends to close connection");close();}recv_latch.countDown();} catch (IOException e) {e.printStackTrace();}}@Overridepublic void onClose(int i, String s, boolean b) {logger.info("asr onClose {}, {}, {}", i, s, b);}@Overridepublic void onError(Exception e) {logger.info("asr onError {}", e.getMessage());recv_suc = false;recv_latch.countDown();this.close();}// public int asr_connect() throws IOException, NoSuchAlgorithmException, InvalidKeyException {
// this.params_msg = construct_param();
// set_auth_header();
// this.connect();
// return 0;
// }public boolean asr_sync_connect() throws IOException, InterruptedException, NoSuchAlgorithmException, InvalidKeyException {this.params_msg = construct_param();set_auth_header();boolean ret = this.connectBlocking();if (!ret) {return ret;}recv_latch = new CountDownLatch(1);this.send(this.params_msg);ret = recv_latch.await(recv_timeout, TimeUnit.SECONDS);return ret && recv_suc;}public AsrResponse asr_send(byte[] audio, boolean is_last) throws IOException, InterruptedException {recv_latch = new CountDownLatch(1);byte[] payload = construct_audio_payload(audio, is_last);this.send(payload);boolean ret = recv_latch.await(recv_timeout, TimeUnit.SECONDS);if (!ret) {logger.error("recv message timeout");this.close();return new AsrResponse();}return asr_response;}public int asr_close() {this.close();return 0;}private void set_auth_header() throws NoSuchAlgorithmException, InvalidKeyException {if (authType == AuthType.TOKEN) {this.addHeader("Authorization", "Bearer; " + token);return;}String custom_header = "Custom";String custom_cont = "auth_custom";this.addHeader(custom_header, custom_cont);String str = "GET " + getURI().getPath() + " HTTP/1.1\n"+ custom_cont + "\n";byte[] str_byte = str.getBytes(StandardCharsets.UTF_8);byte[] data = concat_byte(str_byte, this.params_msg);byte[] sk_byte = this.sk.getBytes(StandardCharsets.UTF_8);String HMAC_SHA256 = "HmacSHA256";Mac sha256Hmac = Mac.getInstance(HMAC_SHA256);SecretKeySpec keySpec = new SecretKeySpec(sk_byte, HMAC_SHA256);sha256Hmac.init(keySpec);byte[] mac_data = sha256Hmac.doFinal(data);String base64_data = Base64.getUrlEncoder().encodeToString(mac_data);String auth_cont = "HMAC256; access_token=\"" + this.token+ "\"; mac=\"" + base64_data+ "\"; h=\"" + custom_header + "\"";this.addHeader("Authorization", auth_cont);}private byte[] gzip_compress(byte[] content) throws IOException {ByteArrayOutputStream out = new ByteArrayOutputStream(content.length);GZIPOutputStream gzip = new 
GZIPOutputStream(out);gzip.write(content);gzip.close();byte[] result = out.toByteArray();out.close();return result;}private byte[] gzip_decompress(byte[] content) throws IOException {ByteArrayInputStream in = new ByteArrayInputStream(content);GZIPInputStream gzip = new GZIPInputStream(in);ByteArrayOutputStream out = new ByteArrayOutputStream();byte[] buff = new byte[1024];int len = 0;while ((len = gzip.read(buff, 0, buff.length)) > 0) {out.write(buff, 0, len);}byte[] result = out.toByteArray();in.close();gzip.close();out.close();return result;}private byte[] construct_param() throws IOException {int header_len = 4;byte[] header = new byte[header_len];header[0] = (byte) (ProtocolVersion.PROTOCOL_VERSION << 4 | (header_len >> 2));header[1] = (byte) (MessageType.FULL_CLIENT_REQUEST << 4 | MessageTypeFlag.NO_SEQUENCE_NUMBER);header[2] = (byte) (MessageSerial.JSON << 4 | MessageCompress.GZIP);header[3] = 0;String reqid = UUID.randomUUID().toString();AsrParams.App app = new AsrParams.App(appid, cluster, token);AsrParams.User user = new AsrParams.User(uid);AsrParams.Request request = new AsrParams.Request(reqid, workflow, 1, show_utterances, result_type, 1);AsrParams.Audio audio = new AsrParams.Audio(format, codec, sample_rate, bits, channels);AsrParams asr_params = new AsrParams(app, user, request, audio);ObjectMapper mapper = new ObjectMapper();
// String params_json = mapper.writeValueAsString(asr_params);byte[] payload = mapper.writeValueAsBytes(asr_params);logger.info("params_json {}", new String(payload));payload = gzip_compress(payload);// java big-endian defaultint payload_len = payload.length;ByteBuffer bb = ByteBuffer.allocate(4);//b.order(ByteOrder.BIG_ENDIAN); // optional, the initial order of a byte buffer is always BIG_ENDIAN.bb.putInt(payload_len);byte[] pl_byte = bb.array();return concat_byte(header, pl_byte, payload);}private int parse_response(ByteBuffer msg) throws IOException {byte[] msg_byte = msg.array();int header_len = (msg_byte[0] & 0x0f) << 2;int message_type = (msg_byte[1] & 0xf0) >> 4;int message_type_flag = msg_byte[1] & 0x0f;int message_serial = (msg_byte[2] & 0xf0) >> 4;int message_compress = msg_byte[2] & 0x0f;byte[] payload = null;int payload_len = 0;int payload_offset = header_len;if (message_type == MessageType.FULL_SERVER_RESPONSE) {ByteBuffer bb = ByteBuffer.wrap(msg_byte, payload_offset, 4);payload_len = bb.getInt();payload_offset += 4;} else if (message_type == MessageType.SERVER_ACK) {ByteBuffer bb = ByteBuffer.wrap(msg_byte, payload_offset, 4);int seq = bb.getInt();payload_offset += 4;if (msg_byte.length > 8) {payload_len = ByteBuffer.wrap(msg_byte, payload_offset, 4).getInt();payload_offset += 4;}} else if (message_type == MessageType.ERROR_MESSAGE_FROM_SERVER) {int error_code = ByteBuffer.wrap(msg_byte, payload_offset, 4).getInt();payload_offset += 4;payload_len = ByteBuffer.wrap(msg_byte, payload_offset, 4).getInt();payload_offset += 4;} else {logger.error("unsupported message type {}", message_type);return -1;}payload = new byte[msg_byte.length - payload_offset];System.arraycopy(msg_byte, payload_offset, payload, 0, payload.length);if (message_compress == MessageCompress.GZIP) {payload = gzip_decompress(payload);}if (message_serial == MessageSerial.JSON) {ObjectMapper mapper = new 
ObjectMapper().disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);asr_response = mapper.readValue(payload, 0, payload.length, AsrResponse.class);}if (asr_response.getCode() != 1000) {logger.error("asr resposne {}", new String(payload));return -1;}if (asr_response.getSequence() < 0) {logger.debug("get last response");}// logger.info("asr response {}", new String(payload));return 0;}private byte[] construct_audio_payload(byte[] audio, boolean is_last) throws IOException {int header_len = 4;byte[] header = new byte[header_len];header[0] = (byte) (ProtocolVersion.PROTOCOL_VERSION << 4 | (header_len >> 2));if (!is_last) {header[1] = (byte) (MessageType.AUDIO_ONLY_CLIENT_REQUEST << 4 | MessageTypeFlag.NO_SEQUENCE_NUMBER);} else {header[1] = (byte) (MessageType.AUDIO_ONLY_CLIENT_REQUEST << 4 | MessageTypeFlag.NEGATIVE_SEQUENCE_SERVER_ASSGIN);}header[2] = (byte) (MessageSerial.JSON << 4 | MessageCompress.GZIP);header[3] = 0;byte[] payload = gzip_compress(audio);int payload_len = payload.length;ByteBuffer bb = ByteBuffer.allocate(4);bb.putInt(payload_len);byte[] pl_byte = bb.array();return concat_byte(header, pl_byte, payload);}public void setAppid(String appid) {this.appid = appid;}public void setToken(String token) {this.token = token;}public void setSk(String sk) {this.sk = sk;}public void setCluster(String cluster) {this.cluster = cluster;}public void setWorkflow(String workflow) {this.workflow = workflow;}public void setUid(String uid) {this.uid = uid;}public void setShow_utterances(boolean show_utterances) {this.show_utterances = show_utterances;}public void setResult_type(String result_type) {this.result_type = result_type;}public void setFormat(String format) {this.format = format;}public void setCodec(String codec) {this.codec = codec;}public void setSample_rate(int sample_rate) {this.sample_rate = sample_rate;}public void setChannels(int channels) {this.channels = channels;}public void setBits(int bits) {this.bits = bits;}public AuthType getAuthType() 
{return authType;}public void setAuthType(AuthType authType) {this.authType = authType;}public AsrResponse getAsrResponse() {return asr_response;}private byte[] concat_byte(byte[] first, byte[] second) {byte[] result = new byte[first.length + second.length];System.arraycopy(first, 0, result, 0, first.length);System.arraycopy(second, 0, result, first.length, second.length);return result;}private byte[] concat_byte(byte[] first, byte[] second, byte[] third) {byte[] result = new byte[first.length + second.length + third.length];System.arraycopy(first, 0, result, 0, first.length);System.arraycopy(second, 0, result, first.length, second.length);System.arraycopy(third, 0, result, first.length+second.length, third.length);return result;}
}
package com.erroright.backend_server_java.pojo.util;public class AsrResponse {private String reqid = "unknow";private int code = 0;private String message = "";private int sequence = 0;private Result[] result;private Addition addition;public String getReqid() {return reqid;}public void setReqid(String reqid) {this.reqid = reqid;}public int getCode() {return code;}public void setCode(int code) {this.code = code;}public String getMessage() {return message;}public void setMessage(String message) {this.message = message;}public int getSequence() {return sequence;}public void setSequence(int sequence) {this.sequence = sequence;}public Result[] getResult() {return result;}public void setResult(Result[] result) {this.result = result;}public Addition getAddition() {return addition;}public void setAddition(Addition addition) {this.addition = addition;}public static class Result {private String text;private int confidence;private String language;private Utterances[] utterances;private float global_confidence;public String getText() {return text;}public void setText(String text) {this.text = text;}public int getConfidence() {return confidence;}public void setConfidence(int confidence) {this.confidence = confidence;}public String getLanguage() {return language;}public void setLanguage(String language) {this.language = language;}public Utterances[] getUtterances() {return utterances;}public void setUtterances(Utterances[] utterances) {this.utterances = utterances;}public float getGlobal_confidence() {return global_confidence;}public void setGlobal_confidence(float global_confidence) {this.global_confidence = global_confidence;}}public static class Utterances {private String text;private int start_time;private int end_time;private boolean definite;private String language;private Words[] words;public String getText() {return text;}public void setText(String text) {this.text = text;}public int getStart_time() {return start_time;}public void setStart_time(int start_time) 
{this.start_time = start_time;}public int getEnd_time() {return end_time;}public void setEnd_time(int end_time) {this.end_time = end_time;}public boolean isDefinite() {return definite;}public void setDefinite(boolean definite) {this.definite = definite;}public String getLanguage() {return language;}public void setLanguage(String language) {this.language = language;}public Words[] getWords() {return words;}public void setWords(Words[] words) {this.words = words;}}public static class Words {private String text;private int start_time;private int end_time;private int blank_duration;public String getText() {return text;}public void setText(String text) {this.text = text;}public int getStart_time() {return start_time;}public void setStart_time(int start_time) {this.start_time = start_time;}public int getEnd_time() {return end_time;}public void setEnd_time(int end_time) {this.end_time = end_time;}public int getBlank_duration() {return blank_duration;}public void setBlank_duration(int blank_duration) {this.blank_duration = blank_duration;}}public static class Addition {private String duration;public String getDuration() {return duration;}public void setDuration(String duration) {this.duration = duration;}}
}
调用方式
package com.erroright.backend_server_java.util;import com.erroright.backend_server_java.pojo.util.AsrClient;
import com.erroright.backend_server_java.pojo.util.AsrResponse;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;@Slf4j
@Component
public class SttStreamClient {String appid = ""; // 项目的 appidString token = ""; // 项目的 tokenString cluster = ""; // 请求的集群String audio_format = "wav"; // wav 或者 mp3, 根据音频类型设置AsrClient asr_client = null;SttStreamClient() throws URISyntaxException, IOException, NoSuchAlgorithmException, InvalidKeyException, InterruptedException {asr_client = AsrClient.build();asr_client.setAppid(appid);asr_client.setToken(token);asr_client.setCluster(cluster);asr_client.setFormat(audio_format);asr_client.setShow_utterances(true);asr_client.asr_sync_connect();}public String STT( byte[] file ) throws URISyntaxException, JsonProcessingException, FileNotFoundException {long startTime = System.currentTimeMillis();String STTResult="";try {// File file = new File(audio_path);// FileInputStream fp = new FileInputStream(file);byte[] b = new byte[64000];int len = 0;int count = 0;AsrResponse asr_response = new AsrResponse();// while ((len = fp.read(b)) > 0) {// count += 1;// asr_response = asr_client.asr_send(Arrays.copyOfRange(b, 0, len), fp.available() == 0);// }while (len < file.length) {int bytesToRead = Math.min(b.length, file.length - len);System.arraycopy(file, len, b, 0, bytesToRead);len += bytesToRead;asr_response = asr_client.asr_send(Arrays.copyOfRange(b, 0, bytesToRead), len == file.length);count += 1;}// get asr text// AsrResponse response = asr_client.getAsrResponse();for (AsrResponse.Result result: asr_response.getResult()) {STTResult+=result.getText();}} catch (Exception e) {System.err.println(e.getMessage());} finally {if (asr_client != null) {asr_client.asr_close();}long endTime = System.currentTimeMillis();log.info("语音识别执行时间: " +( endTime - startTime) / 1000.0);return STTResult;}}
}
相关文章:
调用火山云的语音生成TTS和语音识别STT
首先需要去火山云的控制台开通TTS和STT服务语音技术 (volcengine.com) 火山这里都提供了免费的额度可以使用 我这里是使用了java来调用API 目前我还了解到阿里的开源项目SenseVoice(STT)和CosyVoice(TTS)非常的不错,但是都是使用Python开发…...
中间件解析漏洞
一:IIS less-1 IIS6.X 步骤一:在iis的网站根目录新建一个名为x.asp的文件 步骤二:在x.asp中新建一个.txt文件,内容为<%now()%> asp代码,更改后缀为jpg 步骤三:在外部浏览器进行访问Windows2003的ip/x.asp/1.jpg,…...
如何在Mac电脑上本地部署Stable Diffusion:详细教程(webUI)
Stable Diffusion是一款强大的AI生成图像模型,它可以基于文本描述生成高质量的图像。对于想要在本地运行此模型的用户来说,使用Mac电脑部署Stable Diffusion是一个非常吸引人的选择,特别是对于M1或M2芯片的用户。本文将详细介绍如何在Mac上本…...
FPGA随记——移位寄存器
数电知识——移位寄存器:移位寄存器——数电第六章学习-CSDN博客 移位寄存器在FPGA中:FPGA原理与结构(5)——移位寄存器(Shift Registers)-CSDN博客...
Java | Leetcode Java题解之第390题消除游戏
题目: 题解: class Solution {public int lastRemaining(int n) {int a1 = 1;int k = 0, cnt = n, step = 1;while (cnt > 1) {if (k % 2 == 0) { /* 正向 */ a1 = a1 + step;} else { /* 反向 */ a1 = (cnt % 2 == 0) ? a1 : a1 + step;}k++;cnt = cnt >> 1;step = s…...
新型PyPI攻击技术可能导致超2.2万软件包被劫持
一种针对 Python 软件包索引(PyPI)注册表的新型供应链攻击技术已在野外被利用,并且目前正试图渗透到下游组织中。 软件供应链安全公司 JFrog 将其代号定为Revival Hijack,并称这种攻击方法可用于劫持 2.2万个现有 PyPI 软件包…...
spring cloud gateway 之删除请求头
在使用spring gateway作为网关时,我们经常需要在将请求转发到下游服务时,过滤掉某些请求头,以避免不必要的信息泄露,而spring gateway提供了RemoveRequestHeader内置的过滤器帮我们实现该功能,此外,我们也可…...
Flutter自动打包ios ipa并且上传
该脚本会自动打包ios ipa 并自动上传,中间自动flutter clean,自动 pod install。里面需要填写自己应用的 apiKey和apiIssuer,如我的例子中apiKey 为 1234,apiIssuer 为5678。首先在flutter 工程目录新建 shell目录,目录下新建ipa.sh文件…...
深入理解synchronized的原理是什么
对象头锁机制原则 Synchronized 的原理是什么 Synchronized 是由JVM实现的一种实现互斥同步的实现方式。如果查看synchronized关键字修饰的字节码,会发现在编译器生成了monitorenter和monitorexit两个字节码指令。 这两个指令的意思就是在虚拟机执行到monitore…...
Electron32-Vue3OS桌面管理os模板|vite5+electron32+arco后台os系统
原创新作electron32.x+vue3+arco.design仿ipad/windows桌面os系统。 基于最新跨平台技术Electron32、Vite5、Vue3 setup、Pinia2、Arco-Design、Echarts、Sortablejs实战开发桌面版osx管理系统。内置ipad/windows两种桌面风格模板、动态json配置桌面图标、自研栅格拖拽布局模板。…...
c++ 定义函数
在C中,定义函数是一个基本的编程概念。函数是执行特定任务的一段代码,可以接受参数并返回值。下面是关于如何定义和使用函数的详细介绍。 1. 函数的基本结构 函数的基本结构包括以下几个部分: 返回类型:表示函数返回值的类型。…...
【深度学习 计算机视觉】计算机视觉工程师所需的和有帮助的基本技能
计算机视觉工程师通常需要具备一系列的技术和非技术技能,以下是一些基本技能和知识领域,它们对于在这一领域取得成功非常有帮助: 技术技能 编程能力: 熟练掌握至少一种编程语言,如Python、C或Java。熟悉数据结构和算…...
【CSS】如何写渐变色文字并且有打光效果
效果如上,其实核心除了渐变色文字的设置 background: linear-gradient(270deg, #d2a742 94%, #f6e2a7 25%, #d5ab4a 48%, #f6e2a7 82%, #d1a641 4%);color: #e8bb2c;background-clip: text;color: transparent;还有就是打光效果,原理其实就是两块遮罩…...
Android 14(API 级别 34)中,DexClassLoader 不再支持可写 dex/jar 文件
Android 14(API 级别 34)中,DexClassLoader 不再支持从可写文件加载 dex/jar 文件。这意味着从Android 14开始,你不能再使用 DexClassLoader 来动态加载位于内部存储中的dex/jar文件,除非这些文件被设置为只读。 解决…...
Linux -动静态库
文章目录 1.文件系统1.1 inode1.2 硬链接定义特点使用方法 1.3软链接定义特点使用方法 2.动态库和静态库2.1动态库2.11定义与特点2.12使用方法 2.2 静态库2.21定义与特点2.22 使用方法 2.3示例2.31编写库代码2.32编译生成动态库2.33 编译生成静态库 2.4总结 1.文件系统 我们使…...
原点安全荣获“AutoSec Awards 安全之星”优秀汽车数据安全合规方案奖
9月3日,「AutoSec 2024第八届中国汽车网络安全周暨第五届智能汽车数据安全展」在上海盛大开幕。本届大会由谈思实验室和谈思汽车主办、上海市车联网协会联合主办,以汽车“网络数据安全、软件安全、功能安全”为主题,汇聚了国内外的技术专家、…...
2024前端面试题分享
前言 最近忙着面试很久没有更新文章了,分享一下我收集的前端面经,当然题目仅供参考(乞求秋招offer) 面试题 响应式布局 ---根据用户的的窗口变化而变化的布局方式 react 的hooks ---官方提供的钩子和自定义的钩子…...
数学基础 -- 线性代数之正交矩阵
正交矩阵 正交矩阵是线性代数中的一个重要概念,具有许多优良的性质,在数值计算、线性变换、信号处理等领域有着广泛的应用。 1. 正交矩阵的定义 一个 $n \times n$ 的方阵 $Q$ 如果满足以下条件: $Q^T Q = Q Q^T = I$ …...
PostgreSQL 17即将发布,新功能Top 3
按照计划,PostgreSQL 17 即将在 2024 年 9 月 26 日发布,目前已经发布了第一个 RC 版本,新版本的功能增强可以参考 Release Notes。 本文给大家分享其中 3 个重大的新增功能。 MERGE 语句增强 MERGE 语句是 PostgreSQL 15 增加的一个新功能…...
心觉:别再做单线程的打工人!换个思路突破
Hi,我是心觉,与你一起玩转潜意识、脑波音乐和吸引力法则,轻松搞定人生挑战,实现心中梦想! 挑战日更写作161/1000(完整记录在下面) 公门洞开纳百川 众心逐梦越千山 号召引领潜力绽 心觉潜意识无间 想让财富翻个2倍…...
变量 variable 声明- Rust 变量 let mut 声明与 C/C++ 变量声明对比分析
一、变量声明设计:let 与 mut 的哲学解析 Rust 采用 let 声明变量并通过 mut 显式标记可变性,这种设计体现了语言的核心哲学。以下是深度解析: 1.1 设计理念剖析 安全优先原则:默认不可变强制开发者明确声明意图 let x = 5; …...
Prompt Tuning、P-Tuning、Prefix Tuning的区别
一、Prompt Tuning、P-Tuning、Prefix Tuning的区别 1. Prompt Tuning(提示调优) 核心思想:固定预训练模型参数,仅学习额外的连续提示向量(通常是嵌入层的一部分)。实现方式:在输入文本前添加可训练的连续向量(软提示),模型只更新这些提示参数。优势:参数量少(仅提…...
Swift 协议扩展精进之路:解决 CoreData 托管实体子类的类型不匹配问题(下)
概述 在 Swift 开发语言中,各位秃头小码农们可以充分利用语法本身所带来的便利去劈荆斩棘。我们还可以恣意利用泛型、协议关联类型和协议扩展来进一步简化和优化我们复杂的代码需求。 不过,在涉及到多个子类派生于基类进行多态模拟的场景下,…...
渗透实战PortSwigger靶场-XSS Lab 14:大多数标签和属性被阻止
<script>标签被拦截 我们需要把全部可用的 tag 和 event 进行暴力破解 XSS cheat sheet: https://portswigger.net/web-security/cross-site-scripting/cheat-sheet 通过爆破发现body可以用 再把全部 events 放进去爆破 这些 event 全部可用 <body onres…...
Python爬虫(一):爬虫伪装
一、网站防爬机制概述 在当今互联网环境中,具有一定规模或盈利性质的网站几乎都实施了各种防爬措施。这些措施主要分为两大类: 身份验证机制:直接将未经授权的爬虫阻挡在外反爬技术体系:通过各种技术手段增加爬虫获取数据的难度…...
网络编程(UDP编程)
思维导图 UDP基础编程(单播) 1.流程图 服务器:短信的接收方 创建套接字 (socket)-----------------------------------------》有手机指定网络信息-----------------------------------------------》有号码绑定套接字 (bind)--------------…...
全面解析各类VPN技术:GRE、IPsec、L2TP、SSL与MPLS VPN对比
目录 引言 VPN技术概述 GRE VPN 3.1 GRE封装结构 3.2 GRE的应用场景 GRE over IPsec 4.1 GRE over IPsec封装结构 4.2 为什么使用GRE over IPsec? IPsec VPN 5.1 IPsec传输模式(Transport Mode) 5.2 IPsec隧道模式(Tunne…...
如何在网页里填写 PDF 表格?
有时候,你可能希望用户能在你的网站上填写 PDF 表单。然而,这件事并不简单,因为 PDF 并不是一种原生的网页格式。虽然浏览器可以显示 PDF 文件,但原生并不支持编辑或填写它们。更糟的是,如果你想收集表单数据,…...
招商蛇口 | 执笔CID,启幕低密生活新境
作为中国城市生长的力量,招商蛇口以“美好生活承载者”为使命,深耕全球111座城市,以央企担当匠造时代理想人居。从深圳湾的开拓基因到西安高新CID的战略落子,招商蛇口始终与城市发展同频共振,以建筑诠释对土地与生活的…...
免费PDF转图片工具
免费PDF转图片工具 一款简单易用的PDF转图片工具,可以将PDF文件快速转换为高质量PNG图片。无需安装复杂的软件,也不需要在线上传文件,保护您的隐私。 工具截图 主要特点 🚀 快速转换:本地转换,无需等待上…...
