文章标题
2017-09-15 17:30
225 查看
.wav文件根据8K16bit语音分离后的字节的重组方法
废话不多说,直接上代码

/**
 * Builds per-speaker WAV files from the engine's separation result and records their ids.
 *
 * <p>The JSON is parsed into {@link VoiceData} entries, each entry's total segment length is
 * computed, and the entries are sorted by that total in descending order. The longest entry
 * becomes speaker "A"; if a second entry exists it becomes speaker "B". Any further entries
 * are ignored. When the parsed list is empty, {@code formatFileId} itself is stored as the
 * "A" file and "B" is left empty.
 *
 * <p>All exceptions are caught and logged here; nothing is rethrown to the caller.
 *
 * @param originFileId key passed to {@code regTaskDao.updateBatchSplit} to locate the task row
 * @param jobInfoJson  JSON array returned by the engine, one element per {@link VoiceData}
 * @param formatFileId id used to load the audio file that is sliced into speaker files
 */
@Override
public void saveSplitVoiceByBatch(String originFileId, String jobInfoJson, String formatFileId) {
    try {
        if (!(StringUtils.isNotBlank(originFileId) && StringUtils.isNotBlank(jobInfoJson))) {
            logger.info("引擎未返回有效数据");
            return;
        }

        // Deserialize the engine result and store each speaker's summed segment length.
        List<VoiceData> voiceDataList = JSON.parseArray(jobInfoJson, VoiceData.class);
        for (VoiceData voiceData : voiceDataList) {
            int totalLength = 0;
            for (VoiceDataDTO segment : voiceData.getSegs()) {
                totalLength += segment.getLength();
            }
            voiceData.setLength(totalLength);
        }

        // Longest speaker first.
        Collections.sort(voiceDataList, new Comparator<VoiceData>() {
            @Override
            public int compare(VoiceData data1, VoiceData data2) {
                return data2.getLength().compareTo(data1.getLength());
            }
        });

        // Metadata of the file that will be sliced.
        AttachFile attachFile = attachFileDao.select(formatFileId);

        if (voiceDataList.isEmpty()) {
            regTaskDao.updateBatchSplit(originFileId, formatFileId, "");
            return;
        }

        byte[] waveFile = getVoiceFromFdfs(attachFile);
        if (waveFile == null) {
            // The download failed (already logged); without the bytes nothing can be assembled.
            logger.error("Unable to load audio bytes for file " + formatFileId
                    + "; speaker files were not generated");
            return;
        }

        // Speaker A: the entry with the largest total length.
        InputStream inputStreamA = splitVoiceInfo(waveFile, voiceDataList.get(0).getSegs());
        String attachFileIdA = uploadSplitVoice(attachFile, "A", inputStreamA);

        // Speaker B: only when a second entry exists. Reading index 1 unconditionally
        // would throw IndexOutOfBoundsException for a single-speaker result.
        String attachFileIdB = "";
        if (voiceDataList.size() > 1) {
            InputStream inputStreamB = splitVoiceInfo(waveFile, voiceDataList.get(1).getSegs());
            attachFileIdB = uploadSplitVoice(attachFile, "B", inputStreamB);
        }

        // Store the ids of the generated A/B files on the task identified by originFileId.
        regTaskDao.updateBatchSplit(originFileId, attachFileIdA, attachFileIdB);
        logger.info("形成新的A、B语音处理结束,A、B语音对应的文件主键分别为:A:" + attachFileIdA + " B:" + attachFileIdB);
    } catch (Exception e) {
        logger.error("Failed to save split voice files for origin file " + originFileId, e);
    }
}

/**
 * Uploads one generated speaker file and returns the id assigned to it.
 *
 * <p>The uploaded file is named after the original: {@code <base>.<fileName>说话人语音<ext>}.
 * When the original name has no extension, nothing is appended after the suffix.
 * The stream is always closed before this method returns.
 *
 * @param attachFile  the original audio file; only its original name is read
 * @param fileName    speaker label inserted into the new file name (e.g. "A" or "B")
 * @param inputStream content of the generated speaker file
 * @return the id returned by {@code attachFileService.uploadAndConfirm}, or {@code null}
 *         when an {@link IOException} occurred
 */
public String uploadSplitVoice(AttachFile attachFile, String fileName, InputStream inputStream) {
    AttachFile attach = new AttachFile();
    String attachFileId = null;
    try {
        attach.setFileSize((long) inputStream.available());

        String originalName = attachFile.getOriginalName();
        int dotIndex = originalName.lastIndexOf('.');
        // lastIndexOf returns -1 for names without an extension; substring(0, -1) would throw.
        String baseName = dotIndex >= 0 ? originalName.substring(0, dotIndex) : originalName;
        String extension = dotIndex >= 0 ? originalName.substring(dotIndex) : "";
        attach.setOriginalName(baseName + "." + fileName + "说话人语音" + extension);

        attachFileId = attachFileService.uploadAndConfirm(attach, inputStream);
    } catch (IOException e) {
        logger.error("Failed to upload split voice file for speaker " + fileName, e);
    } finally {
        // Close on every path so a failed upload does not leak the stream.
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException e) {
                logger.warn("Failed to close split voice stream for speaker " + fileName, e);
            }
        }
    }
    return attachFileId;
}

/**
 * Downloads the stored audio file and returns its raw bytes.
 *
 * @param attachFile the audio file record; it is cast to {@code FdfsFile} to read its
 *                   store location
 * @return the complete file content, or {@code null} when the download failed
 *         (the failure is logged)
 */
public byte[] getVoiceFromFdfs(AttachFile attachFile) {
    FdfsFile fdfsFile = (FdfsFile) attachFile;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        fdfsClient.downloadFile(out, fdfsFile.getStoreLocation());
        return out.toByteArray();
    } catch (Exception e) {
        // Previously swallowed silently, which surfaced later as an unexplained NPE.
        logger.error("Failed to download voice file from storage", e);
        return null;
    }
}

/**
 * Concatenates the given segments of the source audio and returns them as a new WAV stream.
 *
 * <p>The first 44 bytes of {@code waveFile} are reused as the header; the size fields are
 * rewritten by {@link #changeHead(byte[], byte[])}. Segment begin/end values are multiplied
 * by 16 bytes per millisecond (8 kHz, 16-bit), so they are treated as millisecond offsets.
 * Segments whose end lies beyond the file, or whose range is malformed, are skipped.
 *
 * @param waveFile  bytes of the source WAV file
 * @param voiceData segments belonging to one speaker
 * @return a stream over the assembled WAV file (header followed by the selected audio)
 */
public InputStream splitVoiceInfo(byte[] waveFile, List<VoiceDataDTO> voiceData) {
    // Header of the source file.
    byte[] dataHead = Arrays.copyOfRange(waveFile, 0, 44);
    // 8 kHz, 16-bit audio: bytes per millisecond.
    int lenPerMs = 8000 * 16 / 8 / 1000;

    ByteArrayOutputStream data = new ByteArrayOutputStream();
    for (VoiceDataDTO voice : voiceData) {
        int to = voice.getEnd() * lenPerMs;
        if (to > waveFile.length) {
            continue;
        }
        // NOTE(review): the published source had a stray "4000" token after the start offset,
        // which does not compile. It is dropped here so the copied range equals
        // (end - begin) * lenPerMs, the size the original pre-allocated. Confirm that no
        // extra offset was intended.
        // NOTE(review): offsets index the whole file, header bytes included, as in the
        // original. Confirm whether they should be shifted by the header length.
        int from = voice.getBegin() * lenPerMs;
        if (from < 0 || from > to) {
            logger.warn("Skipping invalid voice segment: begin=" + voice.getBegin()
                    + ", end=" + voice.getEnd());
            continue;
        }
        data.write(waveFile, from, to - from);
    }

    logger.info("准备 进入语音头部信息转换:");
    byte[] wavBytes = changeHead(dataHead, data.toByteArray());
    return new ByteArrayInputStream(wavBytes);
}

/**
 * Joins a WAV header with audio data and rewrites the header's size fields and data tag.
 *
 * <p>Bytes 4-7 receive {@code total length - 8}, bytes 36-39 receive the ASCII tag
 * {@code "data"}, and bytes 40-43 receive {@code total length - 44}. The integers are
 * encoded with {@code CommonUtils.toLH}. The combined array must be at least 44 bytes long.
 *
 * @param headBytes header bytes copied from the source file
 * @param data      audio bytes to place after the header
 * @return a new array holding the patched header followed by {@code data}
 */
public byte[] changeHead(byte[] headBytes, byte[] data) {
    logger.debug("进入语音头部信息转换:");

    // Header followed by audio data.
    int validLen = headBytes.length + data.length;
    byte[] validBytes = new byte[validLen];
    System.arraycopy(headBytes, 0, validBytes, 0, headBytes.length);
    System.arraycopy(data, 0, validBytes, headBytes.length, data.length);

    logger.info("原始头部信息中: " + CommonUtils.lBytesToInt(Arrays.copyOfRange(validBytes, 4, 8)) + " ,"
            + CommonUtils.lBytesToInt(Arrays.copyOfRange(validBytes, 40, 44)) + " ,"
            + new String(Arrays.copyOfRange(validBytes, 36, 40)));

    // Patch the header fields for the new total length.
    byte[] dataPreSizeBytes = CommonUtils.toLH(validLen - 8);
    byte[] dataNextSizeBytes = CommonUtils.toLH(validLen - 44);
    // Literal ASCII bytes, so the tag does not depend on the platform default charset.
    byte[] dataTypeBytes = {'d', 'a', 't', 'a'};
    System.arraycopy(dataPreSizeBytes, 0, validBytes, 4, dataPreSizeBytes.length);
    System.arraycopy(dataNextSizeBytes, 0, validBytes, 40, dataNextSizeBytes.length);
    System.arraycopy(dataTypeBytes, 0, validBytes, 36, dataTypeBytes.length);

    logger.info("新头部信息中: " + CommonUtils.lBytesToInt(Arrays.copyOfRange(validBytes, 4, 8)) + " ,"
            + CommonUtils.lBytesToInt(Arrays.copyOfRange(validBytes, 40, 44)) + " ,"
            + new String(Arrays.copyOfRange(validBytes, 36, 40)));
    return validBytes;
}

/**
 * One element of the engine's result array: the segments attributed to a single speaker.
 */
public class VoiceData {

    /** Segments belonging to this entry. */
    private List<VoiceDataDTO> segs;

    /** Value of the {@code spk} property; presumably the speaker index (confirm with the engine). */
    private Integer spk;

    /** Sum of the segment lengths; filled in by the caller after parsing. */
    private Integer length;

    /**
     * @return the segs
     */
    public List<VoiceDataDTO> getSegs() {
        return segs;
    }

    /**
     * @param segs the segs to set
     */
    public void setSegs(List<VoiceDataDTO> segs) {
        this.segs = segs;
    }

    /**
     * @return the spk
     */
    public Integer getSpk() {
        return spk;
    }

    /**
     * @param spk the spk to set
     */
    public void setSpk(Integer spk) {
        this.spk = spk;
    }

    /**
     * @return the length
     */
    public Integer getLength() {
        return length;
    }

    /**
     * @param length the length to set
     */
    public void setLength(Integer length) {
        this.length = length;
    }
}

/**
 * One segment reported by the engine. The slicing code multiplies {@code begin} and
 * {@code end} by the number of bytes per millisecond, so both are millisecond offsets.
 */
public class VoiceDataDTO {

    /** Segment start offset. */
    private Integer begin;

    /** Segment end offset. */
    private Integer end;

    /** Segment type as reported by the engine. */
    private String type;

    /** Length value set through {@link #setLength(Integer)}; {@link #getLength()} does not read it. */
    private Integer length;

    /**
     * @return the begin
     */
    public Integer getBegin() {
        return begin;
    }

    /**
     * @param begin the begin to set
     */
    public void setBegin(Integer begin) {
        this.begin = begin;
    }

    /**
     * @return the end
     */
    public Integer getEnd() {
        return end;
    }

    /**
     * @param end the end to set
     */
    public void setEnd(Integer end) {
        this.end = end;
    }

    /**
     * @return the type
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type to set
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * @return {@code end - begin}, computed on every call; the stored length field is ignored
     */
    public Integer getLength() {
        return this.end - this.begin;
    }

    /**
     * @param length the length to set
     */
    public void setLength(Integer length) {
        this.length = length;
    }
}
wav头文件格式:
8KHz采样、16比特量化的线性PCM语音信号的WAVE文件头格式表(共44字节)
偏移地址 字节数 数据类型 内容 文件头定义为
00H 4 char “RIFF” char riff_id[4]=”RIFF”
04H 4 long int 文件总长-8 long int size0=文件总长-8
08H 8 char “WAVEfmt ” char wave_fmt[8]
10H 4 long int 10 00 00 00H(PCM) long int size1=0x10
14H 2 int 01 00H int fmttag=0x01
16H 2 int 声道数 int channel=1 或2
18H 4 long int 采样率 long int samplespersec
1CH 4 long int 每秒播放字节数 long int bytepersec
20H 2 int 采样一次占字节数 int blockalign=声道数*量化数/8
22H 2 int 量化数 int bitpersamples=8或16
24H 4 char “data” char data_id=”data”
28H 4 long int 采样数据字节数 long int size2=文件总长-44
2CH 到文尾 char 采样数据