标签:
作者:郑童宇
GitHub:https://github.com/CrazyZty
if (recordVoice) { audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, Constant.RecordSampleRate, AudioFormat.CHANNEL_IN_MONO, pcmFormat.getAudioFormat(), audioRecordBufferSize); try { audioRecord.startRecording(); } catch (Exception e) { NoRecordPermission(); continue; } BufferedOutputStream bufferedOutputStream = FileFunction .GetBufferedOutputStreamFromFile(recordFileUrl); while (recordVoice) { int audioRecordReadDataSize = audioRecord.read(audioRecordBuffer, 0, audioRecordBufferSize); if (audioRecordReadDataSize > 0) { calculateRealVolume(audioRecordBuffer, audioRecordReadDataSize); if (bufferedOutputStream != null) { try { byte[] outputByteArray = CommonFunction .GetByteBuffer(audioRecordBuffer, audioRecordReadDataSize, Variable.isBigEnding); bufferedOutputStream.write(outputByteArray); } catch (IOException e) { e.printStackTrace(); } } } else { NoRecordPermission(); continue; } } if (bufferedOutputStream != null) { try { bufferedOutputStream.close(); } catch (Exception e) { LogFunction.error("关闭录音输出数据流异常", e); } } audioRecord.stop(); audioRecord.release(); audioRecord = null; }录音的实际实现和控制代码较多,在此仅抽出核心的录音代码进行讲解。在此为获取录音的原始数据,我使用了Android原生的AudioRecord,其他的平台基本也会提供类似的工具类。这段代码实现的功能是当录音开始后,应用会根据设定的采样率和声道数以及采样字节数来不断从MIC中获取原始的音频数据,然后将获取的音频数据写入到指定文件中,直至录音结束。这段代码逻辑比较清晰的,我就不过多讲解了。
/**
 * Reads the track metadata of the music file at {@code musicFileUrl}, configures
 * a decoder for track 0 and delegates to getDecodeData, which decodes and trims
 * the track into a PCM file at {@code decodeFileUrl}.
 *
 * @param musicFileUrl           path of the source music file
 * @param decodeFileUrl          destination path for the decoded PCM data
 * @param startSecond            trim start position, in seconds
 * @param endSecond              trim end position, in seconds
 * @param handler                handler on which progress callbacks are posted
 * @param decodeOperateInterface callback notified of decode progress
 * @return true if decoding was started, false on any setup failure
 */
private boolean decodeMusicFile(String musicFileUrl, String decodeFileUrl, int startSecond,
                                int endSecond, Handler handler,
                                DecodeOperateInterface decodeOperateInterface) {
    MediaExtractor mediaExtractor = new MediaExtractor();
    try {
        mediaExtractor.setDataSource(musicFileUrl);
    } catch (Exception e) {
        LogFunction.error("设置解码音频文件路径错误", e);
        mediaExtractor.release(); // FIX: the extractor was leaked on this path
        return false;
    }
    // NOTE(review): track 0 is assumed to be the audio track; getDecodeData
    // also selects track 0, so the two stay consistent.
    MediaFormat mediaFormat = mediaExtractor.getTrackFormat(0);
    int sampleRate = mediaFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)
            ? mediaFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) : 44100;
    int channelCount = mediaFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)
            ? mediaFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) : 1;
    long duration = mediaFormat.containsKey(MediaFormat.KEY_DURATION)
            ? mediaFormat.getLong(MediaFormat.KEY_DURATION) : 0;
    String mime = mediaFormat.containsKey(MediaFormat.KEY_MIME)
            ? mediaFormat.getString(MediaFormat.KEY_MIME) : "";
    LogFunction.log("歌曲信息",
            "Track info: mime:" + mime + " 采样率sampleRate:" + sampleRate
                    + " channels:" + channelCount + " duration:" + duration);
    if (CommonFunction.isEmpty(mime) || !mime.startsWith("audio/")) {
        LogFunction.error("解码文件不是音频文件", "mime:" + mime);
        mediaExtractor.release(); // FIX: the extractor was leaked on this path too
        return false;
    }
    // Some ffmpeg-backed devices report a mime type no platform decoder is
    // registered for; remap it to the standard MP3 mime type.
    if (mime.equals("audio/ffmpeg")) {
        mime = "audio/mpeg";
        mediaFormat.setString(MediaFormat.KEY_MIME, mime);
    }
    MediaCodec mediaCodec = null;
    try {
        mediaCodec = MediaCodec.createDecoderByType(mime);
        mediaCodec.configure(mediaFormat, null, null, 0);
    } catch (Exception e) {
        LogFunction.error("解码器configure出错", e);
        // FIX: release whatever was created before bailing out.
        if (mediaCodec != null) {
            mediaCodec.release();
        }
        mediaExtractor.release();
        return false;
    }
    getDecodeData(mediaExtractor, mediaCodec, decodeFileUrl, sampleRate, channelCount,
            startSecond, endSecond, handler, decodeOperateInterface);
    return true;
}decodeMusicFile方法的代码主要功能是获取背景音乐信息,初始化解码器,最后调用getDecodeData方法正式开始对背景音乐进行处理。
private void getDecodeData(MediaExtractor mediaExtractor, MediaCodec mediaCodec, String decodeFileUrl, int sampleRate, int channelCount, int startSecond, int endSecond, Handler handler, final DecodeOperateInterface decodeOperateInterface) { boolean decodeInputEnd = false; boolean decodeOutputEnd = false; int sampleDataSize; int inputBufferIndex; int outputBufferIndex; int byteNumber; long decodeNoticeTime = System.currentTimeMillis(); long decodeTime; long presentationTimeUs = 0; final long timeOutUs = 100; final long startMicroseconds = startSecond * 1000 * 1000; final long endMicroseconds = endSecond * 1000 * 1000; ByteBuffer[] inputBuffers; ByteBuffer[] outputBuffers; ByteBuffer sourceBuffer; ByteBuffer targetBuffer; MediaFormat outputFormat = mediaCodec.getOutputFormat(); MediaCodec.BufferInfo bufferInfo; byteNumber = (outputFormat.containsKey("bit-width") ? outputFormat.getInteger("bit-width") : 0) / 8; mediaCodec.start(); inputBuffers = mediaCodec.getInputBuffers(); outputBuffers = mediaCodec.getOutputBuffers(); mediaExtractor.selectTrack(0); bufferInfo = new MediaCodec.BufferInfo(); BufferedOutputStream bufferedOutputStream = FileFunction .GetBufferedOutputStreamFromFile(decodeFileUrl); while (!decodeOutputEnd) { if (decodeInputEnd) { return; } decodeTime = System.currentTimeMillis(); if (decodeTime - decodeNoticeTime > Constant.OneSecond) { final int decodeProgress = (int) ((presentationTimeUs - startMicroseconds) * Constant.NormalMaxProgress / endMicroseconds); if (decodeProgress > 0) { handler.post(new Runnable() { @Override public void run() { decodeOperateInterface.updateDecodeProgress(decodeProgress); } }); } decodeNoticeTime = decodeTime; } try { inputBufferIndex = mediaCodec.dequeueInputBuffer(timeOutUs); if (inputBufferIndex >= 0) { sourceBuffer = inputBuffers[inputBufferIndex]; sampleDataSize = mediaExtractor.readSampleData(sourceBuffer, 0); if (sampleDataSize < 0) { decodeInputEnd = true; sampleDataSize = 0; } else { presentationTimeUs = 
mediaExtractor.getSampleTime(); } mediaCodec.queueInputBuffer(inputBufferIndex, 0, sampleDataSize, presentationTimeUs, decodeInputEnd ? MediaCodec.BUFFER_FLAG_END_OF_STREAM : 0); if (!decodeInputEnd) { mediaExtractor.advance(); } } else { LogFunction.error("inputBufferIndex", "" + inputBufferIndex); } // decode to PCM and push it to the AudioTrack player outputBufferIndex = mediaCodec.dequeueOutputBuffer(bufferInfo, timeOutUs); if (outputBufferIndex < 0) { switch (outputBufferIndex) { case MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED: outputBuffers = mediaCodec.getOutputBuffers(); LogFunction.error("MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED", "[AudioDecoder]output buffers have changed."); break; case MediaCodec.INFO_OUTPUT_FORMAT_CHANGED: outputFormat = mediaCodec.getOutputFormat(); sampleRate = outputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE) ? outputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) : sampleRate; channelCount = outputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT) ? outputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) : channelCount; byteNumber = (outputFormat.containsKey("bit-width") ? 
outputFormat.getInteger("bit-width") : 0) / 8; LogFunction.error("MediaCodec.INFO_OUTPUT_FORMAT_CHANGED", "[AudioDecoder]output format has changed to " + mediaCodec.getOutputFormat()); break; default: LogFunction.error("error", "[AudioDecoder] dequeueOutputBuffer returned " + outputBufferIndex); break; } continue; } targetBuffer = outputBuffers[outputBufferIndex]; byte[] sourceByteArray = new byte[bufferInfo.size]; targetBuffer.get(sourceByteArray); targetBuffer.clear(); mediaCodec.releaseOutputBuffer(outputBufferIndex, false); if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) { decodeOutputEnd = true; } if (sourceByteArray.length > 0 && bufferedOutputStream != null) { if (presentationTimeUs < startMicroseconds) { continue; } byte[] convertByteNumberByteArray = ConvertByteNumber(byteNumber, Constant.RecordByteNumber, sourceByteArray); byte[] resultByteArray = ConvertChannelNumber(channelCount, Constant.RecordChannelNumber, Constant.RecordByteNumber, convertByteNumberByteArray); try { bufferedOutputStream.write(resultByteArray); } catch (Exception e) { LogFunction.error("输出解压音频数据异常", e); } } if (presentationTimeUs > endMicroseconds) { break; } } catch (Exception e) { LogFunction.error("getDecodeData异常", e); } } if (bufferedOutputStream != null) { try { bufferedOutputStream.close(); } catch (IOException e) { LogFunction.error("关闭bufferedOutputStream异常", e); } } if (sampleRate != Constant.RecordSampleRate) { Resample(sampleRate, decodeFileUrl); } if (mediaCodec != null) { mediaCodec.stop(); mediaCodec.release(); } if (mediaExtractor != null) { mediaExtractor.release(); } }getDecodeData方法是此次的进行解码和裁剪的核心,方法的传入参数中mediaExtractor,mediaCodec用以实际控制处理背景音乐的音频数据,decodeFileUrl用以指明解码和裁剪后的PCM文件的存储地址,sampleRate,channelCount分别用以指明背景音乐的采样率,声道数,startSecond用以指明裁剪背景音乐的开始时间,目前功能中默认为0,endSecond用以指明裁剪背景音乐的结束时间,数值大小由录音时长直接决定。
private static byte[] ConvertByteNumber(int sourceByteNumber, int outputByteNumber, byte[] sourceByteArray) { if (sourceByteNumber == outputByteNumber) { return sourceByteArray; } int sourceByteArrayLength = sourceByteArray.length; byte[] byteArray; switch (sourceByteNumber) { case 1: switch (outputByteNumber) { case 2: byteArray = new byte[sourceByteArrayLength * 2]; byte resultByte[]; for (int index = 0; index < sourceByteArrayLength; index += 1) { resultByte = CommonFunction.GetBytes((short) (sourceByteArray[index] * 256), Variable.isBigEnding); byteArray[2 * index] = resultByte[0]; byteArray[2 * index + 1] = resultByte[1]; } return byteArray; } break; case 2: switch (outputByteNumber) { case 1: int outputByteArrayLength = sourceByteArrayLength / 2; byteArray = new byte[outputByteArrayLength]; for (int index = 0; index < outputByteArrayLength; index += 1) { byteArray[index] = (byte) (CommonFunction.GetShort(sourceByteArray[2 * index], sourceByteArray[2 * index + 1], Variable.isBigEnding) / 256); } return byteArray; } break; } return sourceByteArray; }ConvertByteNumber方法的参数中sourceByteNumber代表背景音乐文件采样点字节数,outputByteNumber代表录音文件采样点字节数,两者如果相同就不处理,不相同则根据背景音乐文件采样点字节数进行不同的处理,本方法只对单字节存储和双字节存储进行了处理,欢迎在各位Github上填充其他采样点字节数的处理方法,
/**
 * Converts PCM data between mono and stereo channel layouts.
 * Mono to stereo duplicates each sample into both channels; stereo to mono
 * averages the left and right samples. Only 1-byte and 2-byte samples and
 * only 1<->2 channel conversions are handled; everything else is returned
 * unchanged.
 *
 * @param sourceChannelCount channel count of the input data (1 or 2)
 * @param outputChannelCount wanted channel count (1 or 2)
 * @param byteNumber         bytes per sample (1 or 2)
 * @param sourceByteArray    raw PCM data to convert
 * @return a newly allocated converted array, or {@code sourceByteArray} itself
 *         when no conversion applies
 */
private static byte[] ConvertChannelNumber(int sourceChannelCount, int outputChannelCount,
                                           int byteNumber, byte[] sourceByteArray) {
    if (sourceChannelCount == outputChannelCount) {
        return sourceByteArray;
    }
    switch (byteNumber) {
        case 1:
        case 2:
            break;
        default:
            return sourceByteArray;
    }
    int sourceByteArrayLength = sourceByteArray.length;
    byte[] byteArray;
    switch (sourceChannelCount) {
        case 1:
            switch (outputChannelCount) {
                case 2:
                    // Mono -> stereo: duplicate every sample into both channels.
                    byteArray = new byte[sourceByteArrayLength * 2];
                    byte firstByte;
                    byte secondByte;
                    switch (byteNumber) {
                        case 1:
                            for (int index = 0; index < sourceByteArrayLength; index += 1) {
                                firstByte = sourceByteArray[index];
                                byteArray[2 * index] = firstByte;
                                byteArray[2 * index + 1] = firstByte;
                            }
                            break;
                        case 2:
                            for (int index = 0; index < sourceByteArrayLength; index += 2) {
                                firstByte = sourceByteArray[index];
                                secondByte = sourceByteArray[index + 1];
                                byteArray[2 * index] = firstByte;
                                byteArray[2 * index + 1] = secondByte;
                                byteArray[2 * index + 2] = firstByte;
                                byteArray[2 * index + 3] = secondByte;
                            }
                            break;
                    }
                    return byteArray;
            }
            break;
        case 2:
            switch (outputChannelCount) {
                case 1:
                    // Stereo -> mono: average the left and right samples.
                    int outputByteArrayLength = sourceByteArrayLength / 2;
                    byteArray = new byte[outputByteArrayLength];
                    switch (byteNumber) {
                        case 1:
                            // FIX: the original stepped index by 2 here although one
                            // 8-bit output sample is produced per iteration, which
                            // skipped every other input frame and left half of the
                            // output array zero-filled. Step by 1.
                            for (int index = 0; index < outputByteArrayLength; index += 1) {
                                short averageNumber =
                                        (short) ((short) sourceByteArray[2 * index]
                                                + (short) sourceByteArray[2 * index + 1]);
                                byteArray[index] = (byte) (averageNumber >> 1);
                            }
                            break;
                        case 2:
                            // Two output bytes per iteration, so stepping by 2 is
                            // correct here (each iteration consumes one 4-byte frame).
                            for (int index = 0; index < outputByteArrayLength; index += 2) {
                                byte resultByte[] = CommonFunction.AverageShortByteArray(
                                        sourceByteArray[2 * index],
                                        sourceByteArray[2 * index + 1],
                                        sourceByteArray[2 * index + 2],
                                        sourceByteArray[2 * index + 3],
                                        Variable.isBigEnding);
                                byteArray[index] = resultByte[0];
                                byteArray[index + 1] = resultByte[1];
                            }
                            break;
                    }
                    return byteArray;
            }
            break;
    }
    return sourceByteArray;
}ConvertChannelNumber方法的参数中sourceChannelCount代表背景音乐文件声道数,outputChannelCount代表录音文件声道数,两者如果相同就不处理,不相同则根据声道数和采样点字节数进行不同的处理,本方法只对单双通道进行了处理,欢迎在GitHub上填充立体声等声道的处理方法。
/**
 * Resamples the PCM file at {@code decodeFileUrl} from {@code sampleRate} to
 * Constant.RecordSampleRate in place, by running the SSRC converter into a
 * temporary "…new" file and renaming it over the original on success.
 *
 * @param sampleRate    current sample rate of the PCM file
 * @param decodeFileUrl path of the PCM file to resample in place
 */
private static void Resample(int sampleRate, String decodeFileUrl) {
    String newDecodeFileUrl = decodeFileUrl + "new";
    // FIX: try-with-resources — the original leaked both streams when the
    // SSRC constructor threw, and its catch logged a copy-pasted message
    // about closing a bufferedOutputStream.
    try (FileInputStream fileInputStream = new FileInputStream(new File(decodeFileUrl));
         FileOutputStream fileOutputStream = new FileOutputStream(new File(newDecodeFileUrl))) {
        new SSRC(fileInputStream, fileOutputStream, sampleRate, Constant.RecordSampleRate,
                Constant.RecordByteNumber, Constant.RecordByteNumber, 1, Integer.MAX_VALUE,
                0, 0, true);
    } catch (IOException e) {
        LogFunction.error("重采样音频文件异常", e);
        // Keep the original file rather than replacing it with a partial result.
        return;
    }
    FileFunction.RenameFile(newDecodeFileUrl, decodeFileUrl);
}为了修改采样率,在此使用了SSRC在Java端的实现,在网上可以搜到一份关于SSRC的介绍:"SSRC = Synchronous Sample Rate Converter,同步采样率转换,直白地说就是只能做整数倍频,不支持任意频率之间的转换,比如44.1KHz<->48KHz。",但不同的SSRC实现原理有所不同,我使用的是来自https://github.com/shibatch/SSRC在Java端的实现,简单读了此SSRC在Java端实现的源码,其代码实现中通过判别重采样前后采样率的最大公约数是否满足设定条件作为是否可重采样的依据,可以支持常见的非整数倍频率的采样率转化,如44.1khz<->48khz,但如果目标采样率是比较特殊的采样率如某一较大的质数,那就无法支持重采样。
/**
 * Mixes two PCM files into a single MP3 file encoded through LAME.
 * The 2-byte indexing below assumes 16-bit samples in both inputs.
 *
 * Phase 1 encodes the leading, un-mixed part of whichever file the offset says
 * starts first; phase 2 mixes the two streams sample-by-sample with the given
 * weights until both are exhausted.
 *
 * @param firstAudioFilePath    first PCM input file
 * @param secondAudioFilePath   second PCM input file
 * @param composeAudioFilePath  destination MP3 file
 * @param deleteSource          delete both input files after a successful mix
 * @param firstAudioWeight      volume weight applied to the first file
 * @param secondAudioWeight     volume weight applied to the second file
 * @param audioOffset           byte offset between the files: negative plays
 *                              the second file alone first, positive the first
 * @param composeAudioInterface progress/success/failure callback (main thread)
 */
public static void ComposeAudio(String firstAudioFilePath, String secondAudioFilePath,
                                String composeAudioFilePath, boolean deleteSource,
                                float firstAudioWeight, float secondAudioWeight,
                                int audioOffset,
                                final ComposeAudioInterface composeAudioInterface) {
    boolean firstAudioFinish = false;
    boolean secondAudioFinish = false;
    byte[] firstAudioByteBuffer;
    byte[] secondAudioByteBuffer;
    byte[] mp3Buffer;
    short resultShort;
    short[] outputShortArray;
    int index;
    int firstAudioReadNumber;
    int secondAudioReadNumber;
    int outputShortArrayLength;
    final int byteBufferSize = 1024;
    firstAudioByteBuffer = new byte[byteBufferSize];
    secondAudioByteBuffer = new byte[byteBufferSize];
    // Worst-case LAME output size for a byteBufferSize chunk (7200 + 1.25*n).
    mp3Buffer = new byte[(int) (7200 + (byteBufferSize * 1.25))];
    outputShortArray = new short[byteBufferSize / 2];
    // Callbacks are always posted to the main thread.
    Handler handler = new Handler(Looper.getMainLooper());
    FileInputStream firstAudioInputStream =
            FileFunction.GetFileInputStreamFromFile(firstAudioFilePath);
    FileInputStream secondAudioInputStream =
            FileFunction.GetFileInputStreamFromFile(secondAudioFilePath);
    FileOutputStream composeAudioOutputStream =
            FileFunction.GetFileOutputStreamFromFile(composeAudioFilePath);
    LameUtil.init(Constant.RecordSampleRate, Constant.LameBehaviorChannelNumber,
            Constant.BehaviorSampleRate, Constant.LameBehaviorBitRate,
            Constant.LameMp3Quality);
    try {
        // Phase 1: consume |audioOffset| bytes of the file that starts first,
        // encoding it alone until the offset is used up (or that file ends).
        while (!firstAudioFinish && !secondAudioFinish) {
            index = 0;
            if (audioOffset < 0) {
                // Negative offset: the second file plays alone first.
                secondAudioReadNumber = secondAudioInputStream.read(secondAudioByteBuffer);
                // NOTE(review): computed before the EOF check; a -1 read gives
                // -1/2 == 0 in Java, so the loop below is simply skipped.
                outputShortArrayLength = secondAudioReadNumber / 2;
                for (; index < outputShortArrayLength; index++) {
                    resultShort = CommonFunction.GetShort(secondAudioByteBuffer[index * 2],
                            secondAudioByteBuffer[index * 2 + 1], Variable.isBigEnding);
                    outputShortArray[index] = (short) (resultShort * secondAudioWeight);
                }
                audioOffset += secondAudioReadNumber;
                if (secondAudioReadNumber < 0) {
                    secondAudioFinish = true;
                    break;
                }
                if (audioOffset >= 0) {
                    // Offset consumed: switch to the mixing phase.
                    break;
                }
            } else {
                // Positive (or zero) offset: the first file plays alone first.
                firstAudioReadNumber = firstAudioInputStream.read(firstAudioByteBuffer);
                outputShortArrayLength = firstAudioReadNumber / 2;
                for (; index < outputShortArrayLength; index++) {
                    resultShort = CommonFunction.GetShort(firstAudioByteBuffer[index * 2],
                            firstAudioByteBuffer[index * 2 + 1], Variable.isBigEnding);
                    outputShortArray[index] = (short) (resultShort * firstAudioWeight);
                }
                audioOffset -= firstAudioReadNumber;
                if (firstAudioReadNumber < 0) {
                    firstAudioFinish = true;
                    break;
                }
                if (audioOffset <= 0) {
                    break;
                }
            }
            if (outputShortArrayLength > 0) {
                // Same array passed for both channels (mono treated as L == R).
                int encodedSize = LameUtil.encode(outputShortArray, outputShortArray,
                        outputShortArrayLength, mp3Buffer);
                if (encodedSize > 0) {
                    composeAudioOutputStream.write(mp3Buffer, 0, encodedSize);
                }
            }
        }
        handler.post(new Runnable() {
            @Override
            public void run() {
                if (composeAudioInterface != null) {
                    composeAudioInterface.updateComposeProgress(20);
                }
            }
        });
        // Phase 2: mix both streams with their weights; when one stream runs
        // out, pass the remainder of the other through with its weight only.
        while (!firstAudioFinish || !secondAudioFinish) {
            index = 0;
            firstAudioReadNumber = firstAudioInputStream.read(firstAudioByteBuffer);
            secondAudioReadNumber = secondAudioInputStream.read(secondAudioByteBuffer);
            // min covers the overlap that can be mixed; max covers the tail
            // that only one file still provides. EOF reads are -1, so min/2
            // and max/2 stay safe (negative or zero loop bounds).
            int minAudioReadNumber = Math.min(firstAudioReadNumber, secondAudioReadNumber);
            int maxAudioReadNumber = Math.max(firstAudioReadNumber, secondAudioReadNumber);
            if (firstAudioReadNumber < 0) {
                firstAudioFinish = true;
            }
            if (secondAudioReadNumber < 0) {
                secondAudioFinish = true;
            }
            int halfMinAudioReadNumber = minAudioReadNumber / 2;
            outputShortArrayLength = maxAudioReadNumber / 2;
            // Overlapping region: weighted sum of both streams.
            for (; index < halfMinAudioReadNumber; index++) {
                resultShort = CommonFunction.WeightShort(firstAudioByteBuffer[index * 2],
                        firstAudioByteBuffer[index * 2 + 1], secondAudioByteBuffer[index * 2],
                        secondAudioByteBuffer[index * 2 + 1], firstAudioWeight,
                        secondAudioWeight, Variable.isBigEnding);
                outputShortArray[index] = resultShort;
            }
            // Tail region: only the longer read still has data.
            if (firstAudioReadNumber != secondAudioReadNumber) {
                if (firstAudioReadNumber > secondAudioReadNumber) {
                    for (; index < outputShortArrayLength; index++) {
                        resultShort = CommonFunction.GetShort(firstAudioByteBuffer[index * 2],
                                firstAudioByteBuffer[index * 2 + 1], Variable.isBigEnding);
                        outputShortArray[index] = (short) (resultShort * firstAudioWeight);
                    }
                } else {
                    for (; index < outputShortArrayLength; index++) {
                        resultShort = CommonFunction.GetShort(secondAudioByteBuffer[index * 2],
                                secondAudioByteBuffer[index * 2 + 1], Variable.isBigEnding);
                        outputShortArray[index] = (short) (resultShort * secondAudioWeight);
                    }
                }
            }
            if (outputShortArrayLength > 0) {
                int encodedSize = LameUtil.encode(outputShortArray, outputShortArray,
                        outputShortArrayLength, mp3Buffer);
                if (encodedSize > 0) {
                    composeAudioOutputStream.write(mp3Buffer, 0, encodedSize);
                }
            }
        }
    } catch (Exception e) {
        LogFunction.error("ComposeAudio异常", e);
        handler.post(new Runnable() {
            @Override
            public void run() {
                if (composeAudioInterface != null) {
                    composeAudioInterface.composeFail();
                }
            }
        });
        // NOTE(review): this early return skips closing the output/input
        // streams and LameUtil.close() — confirm whether that is intended.
        return;
    }
    handler.post(new Runnable() {
        @Override
        public void run() {
            if (composeAudioInterface != null) {
                composeAudioInterface.updateComposeProgress(50);
            }
        }
    });
    // Flush LAME's internal buffers and finalize the MP3 stream.
    try {
        final int flushResult = LameUtil.flush(mp3Buffer);
        if (flushResult > 0) {
            composeAudioOutputStream.write(mp3Buffer, 0, flushResult);
        }
    } catch (Exception e) {
        LogFunction.error("释放ComposeAudio LameUtil异常", e);
    } finally {
        try {
            composeAudioOutputStream.close();
        } catch (Exception e) {
            LogFunction.error("关闭合成输出音频流异常", e);
        }
        LameUtil.close();
    }
    if (deleteSource) {
        FileFunction.DeleteFile(firstAudioFilePath);
        FileFunction.DeleteFile(secondAudioFilePath);
    }
    try {
        firstAudioInputStream.close();
        secondAudioInputStream.close();
    } catch (IOException e) {
        LogFunction.error("关闭合成输入音频流异常", e);
    }
    handler.post(new Runnable() {
        @Override
        public void run() {
            if (composeAudioInterface != null) {
                composeAudioInterface.composeSuccess();
            }
        }
    });
}ComposeAudio方法是此次的进行合成的具体代码实现,方法的传入参数中firstAudioFilePath, 
secondAudioFilePath是用以合成的音频文件地址,composeAudioFilePath用以指明合成后输出的MP3文件的存储地址,firstAudioWeight,secondAudioWeight分别用以指明合成的两个音频文件在合成过程中的音量权重,audioOffset用以指明第一个音频文件相对于第二个音频文件合成过程中的数据偏移,如为负数,则合成过程中先输出audioOffset个字节长度的第二个音频文件数据,如为正数,则合成过程中先输出audioOffset个字节长度的第一个音频文件数据,audioOffset在另一程度上也代表着时间的偏移,目前我们合成的两个音频文件参数为16位单通道44.1khz采样率,那么audioOffset如果为1*16/8*1*44100=88200字节,那么最终合成出的MP3文件中会先播放1s的第一个音频文件的音频接着再播放两个音频文件加和的音频。
标签:
原文地址:http://blog.csdn.net/u013182263/article/details/51767290