For basic audio visualization, a spectrum plot is usually enough:
But I don't think a spectrum plot shows what a sound really looks like, so I was set on producing a sonogram. Either way, both kinds of plots need the frequency content of the signal.
I studied two samples from Apple's developer site, aurioTouch and PitchDetector, merged aurioTouch's sonogram rendering and PitchDetector's dominant-frequency detection into the code from the previous post, 《oc开发笔记2 AUGraph 完成同时录音与播放》 (AUGraph: simultaneous recording and playback), and did the drawing with CALayer. The audio callback PerformThru, which in the previous post only muted the output, can now use an FFT to obtain the level of every frequency bin, as well as the frequency and level of the current dominant pitch. The per-frequency data is cached in the array FArr for drawing. The main code follows:
static OSStatus PerformThru(void                       *inRefCon,
                            AudioUnitRenderActionFlags *ioActionFlags,
                            const AudioTimeStamp       *inTimeStamp,
                            UInt32                      inBusNumber,
                            UInt32                      inNumberFrames,
                            AudioBufferList            *ioData)
{
    // Pointer back to the view controller, used to reach the mute switch and FFT state
    CDYViewController *THIS = (__bridge CDYViewController *)inRefCon;
    int bufferCapacity = THIS->bufferCapacity;
    SInt16 index = THIS->index;
    void *dataBuffer = THIS->dataBuffer;
    float *outputBuffer = THIS->outputBuffer;
    uint32_t log2n = THIS->log2n;
    uint32_t n = THIS->n;
    uint32_t nOver2 = THIS->nOver2;
    Float32 kAdjust0DB = THIS->kAdjust0DB;
    COMPLEX_SPLIT A = THIS->A;
    FFTSetup fftSetup = THIS->fftSetup;
    // Leftover from aurioTouch's color table; unused in this snippet
    static int numLevels = sizeof(colorLevels) / sizeof(GLfloat) / 5;

    // AudioUnitRender pulls the Remote I/O input side. The data arrives frame by
    // frame, each render delivering inNumberFrames samples per channel (stereo
    // doubles the amount), all stored in ioData.
    OSStatus renderErr = AudioUnitRender(THIS->remoteIOUnit, ioActionFlags,
                                         inTimeStamp, 1, inNumberFrames, ioData);

    // Accumulate samples into dataBuffer; once a full buffer is collected, run the FFT
    int read = bufferCapacity - index;
    if (read > inNumberFrames) {
        memcpy((SInt16 *)dataBuffer + index, ioData->mBuffers[0].mData,
               inNumberFrames * sizeof(SInt16));
        THIS->index += inNumberFrames;
    } else {
        // The buffer is full: copy the remainder, reset the index, and perform the FFT
        memcpy((SInt16 *)dataBuffer + index, ioData->mBuffers[0].mData,
               read * sizeof(SInt16));
        THIS->index = 0;
        ConvertInt16ToFloat(THIS, dataBuffer, outputBuffer, bufferCapacity);

        // If the waveform switch is on
        if (THIS->isMute == YES) {
            /*************** FFT: per-bin levels for the sonogram ***************/
            float mFFTNormFactor = n;
            UInt32 maxFrames = nOver2;
            float *outFFTData = (float *)calloc(maxFrames, sizeof(float));

            // Generate a split complex vector from the real data
            vDSP_ctoz((COMPLEX *)outputBuffer, 2, &A, 1, maxFrames);

            // Take the FFT and scale appropriately
            vDSP_fft_zrip(fftSetup, &A, 1, log2n, kFFTDirection_Forward);
            vDSP_vsmul(A.realp, 1, &mFFTNormFactor, A.realp, 1, maxFrames);
            vDSP_vsmul(A.imagp, 1, &mFFTNormFactor, A.imagp, 1, maxFrames);

            // Zero out the Nyquist value
            THIS->A.imagp[0] = 0.0;

            // Convert the FFT data to dB
            vDSP_zvmags(&A, 1, outFFTData, 1, maxFrames);
            // To avoid taking log10 of zero, add an adjusting factor so the
            // minimum value comes out at -128 dB
            vDSP_vsadd(outFFTData, 1, &kAdjust0DB, outFFTData, 1, maxFrames);
            Float32 one = 1;
            vDSP_vdbcon(outFFTData, 1, &one, outFFTData, 1, maxFrames, 0);

            // Map FFT bins onto screen rows, interpolating between neighboring
            // bins, and cache a 0-10 color level per bin in FArr for drawing
            int y, maxY = 300;            // maxY: screen height in points (hardcoded)
            int fftLength = 2048 / 2;     // = nOver2 bins
            for (y = 0; y < maxY; y++) {
                CGFloat yFract = (CGFloat)y / (CGFloat)(maxY - 1);
                CGFloat fftIdx = yFract * ((CGFloat)fftLength - 1);
                double fftIdx_i, fftIdx_f;
                fftIdx_f = modf(fftIdx, &fftIdx_i);
                CGFloat fft_l_fl, fft_r_fl;
                CGFloat interpVal;
                int lowerIndex = (int)fftIdx_i;
                int upperIndex = (int)(fftIdx_i + 1);
                upperIndex = (upperIndex == fftLength) ? fftLength - 1 : upperIndex;
                // Map the dB values into a 0..1 brightness
                fft_l_fl = (CGFloat)(80 - outFFTData[lowerIndex]) / 64.;
                fft_r_fl = (CGFloat)(80 - outFFTData[upperIndex]) / 64.;
                interpVal = fft_l_fl * (1. - fftIdx_f) + fft_r_fl * fftIdx_f;
                interpVal = sqrt(CLAMP(0., interpVal, 1.));
                int colorind = interpVal * 10;
                FArr[(int)fftIdx_i] = 10 - colorind;
            }
            free(outFFTData);   // calloc'd every pass; freeing here avoids a leak
            /*************** FFT ***************************************************/
//        } else {   // else disabled: the pitch pass runs in the same branch
            /*************** FFT: dominant frequency and level only ****************/
            uint32_t stride = 1;
            vDSP_ctoz((COMPLEX *)outputBuffer, 2, &A, 1, nOver2);

            // Carry out a forward FFT transform
            vDSP_fft_zrip(fftSetup, &A, stride, log2n, FFT_FORWARD);

            // The output signal is now in split real form. Use vDSP_ztoc to get
            // an interleaved real vector back.
            vDSP_ztoc(&A, 1, (COMPLEX *)outputBuffer, 2, nOver2);

            // Determine the dominant frequency by taking the magnitude squared
            // of each bin and keeping the bin where it peaks
            float dominantFrequency = 0;
            int bin = -1;
            for (int i = 0; i < n; i += 2) {
                float curFreq = MagnitudeSquared(outputBuffer[i], outputBuffer[i + 1]);
                if (curFreq > dominantFrequency) {
                    dominantFrequency = curFreq;
                    bin = (i + 1) / 2;
                }
            }
            // Each bin spans sampleRate/bufferCapacity Hz (44100/2048 ≈ 21.5 Hz here)
            printf("Dominant frequency: %f Hz (bin %d)\n",
                   bin * (THIS->sampleRate / bufferCapacity), bin);
            /*************** FFT ***************************************************/
        }
        // Clear outputBuffer (it holds floats, so clear n * sizeof(float) bytes)
        memset(outputBuffer, 0, n * sizeof(float));
    }

    // No playback needed for now: zero every channel's data (mute).
    // mNumberBuffers is the channel count: indices 0-1 for stereo, only 0 for mono.
    for (UInt32 i = 0; i < ioData->mNumberBuffers; i++) {
        memset(ioData->mBuffers[i].mData, 0, ioData->mBuffers[i].mDataByteSize);
    }

    if (renderErr < 0) {
        return renderErr;
    }
    return noErr;
}
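The callback relies on two helper functions that are not listed in this post. MagnitudeSquared is the one-line helper from PitchDetector; the ConvertInt16ToFloat below is only a minimal Accelerate-based sketch of what it has to do (PitchDetector itself performs this conversion with an AudioConverter), so treat it as an illustration rather than the exact original:

#import <Accelerate/Accelerate.h>

// Squared magnitude of the complex bin (x + iy); same one-liner as PitchDetector
static float MagnitudeSquared(float x, float y) {
    return x * x + y * y;
}

// Minimal sketch: widen SInt16 samples to float and normalize to [-1, 1).
// (PitchDetector does this step with an AudioConverter instead.)
static void ConvertInt16ToFloat(CDYViewController *THIS, void *buf,
                                float *outputBuf, size_t capacity) {
    vDSP_vflt16((SInt16 *)buf, 1, outputBuf, 1, capacity);
    float scale = 1.0f / 32768.0f;
    vDSP_vsmul(outputBuf, 1, &scale, outputBuf, 1, capacity);
}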
- (void)viewDidLoad
{
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib
    isMute = NO;
    index = 0;

    UInt32 maxFrames = 2048;
    bufferCapacity = maxFrames;
    dataBuffer = (void *)malloc(maxFrames * sizeof(SInt16));
    outputBuffer = (float *)malloc(maxFrames * sizeof(float));

    // log2n: base-2 logarithm of the FFT length
    log2n = log2f(maxFrames);
    n = 1 << log2n;
    assert(n == maxFrames);
    nOver2 = maxFrames / 2;
    kAdjust0DB = 1.5849e-13;
    A.realp = (float *)malloc(nOver2 * sizeof(float));
    A.imagp = (float *)malloc(nOver2 * sizeof(float));
    fftSetup = vDSP_create_fftsetup(log2n, FFT_RADIX2);
    sampleRate = 44100.0;

    self.soundimg = [[CDYSoundImage alloc] initWithFrame:self.view.frame];
    [self.view.layer addSublayer:self.soundimg.colorLayer];
    switchbutton.layer.zPosition = 10;

    // Frequency data
    memset(FArr, 0, sizeof(FArr));

    // Start the timer, refreshing roughly 60 times per second
    // (1.0/60.0, not 1/60, which is integer division and yields 0)
    self.timer = [NSTimer scheduledTimerWithTimeInterval:1.0 / 60.0
                                                  target:self
                                                selector:@selector(tick)
                                                userInfo:nil
                                                 repeats:YES];
    // Draw the initial state
    [self tick];

    [self initRemoteIO];
}
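The tick method the timer calls is not listed here. A minimal sketch of what it has to do, assuming cells in CDYSoundImage holds one row of levels per tick and that kNumCells and kNumRows are hypothetical grid dimensions, could look like this:

// Hypothetical sketch of -tick: push the newest column of FFT levels from FArr
// into the grid, scroll off the oldest row, and ask the layer to redraw
- (void)tick {
    NSMutableArray *row = [NSMutableArray arrayWithCapacity:kNumCells];
    for (int i = 0; i < kNumCells; i++) {      // kNumCells: bins kept per row (assumed)
        [row addObject:@(FArr[i])];
    }
    [self.soundimg.cells addObject:row];
    if (self.soundimg.cells.count > kNumRows) { // kNumRows: visible rows (assumed)
        [self.soundimg.cells removeObjectAtIndex:0];
    }
    [self.soundimg.colorLayer setNeedsDisplay]; // triggers drawLayer:inContext:
}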
The sonogram of a voice counting "1, 2, 3":
The left image is drawn by aurioTouch; the right one is mine.
aurioTouch draws with OpenGL ES: roughly, it tiles the whole screen with a triangle mesh and computes a color value for each vertex. I have done something similar with OGRE before, but this time I wanted to try the simplest drawing option, CALayer. The drawing method is as follows:
// When -drawLayer:inContext: (CALayerDelegate) or -drawRect: in UIView (really
// just a wrapper around the former) is implemented, Core Graphics creates a
// drawing context whose backing store needs layer width * layer height * 4 bytes,
// with width and height in pixels. For a full-screen layer on a Retina iPad that
// is 2048 * 1536 * 4 bytes, about 12 MB, and the memory is wiped and refilled on
// every repaint. Software drawing is expensive: unless absolutely necessary,
// avoid redrawing your views; the secret to drawing performance is to draw as
// little as possible. Vector shapes via CAShapeLayer are efficient, but the
// screen can only host a few hundred such layers at once.
- (void)drawLayer:(CALayer *)layer inContext:(CGContextRef)ctx
{
    for (int i = 0; i < [cells count]; i++) {
        NSMutableArray *array1 = self.cells[i];
        for (int j = 0; j < [array1 count]; j++) {
            NSNumber *val = array1[j];
            float cval = val.intValue / 10.0;   // 0-10 level mapped to 0.0-1.0
            CGRect rectangle = CGRectMake(j * cellwidth, i * cellheight,
                                          cellwidth, cellheight);
            // Fill the cell with a grayscale value proportional to its level
            CGContextSetRGBFillColor(ctx, cval, cval, cval, 1.0);
            CGContextFillRect(ctx, rectangle);
            // Optional cell borders:
            // CGContextSetRGBStrokeColor(ctx, 1.0, 0, 0, 1);
            // CGContextSetLineWidth(ctx, 1.0);
            // CGContextAddRect(ctx, rectangle);
            // CGContextStrokePath(ctx);
        }
    }
}
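One detail worth spelling out: drawLayer:inContext: only runs if colorLayer has a delegate and something calls setNeedsDisplay on it (the tick sketch above does the latter). The property names below match the code in this post, but the initializer itself is an assumption about how CDYSoundImage is wired up:

// Hypothetical sketch of CDYSoundImage's initializer
- (instancetype)initWithFrame:(CGRect)frame {
    if ((self = [super init])) {
        _colorLayer = [CALayer layer];
        _colorLayer.frame = frame;
        _colorLayer.delegate = self;   // so drawLayer:inContext: gets called
        _colorLayer.contentsScale = [UIScreen mainScreen].scale; // crisp on Retina
        _cells = [NSMutableArray array];
    }
    return self;
}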
I have to draw on the order of screen width * screen height small rectangles at once, which is at least 50,000 cells, and this drawing approach runs entirely on the CPU. It performs acceptably in the simulator, where the Mac's CPU is strong, but on a real device the drawing ought to happen on the GPU, so I plan to try SpriteKit instead; a sketch of that idea follows.
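This is only a sketch of the SpriteKit route, not code from this project: pack the grid of 0-10 levels into RGBA bytes and rebuild an SKTexture each frame, so the GPU scales a single quad instead of the CPU filling tens of thousands of rectangles.

#import <SpriteKit/SpriteKit.h>

// Sketch: convert a width*height grid of 0-10 levels into an RGBA8 texture
static SKTexture *TextureFromLevels(const uint8_t *levels, int width, int height) {
    NSMutableData *pixels = [NSMutableData dataWithLength:(NSUInteger)width * height * 4];
    uint8_t *p = pixels.mutableBytes;
    for (int i = 0; i < width * height; i++) {
        uint8_t gray = (uint8_t)(levels[i] * 255 / 10); // 0-10 level to 0-255 gray
        p[4 * i + 0] = gray;   // R
        p[4 * i + 1] = gray;   // G
        p[4 * i + 2] = gray;   // B
        p[4 * i + 3] = 255;    // A
    }
    return [SKTexture textureWithData:pixels size:CGSizeMake(width, height)];
}

// Each frame: spriteNode.texture = TextureFromLevels(grid, w, h);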
As the figure below shows, CALayer does all of its drawing on the CPU:
Studying those two official samples and learning the audio processing and drawing took twelve days in all. The most painful part was reading the aurioTouch sample, for which there is hardly any material to be found. I hope this article helps anyone who plans to build spectrum plots, sonograms, or other audio visualizations.
Code: download
Original post: http://blog.csdn.net/baixiaozhe/article/details/51258178