Aubio is a library for extracting annotations from audio signals. Its features include segmenting a sound file before each of its attacks, performing pitch detection, tapping the beat, and producing MIDI streams from live audio.
It comes prebuilt for iOS in the form of a framework that can simply be dragged into an Xcode project to get going. This post covers some basics of working with the aubio framework on iOS by walking through a simple beat detection task.
The first step is the easiest:

import aubio
To detect beats, we need to create a tempo detector. You pass a detection method, the buffer size, the hop size (the number of frames between two consecutive runs; half the buffer size is usually a good value), and the sample rate:

let tempo: OpaquePointer? = new_aubio_tempo("default", 1024, 512, 44100)

It's usually a good idea to ignore long silences. The tempo object has a setter for exactly that, which takes the pointer to the detector and a silence threshold in dB:
let silenceThreshold: Float = -40.0 // example value; aubio's default is -90 dB
aubio_tempo_set_silence(tempo!, silenceThreshold)

To perform detection, we need to feed raw data to the detector, either from a file or straight from the microphone.
If you want to run this on a file, it’s easy using an aubio source:
let samples = new_fvec(512)
let source = new_aubio_source("/path/to/file.wav", 0, 512) // samplerate 0 means "use the file's own rate"
let samplerate = aubio_source_get_samplerate(source) // needed to convert frame counts to seconds
let out = new_fvec(1)
var read: uint_t = 0
var total_frames: uint_t = 0 // running total of frames read so far
while true {
    aubio_source_do(source, samples, &read)
    aubio_tempo_do(tempo, samples, out)
    if (fvec_get_sample(out, 0) != 0) {
        let beat_time: Float = Float(total_frames) / Float(samplerate)
        puts(String(format: "beat at %.2f", beat_time))
    }
    total_frames += read
    if (read < 512) {
        break // fewer frames than the hop size means we reached the end of the file
    }
}
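Once the loop finishes, the tempo object can also report its overall estimate; these getters are part of aubio's tempo API:

let bpm = aubio_tempo_get_bpm(tempo) // most recent tempo estimate, in beats per minute
let confidence = aubio_tempo_get_confidence(tempo) // how reliable that estimate is
puts(String(format: "overall tempo: %.1f BPM (confidence %.2f)", bpm, confidence))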
del_fvec(out)
del_aubio_tempo(tempo)
del_aubio_source(source)
del_fvec(samples)

But who wants to run on a file? It's boring! aubio is optimized enough to run its algorithms on real-time audio coming straight from the microphone. The first step is to get raw data from the microphone, and there's a really nice gist that does exactly that:
//
// RecordAudio.swift
//
// This is a Swift class (updated for Swift 5)
// that uses the iOS RemoteIO Audio Unit
// to record audio input samples,
// (should be instantiated as a singleton object.)
//
// Created by Ronald Nicholson on 10/21/16.
// Copyright © 2017,2019 HotPaw Productions. All rights reserved.
// http://www.nicholson.com/rhn/
// Distribution permission: BSD 2-clause license
//

import Foundation
import AVFoundation
import AudioUnit

var gTmp0 = 0 // global scratch variable the gist stashes status values in for debugging

// call setupAudioSessionForRecording() during controlling view load
// call startRecording() to start recording in a later UI call

final class RecordAudio: NSObject {

    var audioUnit: AudioUnit? = nil

    var micPermission = false
    var sessionActive = false
    var isRecording = false

    var sampleRate: Double = 44100.0 // default audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768) // for incoming samples
    var circInIdx: Int = 0

    var audioLevel: Float = 0.0

    private var hwSRate = 48000.0 // guess of device hardware sample rate
    private var micPermissionDispatchToken = 0
    private var interrupted = false // for restart from audio interruption notification

    func startRecording() {
        if isRecording { return }

        startAudioSession()
        if sessionActive {
            startAudioUnit()
        }
    }

    var numberOfChannels: Int = 2

    private let outputBus: UInt32 = 0
    private let inputBus: UInt32 = 1

    func startAudioUnit() {
        var err: OSStatus = noErr

        if self.audioUnit == nil {
            setupAudioUnit() // setup once
        }
        guard let au = self.audioUnit
            else { return }

        err = AudioUnitInitialize(au)
        gTmp0 = Int(err)
        if err != noErr { return }
        err = AudioOutputUnitStart(au) // start
        gTmp0 = Int(err)
        if err == noErr {
            isRecording = true
        }
    }

    func startAudioSession() {
        if (sessionActive == false) {
            // set and activate Audio Session
            do {
                let audioSession = AVAudioSession.sharedInstance()

                if (micPermission == false) {
                    if (micPermissionDispatchToken == 0) {
                        micPermissionDispatchToken = 1
                        audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                            if granted {
                                self.micPermission = true
                                return
                                // check for this flag and call from UI loop if needed
                            } else {
                                gTmp0 += 1
                                // dispatch in main/UI thread an alert
                                // informing that mic permission is not switched on
                            }
                        })
                    }
                }
                if micPermission == false { return }

                try audioSession.setCategory(AVAudioSession.Category.record)
                // choose 44100 or 48000 based on hardware rate
                // sampleRate = 44100.0
                var preferredIOBufferDuration = 0.0058 // 5.8 milliseconds = 256 samples
                hwSRate = audioSession.sampleRate // get native hardware rate
                if hwSRate == 48000.0 { sampleRate = 48000.0 } // set session to hardware rate
                if hwSRate == 48000.0 { preferredIOBufferDuration = 0.0053 }
                let desiredSampleRate = sampleRate
                try audioSession.setPreferredSampleRate(desiredSampleRate)
                try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

                NotificationCenter.default.addObserver(
                    forName: AVAudioSession.interruptionNotification,
                    object: nil,
                    queue: nil,
                    using: myAudioSessionInterruptionHandler)

                try audioSession.setActive(true)
                sessionActive = true
            } catch /* let error as NSError */ {
                // handle error here
            }
        }
    }

    private func setupAudioUnit() {

        var componentDesc: AudioComponentDescription
            = AudioComponentDescription(
                componentType: OSType(kAudioUnitType_Output),
                componentSubType: OSType(kAudioUnitSubType_RemoteIO),
                componentManufacturer: OSType(kAudioUnitManufacturer_Apple),
                componentFlags: UInt32(0),
                componentFlagsMask: UInt32(0))

        var osErr: OSStatus = noErr

        let component: AudioComponent! = AudioComponentFindNext(nil, &componentDesc)

        var tempAudioUnit: AudioUnit?
        osErr = AudioComponentInstanceNew(component, &tempAudioUnit)
        self.audioUnit = tempAudioUnit

        guard let au = self.audioUnit
            else { return }

        // Enable I/O for input.
        var one_ui32: UInt32 = 1

        osErr = AudioUnitSetProperty(au,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Input,
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))

        // Set format to 32-bit Floats, linear PCM
        let nc = 2 // 2 channel stereo
        var streamFormatDesc: AudioStreamBasicDescription = AudioStreamBasicDescription(
            mSampleRate: Double(sampleRate),
            mFormatID: kAudioFormatLinearPCM,
            mFormatFlags: (kAudioFormatFlagsNativeFloatPacked),
            mBytesPerPacket: UInt32(nc * MemoryLayout<UInt32>.size),
            mFramesPerPacket: 1,
            mBytesPerFrame: UInt32(nc * MemoryLayout<UInt32>.size),
            mChannelsPerFrame: UInt32(nc),
            mBitsPerChannel: UInt32(8 * (MemoryLayout<UInt32>.size)),
            mReserved: UInt32(0)
        )

        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Input, outputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))
        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Output,
                                     inputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        var inputCallbackStruct
            = AURenderCallbackStruct(inputProc: recordingCallback,
                                     inputProcRefCon:
                                        UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))

        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
                                     AudioUnitScope(kAudioUnitScope_Global),
                                     inputBus,
                                     &inputCallbackStruct,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

        // Ask CoreAudio to allocate buffers for us on render.
        // Is this true by default?
        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioUnitProperty_ShouldAllocateBuffer),
                                     AudioUnitScope(kAudioUnitScope_Output),
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))
        gTmp0 = Int(osErr)
    }

    let recordingCallback: AURenderCallback = { (
        inRefCon,
        ioActionFlags,
        inTimeStamp,
        inBusNumber,
        frameCount,
        ioData) -> OSStatus in

        let audioObject = unsafeBitCast(inRefCon, to: RecordAudio.self)
        var err: OSStatus = noErr

        // set mData to nil, AudioUnitRender() should be allocating buffers
        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: UInt32(2),
                mDataByteSize: 16,
                mData: nil))

        if let au = audioObject.audioUnit {
            err = AudioUnitRender(au,
                                  ioActionFlags,
                                  inTimeStamp,
                                  inBusNumber,
                                  frameCount,
                                  &bufferList)
        }

        audioObject.processMicrophoneBuffer(inputDataList: &bufferList,
                                            frameCount: UInt32(frameCount))
        return 0
    }

    func processMicrophoneBuffer( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        // Microphone Input Analysis
        // let data = UnsafePointer<Int16>(mBuffers.mData)
        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        if let bptr = bufferPointer {
            let dataArray = bptr.assumingMemoryBound(to: Float.self)
            var sum: Float = 0.0
            var j = self.circInIdx
            let m = self.circBuffSize
            for i in 0..<(count / 2) {
                let x = Float(dataArray[i + i])     // copy left channel sample
                let y = Float(dataArray[i + i + 1]) // copy right channel sample
                self.circBuffer[j] = x
                self.circBuffer[j + 1] = y
                j += 2; if j >= m { j = 0 } // into circular buffer
                sum += x * x + y * y
            }
            self.circInIdx = j // circular index will always be less than size
            // measuredMicVol_1 = sqrt( Float(sum) / Float(count) ) // scaled volume
            if sum > 0.0 && count > 0 {
                let tmp = 5.0 * (logf(sum / Float(count)) + 20.0)
                let r: Float = 0.2
                audioLevel = r * tmp + (1.0 - r) * audioLevel
            }
        }
    }

    func stopRecording() {
        AudioUnitUninitialize(self.audioUnit!)
        isRecording = false
    }

    func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue)
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                if (isRecording) {
                    stopRecording()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        sessionActive = false
                    } catch {
                    }
                    interrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                if (interrupted) {
                    // potentially restart here
                }
            }
        }
    }
}
// end of class RecordAudio

final class RecordAudio_v2: NSObject {

    var auAudioUnit: AUAudioUnit! = nil

    var enableRecording = true
    var audioSessionActive = false
    var audioSetupComplete = false
    var isRecording = false

    var sampleRate: Double = 48000.0 // desired audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768)
    var circInIdx: Int = 0 // sample input index
    var circOutIdx: Int = 0 // sample output index

    var audioLevel: Float = 0.0

    private var micPermissionRequested = false
    private var micPermissionGranted = false

    // for restart from audio interruption notification
    private var audioInterrupted = false

    private var renderBlock: AURenderBlock? = nil

    func startRecording() {
        if isRecording { return }

        if audioSessionActive == false {
            // configure and activate Audio Session, this might change the sampleRate
            setupAudioSessionForRecording()
        }
        guard micPermissionGranted && audioSessionActive else { return }

        let audioFormat = AVAudioFormat(
            commonFormat: AVAudioCommonFormat.pcmFormatInt16, // pcmFormatInt16, pcmFormatFloat32,
            sampleRate: Double(sampleRate),                   // 44100.0 48000.0
            channels: AVAudioChannelCount(2),                 // 1 or 2
            interleaved: true)                                // true for interleaved stereo

        if (auAudioUnit == nil) {
            setupRemoteIOAudioUnitForRecord(audioFormat: audioFormat!)
        }

        renderBlock = auAudioUnit.renderBlock // returns AURenderBlock()

        if (enableRecording
            && micPermissionGranted
            && audioSetupComplete
            && audioSessionActive
            && isRecording == false) {

            auAudioUnit.isInputEnabled = true

            auAudioUnit.outputProvider = { // AURenderPullInputBlock()
                (actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in

                if let block = self.renderBlock { // AURenderBlock?
                    let err: OSStatus = block(actionFlags,
                                              timestamp,
                                              frameCount,
                                              1,
                                              inputData,
                                              .none)
                    if err == noErr {
                        // save samples from current input buffer to circular buffer
                        self.recordMicrophoneInputSamples(
                            inputDataList: inputData,
                            frameCount: UInt32(frameCount))
                    }
                }
                let err2: AUAudioUnitStatus = noErr
                return err2
            }

            do {
                circInIdx = 0 // initialize circular buffer pointers
                circOutIdx = 0
                try auAudioUnit.allocateRenderResources()
                try auAudioUnit.startHardware() // equivalent to AudioOutputUnitStart ???
                isRecording = true
            } catch {
                // placeholder for error handling
            }
        }
    }

    func stopRecording() {
        if (isRecording) {
            auAudioUnit.stopHardware()
            isRecording = false
        }
        if (audioSessionActive) {
            let audioSession = AVAudioSession.sharedInstance()
            do {
                try audioSession.setActive(false)
            } catch /* let error as NSError */ {
            }
            audioSessionActive = false
        }
    }

    private func recordMicrophoneInputSamples( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        var j = self.circInIdx // current circular array input index
        let n = self.circBuffSize
        var audioLevelSum: Float = 0.0
        if let bptr = bufferPointer?.assumingMemoryBound(to: Int16.self) {
            for i in 0..<(count / 2) {
                // Save samples in circular buffer for later processing
                let x = Float(bptr[i + i])
                let y = Float(bptr[i + i + 1])
                self.circBuffer[j] = x     // Stereo Left
                self.circBuffer[j + 1] = y // Stereo Right
                j += 2; if j >= n { j = 0 } // Circular buffer looping
                // Microphone Input Analysis
                audioLevelSum += x * x + y * y
            }
        }
        OSMemoryBarrier() // from libkern/OSAtomic.h
        self.circInIdx = j // circular index will always be less than size
        if audioLevelSum > 0.0 && count > 0 {
            audioLevel = logf(audioLevelSum / Float(count))
        }
    }

    // set up and activate Audio Session
    func setupAudioSessionForRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()

            if (micPermissionGranted == false) {
                if (micPermissionRequested == false) {
                    micPermissionRequested = true
                    audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                        if granted {
                            self.micPermissionGranted = true
                            self.startRecording()
                            return
                        } else {
                            self.enableRecording = false
                            // dispatch in main/UI thread an alert
                            // informing that mic permission is not switched on
                        }
                    })
                }
                return
            }

            if enableRecording {
                try audioSession.setCategory(AVAudioSession.Category.record)
            }
            let preferredIOBufferDuration = 0.0053 // 5.3 milliseconds = 256 samples
            try audioSession.setPreferredSampleRate(sampleRate) // at 48000.0
            try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

            NotificationCenter.default.addObserver(
                forName: AVAudioSession.interruptionNotification,
                object: nil,
                queue: nil,
                using: myAudioSessionInterruptionHandler)

            try audioSession.setActive(true)
            audioSessionActive = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    // find and set up the sample format for the RemoteIO Audio Unit
    private func setupRemoteIOAudioUnitForRecord(audioFormat: AVAudioFormat) {
        do {
            let audioComponentDescription = AudioComponentDescription(
                componentType: kAudioUnitType_Output,
                componentSubType: kAudioUnitSubType_RemoteIO,
                componentManufacturer: kAudioUnitManufacturer_Apple,
                componentFlags: 0,
                componentFlagsMask: 0)

            try auAudioUnit = AUAudioUnit(componentDescription: audioComponentDescription)

            // bus 1 is for data that the microphone exports out to the handler block
            let bus1 = auAudioUnit.outputBusses[1]
            try bus1.setFormat(audioFormat) // for microphone bus
            audioSetupComplete = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    private func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue)
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                // [self beginInterruption];
                if (isRecording) {
                    auAudioUnit.stopHardware()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        audioSessionActive = false
                    } catch {
                        // placeholder for error handling
                    }
                    audioInterrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                // [self endInterruption];
                if (audioInterrupted) {
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(true)
                        audioSessionActive = true
                        if (auAudioUnit.renderResourcesAllocated == false) {
                            try auAudioUnit.allocateRenderResources()
                        }
                        try auAudioUnit.startHardware()
                        isRecording = true
                    } catch {
                        // placeholder for error handling
                    }
                }
            }
        }
    }
} // end of RecordAudio class
// eof
Use this class, but update processMicrophoneBuffer to feed the data to aubio. The tempo, samples, sampleSize, and silenceThreshold members used below are assumed to be properties of the class:

var tempo: OpaquePointer? // the aubio tempo detector
var samples: OpaquePointer? // fvec buffer handed to the detector
let sampleSize: UInt32 = 512 // hop size: frames per aubio_tempo_do call
let silenceThreshold: Float = -40.0 // ignore anything quieter than this (dB)

func setupAubio(samplerate: UInt32) {
    samples = new_fvec(sampleSize)
    tempo = new_aubio_tempo("default", 1024, sampleSize, samplerate)
    aubio_tempo_set_silence(tempo!, silenceThreshold)
}
func processMicrophoneBuffer(inputDataList: UnsafeMutablePointer<AudioBufferList>, frameCount: UInt32) {
    guard let samples = samples, let tempo = tempo else { return }
    // pull the interleaved float samples out of the RemoteIO buffer, as in the gist
    let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
    let mBuffers: AudioBuffer = inputDataPtr[0]
    let count = Int(frameCount)
    guard let bptr = UnsafeMutableRawPointer(mBuffers.mData) else { return }
    let dataArray = bptr.assumingMemoryBound(to: Float.self)
    let out = new_fvec(2)
    var sampleCount: UInt32 = 0
    for i in 0..<(count / 2) {
        let x = Float(dataArray[i + i])     // left channel sample
        let y = Float(dataArray[i + i + 1]) // right channel sample
        fvec_set_sample(samples, (x + y) / 2, sampleCount) // mix down to mono for the detector
        sampleCount += 1
        if sampleCount == sampleSize || i == count / 2 - 1 {
            aubio_tempo_do(tempo, samples, out)
            if (fvec_get_sample(out, 0) != 0) {
                // Yay! A BEAT!!!
                break
            }
            sampleCount = 0
        }
    }
    del_fvec(out)
}
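In the live case there is no total_frames counter to turn a beat into a timestamp, but the tempo object can report this itself via aubio's getters. A small sketch, to be dropped into the beat branch above:

// inside the "Yay! A BEAT!!!" branch:
let lastBeat = aubio_tempo_get_last_s(tempo) // seconds since the detector started
print(String(format: "beat at %.2f s", lastBeat))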
func stopRecording() {
    AudioUnitUninitialize(self.audioUnit!) // tear down the audio unit as before
    isRecording = false
    if let tempo = tempo, let samples = samples {
        del_aubio_tempo(tempo)
        del_fvec(samples)
        self.tempo = nil
        self.samples = nil
    }
}

Note: make sure you clean up the pointers in stopRecording to avoid memory leaks.
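Wiring it all together then takes only a few calls. A minimal sketch, assuming the aubio members above were added directly to the RecordAudio class:

let recorder = RecordAudio()
recorder.startAudioSession() // activate the session first; this settles the actual sample rate
recorder.setupAubio(samplerate: UInt32(recorder.sampleRate)) // create the detector at the session's rate
recorder.startRecording() // beats are now detected inside processMicrophoneBuffer
// ... later ...
recorder.stopRecording() // stops the audio unit and frees the aubio objects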