Aubio is a tool for extracting annotations from audio signals. Its features include segmenting a sound file before each of its attacks, performing pitch detection, tapping the beat, and producing MIDI streams from live audio.
It comes prebuilt for iOS in the form of a framework that can simply be dragged into an Xcode project to get going. This post touches on some basics of working with the aubio framework on iOS by walking through a simple beat detection task.
The first step is the easiest:
import aubio
To detect beats, we need to create a tempo detector. You pass a detection method (currently "default" is the only one implemented), the buffer size, the hop size (the number of frames between two consecutive runs; a good value is usually buffer size / 2), and the sample rate.
let tempo: OpaquePointer? = new_aubio_tempo("default", 1024, 512, 44100) // method, buffer size, hop size, sample rate
It’s usually a good idea to ignore long silences. The tempo object has a setter for just that; it takes the pointer to the tempo detector and a float silence threshold in dB.
aubio_tempo_set_silence(tempo!, silenceThreshold)
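Here silenceThreshold is a value you define yourself. As a starting point (the exact number is an assumption; tune it by ear for your material):

let silenceThreshold: smpl_t = -40.0 // dB; e.g. -90 is very sensitive, -20 ignores all but loud passages (smpl_t is aubio's Float typedef)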
To perform detection, we need to feed raw data to the detector.
If you want to run this on a file, it’s easy using an aubio source:
let samples = new_fvec(512)
let source = new_aubio_source("/path/to/file.wav", 0, 512) // 0 = use the file's native sample rate
let samplerate = aubio_source_get_samplerate(source) // the tempo detector should be created with this same rate
let out = new_fvec(1)
var read: uint_t = 0
var total_frames: uint_t = 0
while true {
    aubio_source_do(source, samples, &read)
    aubio_tempo_do(tempo, samples, out)
    if (fvec_get_sample(out, 0) != 0) {
        let beat_time: Float = Float(total_frames) / Float(samplerate)
        puts(String(format: "beat at %.2f", beat_time))
    }
    total_frames += read // count frames so we can compute each beat's timestamp
    if (read < 512) {
        break // reached the end of the file
    }
}
del_fvec(out)
del_aubio_tempo(tempo)
del_aubio_source(source)
del_fvec(samples)
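While the loop is running (i.e. before del_aubio_tempo is called), the detector can also report its current tempo estimate and a confidence value. A minimal sketch using aubio's accessors:

let bpm = aubio_tempo_get_bpm(tempo)
let confidence = aubio_tempo_get_confidence(tempo)
puts(String(format: "estimated tempo: %.1f bpm (confidence: %.2f)", bpm, confidence))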
But who wants to run on a file? That's boring! aubio is optimized enough to run its algorithms even on real-time audio coming from the microphone. The first step is to get raw data from the microphone, and there's a really nice gist that does just that.
//
//  RecordAudio.swift
//
//  This is a Swift class (updated for Swift 5)
//  that uses the iOS RemoteIO Audio Unit
//  to record audio input samples,
//  (should be instantiated as a singleton object.)
//
//  Created by Ronald Nicholson on 10/21/16.
//  Copyright © 2017,2019 HotPaw Productions. All rights reserved.
//  http://www.nicholson.com/rhn/
//  Distribution permission: BSD 2-clause license
//
import Foundation
import AVFoundation
import AudioUnit

var gTmp0 = 0 // debug status value used below (declared at file scope in the original gist)

// call setupAudioSessionForRecording() during controlling view load
// call startRecording() to start recording in a later UI call
final class RecordAudio: NSObject {

    var audioUnit: AudioUnit? = nil

    var micPermission = false
    var sessionActive = false
    var isRecording = false

    var sampleRate: Double = 44100.0 // default audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768) // for incoming samples
    var circInIdx: Int = 0
    var audioLevel: Float = 0.0

    private var hwSRate = 48000.0 // guess of device hardware sample rate
    private var micPermissionDispatchToken = 0
    private var interrupted = false // for restart from audio interruption notification

    func startRecording() {
        if isRecording { return }

        startAudioSession()
        if sessionActive {
            startAudioUnit()
        }
    }

    var numberOfChannels: Int = 2

    private let outputBus: UInt32 = 0
    private let inputBus: UInt32 = 1

    func startAudioUnit() {
        var err: OSStatus = noErr

        if self.audioUnit == nil {
            setupAudioUnit() // setup once
        }
        guard let au = self.audioUnit
            else { return }

        err = AudioUnitInitialize(au)
        gTmp0 = Int(err)
        if err != noErr { return }
        err = AudioOutputUnitStart(au) // start
        gTmp0 = Int(err)
        if err == noErr {
            isRecording = true
        }
    }

    func startAudioSession() {
        if (sessionActive == false) {
            // set and activate Audio Session
            do {
                let audioSession = AVAudioSession.sharedInstance()

                if (micPermission == false) {
                    if (micPermissionDispatchToken == 0) {
                        micPermissionDispatchToken = 1
                        audioSession.requestRecordPermission({(granted: Bool) -> Void in
                            if granted {
                                self.micPermission = true
                                return
                                // check for this flag and call from UI loop if needed
                            } else {
                                gTmp0 += 1
                                // dispatch in main/UI thread an alert
                                // informing that mic permission is not switched on
                            }
                        })
                    }
                }
                if micPermission == false { return }

                try audioSession.setCategory(AVAudioSession.Category.record)
                // choose 44100 or 48000 based on hardware rate
                // sampleRate = 44100.0
                var preferredIOBufferDuration = 0.0058 // 5.8 milliseconds = 256 samples
                hwSRate = audioSession.sampleRate // get native hardware rate
                if hwSRate == 48000.0 { sampleRate = 48000.0 } // set session to hardware rate
                if hwSRate == 48000.0 { preferredIOBufferDuration = 0.0053 }
                let desiredSampleRate = sampleRate
                try audioSession.setPreferredSampleRate(desiredSampleRate)
                try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

                NotificationCenter.default.addObserver(
                    forName: AVAudioSession.interruptionNotification,
                    object: nil,
                    queue: nil,
                    using: myAudioSessionInterruptionHandler )

                try audioSession.setActive(true)
                sessionActive = true
            } catch /* let error as NSError */ {
                // handle error here
            }
        }
    }

    private func setupAudioUnit() {

        var componentDesc: AudioComponentDescription
            = AudioComponentDescription(
                componentType:          OSType(kAudioUnitType_Output),
                componentSubType:       OSType(kAudioUnitSubType_RemoteIO),
                componentManufacturer:  OSType(kAudioUnitManufacturer_Apple),
                componentFlags:         UInt32(0),
                componentFlagsMask:     UInt32(0) )

        var osErr: OSStatus = noErr

        let component: AudioComponent! = AudioComponentFindNext(nil, &componentDesc)

        var tempAudioUnit: AudioUnit?
        osErr = AudioComponentInstanceNew(component, &tempAudioUnit)
        self.audioUnit = tempAudioUnit

        guard let au = self.audioUnit
            else { return }

        // Enable I/O for input.
        var one_ui32: UInt32 = 1
        osErr = AudioUnitSetProperty(au,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Input,
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))

        // Set format to 32-bit Floats, linear PCM
        let nc = 2 // 2 channel stereo
        var streamFormatDesc: AudioStreamBasicDescription = AudioStreamBasicDescription(
            mSampleRate:        Double(sampleRate),
            mFormatID:          kAudioFormatLinearPCM,
            mFormatFlags:       ( kAudioFormatFlagsNativeFloatPacked ),
            mBytesPerPacket:    UInt32(nc * MemoryLayout<UInt32>.size),
            mFramesPerPacket:   1,
            mBytesPerFrame:     UInt32(nc * MemoryLayout<UInt32>.size),
            mChannelsPerFrame:  UInt32(nc),
            mBitsPerChannel:    UInt32(8 * (MemoryLayout<UInt32>.size)),
            mReserved:          UInt32(0)
        )

        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Input, outputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))
        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Output,
                                     inputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        var inputCallbackStruct
            = AURenderCallbackStruct(inputProc: recordingCallback,
                                     inputProcRefCon:
                                        UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))

        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
                                     AudioUnitScope(kAudioUnitScope_Global),
                                     inputBus,
                                     &inputCallbackStruct,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

        // Ask CoreAudio to allocate buffers for us on render.
        // Is this true by default?
        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioUnitProperty_ShouldAllocateBuffer),
                                     AudioUnitScope(kAudioUnitScope_Output),
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))
        gTmp0 = Int(osErr)
    }

    let recordingCallback: AURenderCallback = { (
        inRefCon,
        ioActionFlags,
        inTimeStamp,
        inBusNumber,
        frameCount,
        ioData ) -> OSStatus in

        let audioObject = unsafeBitCast(inRefCon, to: RecordAudio.self)
        var err: OSStatus = noErr

        // set mData to nil, AudioUnitRender() should be allocating buffers
        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: UInt32(2),
                mDataByteSize: 16,
                mData: nil))

        if let au = audioObject.audioUnit {
            err = AudioUnitRender(au,
                                  ioActionFlags,
                                  inTimeStamp,
                                  inBusNumber,
                                  frameCount,
                                  &bufferList)
        }

        audioObject.processMicrophoneBuffer( inputDataList: &bufferList,
                                             frameCount: UInt32(frameCount) )
        return 0
    }

    func processMicrophoneBuffer( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32 )
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        // Microphone Input Analysis
        // let data = UnsafePointer<Int16>(mBuffers.mData)
        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        if let bptr = bufferPointer {
            let dataArray = bptr.assumingMemoryBound(to: Float.self)
            var sum: Float = 0.0
            var j = self.circInIdx
            let m = self.circBuffSize
            for i in 0..<(count/2) {
                let x = Float(dataArray[i+i  ]) // copy left channel sample
                let y = Float(dataArray[i+i+1]) // copy right channel sample
                self.circBuffer[j    ] = x
                self.circBuffer[j + 1] = y
                j += 2 ; if j >= m { j = 0 } // into circular buffer
                sum += x * x + y * y
            }
            self.circInIdx = j // circular index will always be less than size
            // measuredMicVol_1 = sqrt( Float(sum) / Float(count) ) // scaled volume
            if sum > 0.0 && count > 0 {
                let tmp = 5.0 * (logf(sum / Float(count)) + 20.0)
                let r: Float = 0.2
                audioLevel = r * tmp + (1.0 - r) * audioLevel
            }
        }
    }

    func stopRecording() {
        AudioUnitUninitialize(self.audioUnit!)
        isRecording = false
    }

    func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue )
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                if (isRecording) {
                    stopRecording()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        sessionActive = false
                    } catch {
                    }
                    interrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                if (interrupted) {
                    // potentially restart here
                }
            }
        }
    }
} // end of class RecordAudio
final class RecordAudio_v2: NSObject {

    var auAudioUnit: AUAudioUnit! = nil

    var enableRecording = true
    var audioSessionActive = false
    var audioSetupComplete = false
    var isRecording = false

    var sampleRate: Double = 48000.0 // desired audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768)
    var circInIdx: Int = 0 // sample input index
    var circOutIdx: Int = 0 // sample output index

    var audioLevel: Float = 0.0

    private var micPermissionRequested = false
    private var micPermissionGranted = false

    // for restart from audio interruption notification
    private var audioInterrupted = false

    private var renderBlock: AURenderBlock? = nil

    func startRecording() {
        if isRecording { return }

        if audioSessionActive == false {
            // configure and activate Audio Session, this might change the sampleRate
            setupAudioSessionForRecording()
        }
        guard micPermissionGranted && audioSessionActive else { return }

        let audioFormat = AVAudioFormat(
            commonFormat: AVAudioCommonFormat.pcmFormatInt16, // pcmFormatInt16, pcmFormatFloat32,
            sampleRate: Double(sampleRate),                   // 44100.0 48000.0
            channels: AVAudioChannelCount(2),                 // 1 or 2
            interleaved: true )                               // true for interleaved stereo

        if (auAudioUnit == nil) {
            setupRemoteIOAudioUnitForRecord(audioFormat: audioFormat!)
        }

        renderBlock = auAudioUnit.renderBlock // returns AURenderBlock()

        if ( enableRecording
            && micPermissionGranted
            && audioSetupComplete
            && audioSessionActive
            && isRecording == false ) {

            auAudioUnit.isInputEnabled = true

            auAudioUnit.outputProvider = { // AURenderPullInputBlock()
                (actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in

                if let block = self.renderBlock { // AURenderBlock?
                    let err: OSStatus = block(actionFlags,
                                              timestamp,
                                              frameCount,
                                              1,
                                              inputData,
                                              .none)
                    if err == noErr {
                        // save samples from current input buffer to circular buffer
                        self.recordMicrophoneInputSamples(
                            inputDataList: inputData,
                            frameCount: UInt32(frameCount) )
                    }
                }
                let err2: AUAudioUnitStatus = noErr
                return err2
            }

            do {
                circInIdx = 0 // initialize circular buffer pointers
                circOutIdx = 0
                try auAudioUnit.allocateRenderResources()
                try auAudioUnit.startHardware() // equivalent to AudioOutputUnitStart ???
                isRecording = true
            } catch {
                // placeholder for error handling
            }
        }
    }

    func stopRecording() {
        if (isRecording) {
            auAudioUnit.stopHardware()
            isRecording = false
        }
        if (audioSessionActive) {
            let audioSession = AVAudioSession.sharedInstance()
            do {
                try audioSession.setActive(false)
            } catch /* let error as NSError */ {
            }
            audioSessionActive = false
        }
    }

    private func recordMicrophoneInputSamples( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32 )
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        var j = self.circInIdx // current circular array input index
        let n = self.circBuffSize
        var audioLevelSum: Float = 0.0
        if let bptr = bufferPointer?.assumingMemoryBound(to: Int16.self) {
            for i in 0..<(count/2) {
                // Save samples in circular buffer for latter processing
                let x = Float(bptr[i+i  ])
                let y = Float(bptr[i+i+1])
                self.circBuffer[j    ] = x // Stereo Left
                self.circBuffer[j + 1] = y // Stereo Right
                j += 2 ; if j >= n { j = 0 } // Circular buffer looping
                // Microphone Input Analysis
                audioLevelSum += x * x + y * y
            }
        }
        OSMemoryBarrier(); // from libkern/OSAtomic.h
        self.circInIdx = j // circular index will always be less than size
        if audioLevelSum > 0.0 && count > 0 {
            audioLevel = logf(audioLevelSum / Float(count))
        }
    }

    // set up and activate Audio Session
    func setupAudioSessionForRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()

            if (micPermissionGranted == false) {
                if (micPermissionRequested == false) {
                    micPermissionRequested = true
                    audioSession.requestRecordPermission({(granted: Bool) -> Void in
                        if granted {
                            self.micPermissionGranted = true
                            self.startRecording()
                            return
                        } else {
                            self.enableRecording = false
                            // dispatch in main/UI thread an alert
                            // informing that mic permission is not switched on
                        }
                    })
                }
                return
            }

            if enableRecording {
                try audioSession.setCategory(AVAudioSession.Category.record)
            }
            let preferredIOBufferDuration = 0.0053 // 5.3 milliseconds = 256 samples
            try audioSession.setPreferredSampleRate(sampleRate) // at 48000.0
            try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

            NotificationCenter.default.addObserver(
                forName: AVAudioSession.interruptionNotification,
                object: nil,
                queue: nil,
                using: myAudioSessionInterruptionHandler )

            try audioSession.setActive(true)
            audioSessionActive = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    // find and set up the sample format for the RemoteIO Audio Unit
    private func setupRemoteIOAudioUnitForRecord(audioFormat: AVAudioFormat) {
        do {
            let audioComponentDescription = AudioComponentDescription(
                componentType: kAudioUnitType_Output,
                componentSubType: kAudioUnitSubType_RemoteIO,
                componentManufacturer: kAudioUnitManufacturer_Apple,
                componentFlags: 0,
                componentFlagsMask: 0 )

            try auAudioUnit = AUAudioUnit(componentDescription: audioComponentDescription)

            // bus 1 is for data that the microphone exports out to the handler block
            let bus1 = auAudioUnit.outputBusses[1]
            try bus1.setFormat(audioFormat) // for microphone bus
            audioSetupComplete = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }

    private func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interuptionDict = notification.userInfo
        if let interuptionType = interuptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interuptionVal = AVAudioSession.InterruptionType(
                rawValue: (interuptionType as AnyObject).uintValue )
            if (interuptionVal == AVAudioSession.InterruptionType.began) {
                // [self beginInterruption];
                if (isRecording) {
                    auAudioUnit.stopHardware()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        audioSessionActive = false
                    } catch {
                        // placeholder for error handling
                    }
                    audioInterrupted = true
                }
            } else if (interuptionVal == AVAudioSession.InterruptionType.ended) {
                // [self endInterruption];
                if (audioInterrupted) {
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(true)
                        audioSessionActive = true
                        if (auAudioUnit.renderResourcesAllocated == false) {
                            try auAudioUnit.allocateRenderResources()
                        }
                        try auAudioUnit.startHardware()
                        isRecording = true
                    } catch {
                        // placeholder for error handling
                    }
                }
            }
        }
    }
} // end of RecordAudio class
// eof
Use this, but update processMicrophoneBuffer to feed the data to aubio:
// assumed properties on the recorder class:
// var tempo: OpaquePointer?
// var samples: OpaquePointer?
// let sampleSize: uint_t = 512
// let silenceThreshold: smpl_t = -40.0

func setupAubio(samplerate: UInt32) {
    samples = new_fvec(sampleSize)
    tempo = new_aubio_tempo("default", 1024, sampleSize, samplerate)
    aubio_tempo_set_silence(tempo!, silenceThreshold)
}
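When to call this is up to you; one plausible spot (an assumption, not from the original gist) is right after the audio session has been activated, so the detector matches the actual hardware sample rate:

setupAubio(samplerate: UInt32(AVAudioSession.sharedInstance().sampleRate))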
func processMicrophoneBuffer(inputDataList: UnsafeMutablePointer<AudioBufferList>, frameCount: UInt32) {
    guard let samples = samples, let tempo = tempo else { return }
    // unpack the interleaved stereo Float buffer, as in the gist above
    let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
    let mBuffers: AudioBuffer = inputDataPtr[0]
    let count = Int(frameCount)
    guard let bptr = mBuffers.mData else { return }
    let dataArray = bptr.assumingMemoryBound(to: Float.self)
    let out = new_fvec(1)
    var sampleCount: UInt32 = 0
    for i in 0..<(count/2) {
        let x = Float(dataArray[i+i  ]) // copy left channel sample
        let y = Float(dataArray[i+i+1]) // copy right channel sample
        fvec_set_sample(samples, (x + y) * 0.5, sampleCount) // average both channels to mono
        sampleCount += 1
        if sampleCount == sampleSize || i == count/2 - 1 {
            aubio_tempo_do(tempo, samples, out)
            if (fvec_get_sample(out, 0) != 0) {
                // Yay! A BEAT!!!
                break
            }
            sampleCount = 0
        }
    }
    del_fvec(out)
}
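One refinement worth considering (a suggestion, not something the original code does): this callback runs on the real-time audio thread, where allocation is best avoided, so you could make out a property (say, var out: OpaquePointer?), create it once, and free it together with the other pointers:

// in setupAubio(), next to samples:
out = new_fvec(1)
// in stopRecording(), next to del_fvec(samples):
del_fvec(out)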
func stopRecording() {
    if let tempo = tempo, let samples = samples {
        del_aubio_tempo(tempo)
        del_fvec(samples)
        self.tempo = nil
        self.samples = nil
    }
}
Note: Make sure you clean up the pointers in stopRecording
to avoid memory leaks.