Major refactor to clean things up. The MFCC appears to be working.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# node-mfcc
|
||||
Node.JS implementation of the MFCC algorithm.
|
||||
Node.JS implementation of the MFCC (Mel Frequency Cepstral Coefficients) algorithm.
|
||||
|
||||
# Introduction
|
||||
|
||||
@@ -7,7 +7,7 @@ Code in this project was made by following the tutorial here:
|
||||
|
||||
[http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/](http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/)
|
||||
|
||||
The proper process by which to compute the MFCC is:
|
||||
To compute the MFCC:
|
||||
|
||||
1. Frame samples into `2^N` sized buffers where `N` is an integer.
|
||||
2. Pass frames into the Fast Fourier Transform to produce `F` frequency bins.
|
||||
@@ -24,7 +24,7 @@ The reason the term 'Cepstral' is used is that it is a play on spectral. In ordi
|
||||
time-domain data. However, in step (6) above we are performing a discrete cosine transform on information that is already in the
|
||||
frequency domain. As a result, the pseudo-spectral term cepstral was invented.
|
||||
|
||||
The reason for the discrete cosine transformation step is to remove any codependency from the Mel-Scale values after applying the triangular filters.
|
||||
The reason for the discrete cosine transformation step is to both compress the mel-bands and to autocorrelate them.
|
||||
|
||||
# Example
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
5.904757629127576e-7,0.0000013999298934710302,0.0000013999298934710302,0.000004265992436375758,0.000004265992436375758,0.000016099656669393937,0.000016099656669393937,0.00012208873831350304,1.0729054448683637,1.0729054448683637,5.671305168245454,1.0728470246934547
|
||||
|
@@ -1,4 +0,0 @@
|
||||
[ 1,
|
||||
0,
|
||||
1,
|
||||
0 ]
|
||||
@@ -1,5 +1 @@
|
||||
module.exports = {
|
||||
Framer: require('./src/sampleFramer'),
|
||||
mfcc: require('./src/mfcc'),
|
||||
windows: require('./src/sampleWindows')
|
||||
};
|
||||
module.exports = require('./src/mfcc');
|
||||
|
||||
@@ -7,207 +7,117 @@
|
||||
* human speech analysis.
|
||||
\*===========================================================================*/
|
||||
var program = require('commander'),
|
||||
Stats = require('fast-stats').Stats,
|
||||
fs = require('fs'),
|
||||
path = require('path'),
|
||||
fjs = require('frequencyjs'),
|
||||
wav = require('wav'),
|
||||
Framer = require('./').Framer,
|
||||
windows = require('./').windows,
|
||||
mfcc = require('./').mfcc,
|
||||
dct = require('dct');
|
||||
|
||||
var u2pcm = require('./u2pcm');
|
||||
|
||||
for (var k in u2pcm)
|
||||
{
|
||||
u2pcm[k] = u2pcm[k] / 32767;
|
||||
}
|
||||
fs = require('fs'),
|
||||
path = require('path'),
|
||||
wav = require('wav'),
|
||||
fft = require('fft-js'),
|
||||
Framer = require('signal-windows').framer,
|
||||
ham = undefined,
|
||||
mfcc = require('./');
|
||||
|
||||
program.version('0.1')
|
||||
.usage('[options]')
|
||||
.option('-v, --verbose', 'Supply to turn on verbose output. Default 0.', 0)
|
||||
.option('-w, --wav [wav]', '[INPUT]: Supply a wave file to process.', undefined)
|
||||
.option('-f, --fft [json]', '[INPUT]: Supply a JSON file of FFT bins to process (primarily for testing).', undefined)
|
||||
.option('-d, --dct [json]', '[INPUT]: Supply a JSON file of inputs for the DCT stage to process (primarily for testing).', undefined)
|
||||
.option('-o, --output [csv]', 'Output CSV (will append).', undefined);
|
||||
.option('-v, --verbose', 'True for verbose output.', 0)
|
||||
.option('-d, --debug [type]', '0: none, 1: Output power spectrum, post-filterbank values, and Mel coefficients. 2: output filter banks used. 3: output frequency magnitudes. Default 0.', 0)
|
||||
.option('-w, --wav [wav]', 'Wave file path to process.', undefined)
|
||||
.option('-m, --minFrequency [number]', 'Low frequency cutoff for MFCC. Default 300', 300)
|
||||
.option('-x, --maxFrequency [number]', 'High frequency cutoff for MFCC. Default 3500', 3500)
|
||||
.option('-f, --numMelSpecFilters [number]', 'Number of mel spec filter banks to use. Default is 26.', 26)
|
||||
.option('-n, --samplesPerFrame [number]', 'Number of samples per frame to pass into the FFT. Default 128.', 128)
|
||||
.option('-s, --samplesPerStep [number]', 'Number of samples to step between each frame. Default is samplesPerFrame.', 128);
|
||||
|
||||
program.parse(process.argv);
|
||||
|
||||
if (program.wav === undefined && program.fft === undefined && program.dct === undefined)
|
||||
if (program.wav === undefined)
|
||||
{
|
||||
console.log('Must choose an input type from the program options.');
|
||||
program.outputHelp();
|
||||
process.exit(1);
|
||||
console.log('Please provide a wave file to process.');
|
||||
program.outputHelp();
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
if ((program.wav && program.fft) || (program.wav && program.dct) || (program.fft && program.dct))
|
||||
{
|
||||
console.log('Please provide a .wav file, a .json FFT, or a .json DCT file but not more than one!');
|
||||
process.exit(1);
|
||||
}
|
||||
program.minFrequency = parseInt(program.minFrequency);
|
||||
program.maxFrequency = parseInt(program.maxFrequency);
|
||||
program.numMelSpecFilters = parseInt(program.numMelSpecFilters);
|
||||
program.samplesPerFrame = parseInt(program.samplesPerFrame);
|
||||
program.samplesPerStep = parseInt(program.samplesPerStep);
|
||||
|
||||
var freqAssigned = false;
|
||||
sampleRate = 8000,
|
||||
minFreq = 300,
|
||||
maxFreq = 3500,
|
||||
nMelSpecFilters = 26,
|
||||
framerSamples = 64,
|
||||
framerStep = 64,
|
||||
fftBins = framerSamples / 2,
|
||||
framer = undefined,
|
||||
bins = [],
|
||||
binsStats = [],
|
||||
binsToFreq = [],
|
||||
win = windows.construct('ham', framerSamples, 0.71);
|
||||
|
||||
for (var i = 0; i < fftBins; i++) bins[i] = [];
|
||||
|
||||
/*-----------------------------------------------------------------------------------*\
|
||||
* FFT JSON file
|
||||
\*-----------------------------------------------------------------------------------*/
|
||||
if (program.fft)
|
||||
{
|
||||
// Assumes the file contains an array of FFT bins
|
||||
var amplitudes = parseFFTAmplitudes(program.fft);
|
||||
|
||||
// This filterbank, periodogram and melspec generation are an important part of the MFCC as a whole,
|
||||
// but not necessary in the testing of the DCT on its own, using the -d option.
|
||||
var filterBank = mfcc.constructFilterBank(amplitudes.length, nMelSpecFilters, minFreq, maxFreq, sampleRate);
|
||||
|
||||
var freqPowers = mfcc.periodogram(amplitudes),
|
||||
melSpec = filterBank(freqPowers);
|
||||
|
||||
var melCoefficients = dct(melSpec);
|
||||
|
||||
var columns = melCoefficients.map(function (mc, ix) {
|
||||
return 'mfcc' + (ix+1);
|
||||
});
|
||||
|
||||
if (program.output)
|
||||
output(columns, [melCoefficients]);
|
||||
else
|
||||
console.log(melSpec);
|
||||
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------------*\
|
||||
* DCT JSON file
|
||||
\*-----------------------------------------------------------------------------------*/
|
||||
if (program.dct)
|
||||
{
|
||||
var spectrum = parseFFTAmplitudes(program.dct);
|
||||
|
||||
var dctCoefficients = dct(spectrum);
|
||||
|
||||
var columns = dctCoefficients.map(function(mc, ix) {
|
||||
return 'dct' + (ix + 1);
|
||||
});
|
||||
|
||||
if (program.output) {
|
||||
output(columns, [dctCoefficients]);
|
||||
} else {
|
||||
console.log(dctCoefficients);
|
||||
}
|
||||
}
|
||||
if (program.samplesPerFrame & (program.samplesPerFrame-1) !== 0)
|
||||
throw Error('Please provide a samplesPerFrame that is a power of 2 (e.g. 32, 64, 128, 256, etc.). Was: ' + program.samplesPerFrame);
|
||||
|
||||
var mfcc, // We construct after loading the wav file and reading the header.
|
||||
framer, // Framer is also constructed after loading the wav file
|
||||
sampleRate;
|
||||
|
||||
/*-----------------------------------------------------------------------------------*\
|
||||
* .wav file
|
||||
\*-----------------------------------------------------------------------------------*/
|
||||
if (program.wav)
|
||||
{
|
||||
var wr = new wav.Reader(),
|
||||
filterBank = mfcc.constructFilterBank(fftBins, nMelSpecFilters, minFreq, maxFreq, sampleRate);
|
||||
|
||||
wr.on('data', function (buffer, offset, length) {
|
||||
framer.frame(buffer, function (frame, fIx) {
|
||||
var spectrum = fjs.toSpectrum(frame, {
|
||||
sampling: sampleRate,
|
||||
method: 'fft'
|
||||
}),
|
||||
amplitudes = [];
|
||||
var wr = new wav.Reader();
|
||||
|
||||
wr.on('data', function (buffer, offset, length) {
|
||||
framer.frame(buffer, function (frame, fIx) {
|
||||
if (frame.length != program.samplesPerFrame) return;
|
||||
|
||||
spectrum.forEach(function (spectra, ix) {
|
||||
if (!freqAssigned) binsToFreq[ix] = spectra.frequency;
|
||||
bins[ix].push(spectra.amplitude);
|
||||
amplitudes.push(spectra.amplitude);
|
||||
});
|
||||
var phasors = fft.fft(frame),
|
||||
phasorMagnitudes = fft.util.fftMag(phasors),
|
||||
result = mfcc(phasorMagnitudes, program.debug && true);
|
||||
|
||||
console.log(spectrum);
|
||||
//var freqPowers = mfcc.periodogram(amplitudes),
|
||||
// melSpec = filterBank(freqPowers);
|
||||
if (program.debug == 1)
|
||||
{
|
||||
console.log('Frame ' + fIx);
|
||||
console.log('Frame ' + frame.join(','));
|
||||
console.log('FFT ' + phasorMagnitudes.join(','));
|
||||
console.log('Powers: ' + result.powers.join(','));
|
||||
console.log('Post-filters: ' + result.melSpec.join(','));
|
||||
console.log('Post-DCT: ' + result.melCoef.join(','));
|
||||
}
|
||||
else if (program.debug == 2)
|
||||
{
|
||||
console.log('Filters: ', result.filters);
|
||||
}
|
||||
else if (program.debug == 3)
|
||||
{
|
||||
console.log(phasorMagnitudes.join(','));
|
||||
}
|
||||
else if (!program.debug) console.log(fIx + ',' + result.join(','));
|
||||
});
|
||||
});
|
||||
|
||||
//var melCoefficients= dct.run(melSpec);
|
||||
//console.log([fIx].concat(melCoefficients).join(','));
|
||||
});
|
||||
});
|
||||
wr.on('format', function (format) {
|
||||
|
||||
wr.on('format', function (format) {
|
||||
//TODO customize framer based on actual wave headers rather
|
||||
// than assuming ulaw!
|
||||
framer = new Framer({
|
||||
map: u2pcm,
|
||||
sizeS: framerSamples,
|
||||
stepS: framerStep,
|
||||
scale: win
|
||||
});
|
||||
});
|
||||
var sampleRate = format.sampleRate;
|
||||
|
||||
wr.on('end', function () {
|
||||
statistics();
|
||||
if (program.args.length == 2) writeToCSV();
|
||||
var end = new Date().getTime();
|
||||
console.log('Elapsed: ', end - start);
|
||||
console.log(binsStats);
|
||||
process.exit(1);
|
||||
});
|
||||
ham = require('signal-windows').windows.construct('ham', program.samplesPerFrame);
|
||||
|
||||
var start = new Date().getTime();
|
||||
fs.createReadStream(program.args[0]).pipe(wr);
|
||||
}
|
||||
var ulawMap = format.ulaw ? JSON.parse(fs.readFileSync('data/ulaw2pcm.json').toString()) : undefined;
|
||||
|
||||
function parseFFTAmplitudes(fftBinFile) {
|
||||
var amplitudes = JSON.parse(fs.readFileSync(path.resolve(fftBinFile)).toString());
|
||||
return amplitudes.map(parseFloat);
|
||||
}
|
||||
if (ulawMap) for (var k in ulawMap) ulawMap[k] = ulawMap[k]/32767;
|
||||
|
||||
function statistics() {
|
||||
bins.forEach(function (bin, ix) {
|
||||
var s = (new Stats()).push(bin);
|
||||
binsStats[ix] = {
|
||||
freq: binsToFreq[ix],
|
||||
amean: s.amean()
|
||||
};
|
||||
});
|
||||
}
|
||||
if (format.channels != 1)
|
||||
throw Error('Right now this MFCC code only works on single channel 8-bit wave files.');
|
||||
if (format.bitDepth != 8)
|
||||
throw Error('Right now this MFCC code only works on single channel 8-bit wave files.');
|
||||
|
||||
function writeCSVHeader(columns) {
|
||||
var headers = columns.join(',');
|
||||
// Breaks samples up into frames and runs them through a transform (map) if
|
||||
// provided. In our case we want to transform from u-law if the wave file is
|
||||
// formatted as such.
|
||||
// By default we force a 'hamming' window.
|
||||
framer = new Framer({
|
||||
map: ulawMap,
|
||||
frameSize: program.samplesPerFrame,
|
||||
frameStep: program.samplesPerStep,
|
||||
scale: ham,
|
||||
sampleType: 'UInt8'
|
||||
});
|
||||
|
||||
fs.writeFileSync(program.output, headers);
|
||||
}
|
||||
mfcc = mfcc.construct(program.samplesPerFrame / 2,
|
||||
program.numMelSpecFilters,
|
||||
program.minFrequency,
|
||||
program.maxFrequency,
|
||||
format.sampleRate);
|
||||
});
|
||||
|
||||
function output(columns, rows) {
|
||||
// First see if CSV already exists
|
||||
if (fs.existsSync(program.output))
|
||||
{
|
||||
var contents = fs.readFileSync(program.output).toString();
|
||||
wr.on('end', function () {
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
if (contents.indexOf(columns[0]) == -1)
|
||||
writeCSVHeader(columns);
|
||||
// Otherwise header already exists
|
||||
}
|
||||
else
|
||||
writeCSVHeader(columns);
|
||||
|
||||
rows = rows.map(function (r) {
|
||||
return r.join(',');
|
||||
});
|
||||
|
||||
rows = rows.join('\n');
|
||||
|
||||
// Write out a single line of data
|
||||
fs.appendFileSync(program.output, '\n' + rows);
|
||||
}
|
||||
fs.createReadStream(program.wav).pipe(wr);
|
||||
|
||||
-26
@@ -1,26 +0,0 @@
|
||||
1.423903e+02
|
||||
8.032330e+01
|
||||
2.215118e+02
|
||||
1.672679e+02
|
||||
5.327997e+02
|
||||
2.418544e+02
|
||||
2.060463e+03
|
||||
8.842805e+04
|
||||
1.140045e+05
|
||||
4.408057e+05
|
||||
1.329174e+05
|
||||
6.295898e+04
|
||||
1.384400e+03
|
||||
6.334704e+02
|
||||
3.695203e+02
|
||||
2.444446e+02
|
||||
1.701005e+02
|
||||
1.278180e+02
|
||||
9.702592e+01
|
||||
7.697556e+01
|
||||
7.005164e+01
|
||||
5.945079e+01
|
||||
1.318371e+02
|
||||
4.292482e+03
|
||||
5.036520e+03
|
||||
5.994083e+02
|
||||
|
@@ -28,7 +28,9 @@
|
||||
"dct": "^0.0.2",
|
||||
"fast-stats": "0.0.2",
|
||||
"fft": "^0.2.1",
|
||||
"fft-js": "0.0.6",
|
||||
"frequencyjs": "0.0.3",
|
||||
"signal-windows": "0.0.1",
|
||||
"wav": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
+135
-92
@@ -9,113 +9,156 @@
|
||||
var dct = require('dct');
|
||||
|
||||
module.exports = {
|
||||
/*
|
||||
* Given a set of amplitudes, estimates the power for those amplitudes.
|
||||
*/
|
||||
periodogram: periodogram,
|
||||
/*
|
||||
* Converts from hertz to the Mel-scale. Used by constructFilterBank.
|
||||
*
|
||||
* Based on the concept that human perception of an equidistant pitch decreases as
|
||||
* pitch increases.
|
||||
*/
|
||||
hzToMels: hzToMels,
|
||||
/*
|
||||
* Inverse of hzToMels.
|
||||
*/
|
||||
melsToHz: melsToHz,
|
||||
/*
|
||||
* Returns a filter bank with nBanks triangular filters distributed according to the mel scale.
|
||||
*
|
||||
* Focused specifically on human speech (300 hz - 8000 hz)
|
||||
*
|
||||
* Recommended values for u-law 8000 hz:
|
||||
*
|
||||
* - nFrequencies == 32 (64 bit FFT)
|
||||
* - nBanks == 31
|
||||
* - Low Frequency == 200
|
||||
* - High Frequency == 3500
|
||||
*/
|
||||
constructFilterBank: constructFilterBank
|
||||
/*
|
||||
* Given a set of amplitudes, estimates the power for those amplitudes.
|
||||
*/
|
||||
powerSpectrum: powerSpectrum,
|
||||
/*
|
||||
* Converts from hertz to the Mel-scale. Used by constructFilterBank.
|
||||
*
|
||||
* Based on the concept that human perception of an equidistant pitch decreases as
|
||||
* pitch increases.
|
||||
*/
|
||||
hzToMels: hzToMels,
|
||||
/*
|
||||
* Inverse of hzToMels.
|
||||
*/
|
||||
melsToHz: melsToHz,
|
||||
/*
|
||||
* Returns a filter bank with bankCount triangular filters distributed according to the mel scale.
|
||||
*
|
||||
* Focused specifically on human speech (300 hz - 8000 hz)
|
||||
*
|
||||
* Recommended values for u-law 8000 hz:
|
||||
*
|
||||
* - fftSize == 64 (128 bin FFT)
|
||||
* - bankCount == 31
|
||||
* - Low Frequency == 200
|
||||
* - High Frequency == 3500
|
||||
*/
|
||||
constructMelFilterBank: constructMelFilterBank,
|
||||
construct: construct
|
||||
};
|
||||
|
||||
function constructFilterBank(nFrequencies, nFilters, lowF, highF, sampleRate) {
|
||||
var bins = [],
|
||||
fq = [],
|
||||
filters = [];
|
||||
function construct(fftSize, bankCount, lowFrequency, highFrequency, sampleRate) {
|
||||
if (!fftSize) throw Error('Please provide an fftSize');
|
||||
if (!bankCount) throw Error('Please provide a bankCount');
|
||||
if (!lowFrequency) throw Error('Please provide a low frequency cutoff.');
|
||||
if (!highFrequency) throw Error('Please provide a high frequency cutoff.');
|
||||
if (!sampleRate) throw Error('Please provide a valid sampleRate.');
|
||||
|
||||
var lowM = hzToMels(lowF),
|
||||
highM = hzToMels(highF),
|
||||
deltaM = (highM - lowM) / (nFilters+1);
|
||||
var filterBank = constructMelFilterBank(fftSize, bankCount, lowFrequency, highFrequency, sampleRate);
|
||||
|
||||
// Construct equidistant Mel values between lowM and highM.
|
||||
for (var i = 0; i < nFilters; i++) {
|
||||
// Get the Mel value and convert back to frequency.
|
||||
// e.g. 200 hz <=> 401.25 Mel
|
||||
fq[i] = melsToHz(lowM + (i * deltaM));
|
||||
/**
|
||||
* Perform a full MFCC on a FFT spectrum.
|
||||
*
|
||||
* FFT Array passed in should contain frequency amplitudes only.
|
||||
*
|
||||
* Pass in truthy for debug if you wish to return outputs of each step (freq. powers, melSpec, and MelCoef)
|
||||
*/
|
||||
return function (fft, debug) {
|
||||
if (fft.length != fftSize)
|
||||
throw Error('Passed in FFT bins were incorrect size. Expected ' + fftSize + ' but was ' + fft.length);
|
||||
|
||||
// Round the frequency we derived from the Mel-scale to the nearest actual FFT bin that we have.
|
||||
// For example, in a 64 sample FFT for 8khz audio we have 32 bins from 0-8khz evenly spaced.
|
||||
bins[i] = Math.floor((nFrequencies+1) * fq[i] / (sampleRate/2));
|
||||
var powers = powerSpectrum(fft),
|
||||
melSpec = filterBank.filter(powers),
|
||||
melCoef = dct(melSpec);
|
||||
|
||||
return debug ? {
|
||||
powers: powers,
|
||||
melSpec: melSpec,
|
||||
melCoef: melCoef,
|
||||
filters: filterBank
|
||||
} : melCoef;
|
||||
}
|
||||
}
|
||||
|
||||
function constructMelFilterBank(fftSize, nFilters, lowF, highF, sampleRate) {
|
||||
var bins = [],
|
||||
fq = [],
|
||||
filters = [];
|
||||
|
||||
var lowM = hzToMels(lowF),
|
||||
highM = hzToMels(highF),
|
||||
deltaM = (highM - lowM) / (nFilters+1);
|
||||
|
||||
// Construct equidistant Mel values between lowM and highM.
|
||||
for (var i = 0; i < nFilters; i++) {
|
||||
// Get the Mel value and convert back to frequency.
|
||||
// e.g. 200 hz <=> 401.25 Mel
|
||||
fq[i] = melsToHz(lowM + (i * deltaM));
|
||||
|
||||
// Round the frequency we derived from the Mel-scale to the nearest actual FFT bin that we have.
|
||||
// For example, in a 64 sample FFT for 8khz audio we have 32 bins from 0-8khz evenly spaced.
|
||||
bins[i] = Math.floor((fftSize+1) * fq[i] / (sampleRate/2));
|
||||
}
|
||||
|
||||
// Construct one cone filter per bin.
|
||||
// Filters end up looking similar to [... 0, 0, 0.33, 0.66, 1.0, 0.66, 0.33, 0, 0...]
|
||||
for (var i = 0; i < bins.length; i++)
|
||||
{
|
||||
filters[i] = [];
|
||||
var filterRange = (i != bins.length-1) ? bins[i+1] - bins[i] : bins[i] - bins[i-1];
|
||||
filters[i].filterRange = filterRange;
|
||||
for (var f = 0; f < fftSize; f++) {
|
||||
// Right, outside of cone
|
||||
if (f > bins[i] + filterRange) filters[i][f] = 0.0;
|
||||
// Right edge of cone
|
||||
else if (f > bins[i]) filters[i][f] = 1.0 - ((f - bins[i]) / filterRange);
|
||||
// Peak of cone
|
||||
else if (f == bins[i]) filters[i][f] = 1.0;
|
||||
// Left edge of cone
|
||||
else if (f >= bins[i] - filterRange) filters[i][f] = 1.0 - (bins[i] - f) / filterRange;
|
||||
// Left, outside of cone
|
||||
else filters[i][f] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct one cone filter per bin.
|
||||
// Filters end up looking similar to [... 0, 0, 0.33, 0.66, 1.0, 0.66, 0.33, 0, 0...]
|
||||
for (var i = 0; i < bins.length; i++)
|
||||
{
|
||||
filters[i] = [];
|
||||
var filterRange = (i != bins.length-1) ? bins[i+1] - bins[i] : bins[i] - bins[i-1];
|
||||
filters[i].filterRange = filterRange;
|
||||
for (var f = 0; f < nFrequencies; f++) {
|
||||
// Right, outside of cone
|
||||
if (f > bins[i] + filterRange) filters[i][f] = 0.0;
|
||||
// Right edge of cone
|
||||
else if (f > bins[i]) filters[i][f] = 1.0 - ((f - bins[i]) / filterRange);
|
||||
// Peak of cone
|
||||
else if (f == bins[i]) filters[i][f] = 1.0;
|
||||
// Left edge of cone
|
||||
else if (f >= bins[i] - filterRange) filters[i][f] = 1.0 - (bins[i] - f) / filterRange;
|
||||
// Left, outside of cone
|
||||
else filters[i][f] = 0.0;
|
||||
}
|
||||
// Store for debugging.
|
||||
filters.bins = bins;
|
||||
|
||||
// Here we actually apply the filters one by one. Then we add up the results of each applied filter
|
||||
// to get the estimated power contained within that Mel-scale bin.
|
||||
//
|
||||
// First argument is expected to be the result of the frequencies passed to the powerSpectrum
|
||||
// method.
|
||||
return {
|
||||
filters: filters,
|
||||
lowMel: lowM,
|
||||
highMel: highM,
|
||||
deltaMel: deltaM,
|
||||
lowFreq: lowF,
|
||||
highFreq: highF,
|
||||
filter: function (freqPowers) {
|
||||
var ret = [];
|
||||
|
||||
filters.forEach(function (filter, fIx) {
|
||||
var tot = 0;
|
||||
freqPowers.forEach(function (fp, pIx) {
|
||||
tot += fp * filter[pIx];
|
||||
});
|
||||
ret[fIx] = tot;
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Store for debugging.
|
||||
filters.bins = bins;
|
||||
|
||||
// Here we actually apply the filters one by one. Then we add up the results of each applied filter
|
||||
// to get the estimated power contained within that Mel-scale bin.
|
||||
//
|
||||
// First argument is expected to be the result of the frequencies passed to the periodogram
|
||||
// method.
|
||||
return function (freqPowers) {
|
||||
var ret = [];
|
||||
filters.forEach(function (filter, fIx) {
|
||||
var tot = 0;
|
||||
freqPowers.forEach(function (fp, pIx) {
|
||||
tot += fp * filter[pIx];
|
||||
});
|
||||
ret[fIx] = tot;
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
function melsToHz(mels) {
|
||||
return 700 * (Math.exp(mels / 1127) - 1);
|
||||
return 700 * (Math.exp(mels / 1127) - 1);
|
||||
}
|
||||
|
||||
function hzToMels(hertz) {
|
||||
return 1127 * Math.log(1 + hertz/700);
|
||||
return 1127 * Math.log(1 + hertz/700);
|
||||
}
|
||||
|
||||
function periodogram(amplitudes) {
|
||||
var power = [],
|
||||
N = amplitudes.length;
|
||||
/**
|
||||
* Estimate the power spectrum density from FFT amplitudes.
|
||||
*/
|
||||
function powerSpectrum(amplitudes) {
|
||||
var N = amplitudes.length;
|
||||
|
||||
for (var i = 0; i < N; i++) {
|
||||
power[i] = (amplitudes[i] * amplitudes[i]) / N;
|
||||
}
|
||||
|
||||
return power;
|
||||
return amplitudes.map(function (a) {
|
||||
return (a * a) / N;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
var Framer = function (options) {
|
||||
options = options || {};
|
||||
|
||||
this.fIx = 0;
|
||||
this.sizeS = options.sizeS || 64;
|
||||
this.stepS = options.stepS || this.sizeS;
|
||||
this.map = options.map || undefined;
|
||||
this.scale = options.scale || undefined;
|
||||
this.offset = options.offset || 0;
|
||||
};
|
||||
|
||||
Framer.prototype = {
|
||||
frame: function(buffer, callback) {
|
||||
var self = this,
|
||||
cb = this.offset,
|
||||
frame = [];
|
||||
|
||||
while (cb < buffer.length) {
|
||||
if (this.map) frame.push(this.map[buffer.readUInt8(cb)]);
|
||||
else frame.push(buffer.readUInt8(cb));
|
||||
|
||||
if (frame.length == this.sizeS)
|
||||
{
|
||||
if (this.scale)
|
||||
frame = frame.map(function (s, ix) {
|
||||
return s * self.scale[ix];
|
||||
});
|
||||
|
||||
callback(frame, this.fIx);
|
||||
frame = [];
|
||||
cb -= (this.sizeS - this.stepS);
|
||||
this.fIx++;
|
||||
}
|
||||
cb++;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = Framer;
|
||||
@@ -1,32 +0,0 @@
|
||||
module.exports = {
|
||||
construct: function (type, samples, alpha) {
|
||||
var coef = [];
|
||||
switch (type)
|
||||
{
|
||||
case 'ham':
|
||||
for (var i = 0; i < samples; i++) {
|
||||
coef[i] = 0.54 - 0.46 * Math.cos((2 * Math.PI * i)/(samples - 1));
|
||||
}
|
||||
break;
|
||||
case 'tukey':
|
||||
var sm1 = samples - 1,
|
||||
asm1 = alpha * sm1;
|
||||
for (var i = 0; i < samples; i++) {
|
||||
// Left tail
|
||||
if (i < asm1 / 2)
|
||||
coef[i] = 0.5 * (1 + Math.cos(Math.PI * ((2 * samples)/(asm1) - 1)));
|
||||
// Right tail
|
||||
else if (i > (sm1 * (1 - alpha / 2)))
|
||||
coef[i] = 0.5 * (1 + Math.cos(Math.PI * ((2 * samples)/(asm1) - (2 / alpha) + 1)));
|
||||
// Middle (rectangular)
|
||||
else coef[i] = 1;
|
||||
}
|
||||
break;
|
||||
case 'flat':
|
||||
for (var i = 0; i < samples; i++)
|
||||
coef[i] = 1.0;
|
||||
break;
|
||||
}
|
||||
return coef;
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user