HART  0.2.0
High level Audio Regression and Testing
Loading...
Searching...
No Matches
hart_spectrum.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <vector>
4#include <cmath> // sin(), cos(), abs()
5#include <complex>
6#include <algorithm>
7#include <utility> // swap(), make_pair(), pair
8
11#include "hart_slice.hpp"
12#include "hart_utils.hpp" // nan(), roundToSizeT()
13
14namespace hart
15{
16
17/// @brief Frequency-domain representation of a multi-channel audio signal
18/// @details
19/// Stores complex spectra for each channel independently.
20/// Spectrum is constructed by performing FFT on the entire signal,
21/// zero-padding to the next power-of-two size if necessary.
22///
23/// Current implementation assumptions:
24/// - Rectangular window (no windowing)
25/// - Real-valued input signal
26/// - One-sided spectrum only (`fftSize / 2 + 1` bins)
27/// - Double floating-point precision, regardless of input signal value types
28/// @ingroup DataStructures
30{
31public:
32 /// @brief Constructs spectrum from an audio buffer
33 /// @param buffer Audio buffer to perform FFT on
34 /// @throws hart::SampleRateError if the audio buffer has no sample rate metadata
35 template <typename SampleType>
36 Spectrum (const AudioBuffer<SampleType>& buffer)
37 {
38 if (! buffer.hasSampleRate())
39 HART_THROW_OR_RETURN_VOID (hart::SampleRateError, "Audio buffer has no sample rate metadata");
40
41 m_sampleRateHz = buffer.getSampleRateHz();
42 m_numChannels = buffer.getNumChannels();
43 m_data.resize (m_numChannels);
44
45 const size_t numFrames = buffer.getNumFrames();
46
47 m_fftSize = nextPowerOfTwo (std::max<size_t> (1, numFrames));
48 m_numBins = m_fftSize / 2 + 1;
49
50 for (size_t channel = 0; channel < m_numChannels; ++channel)
51 {
52 m_data[channel].resize (m_numBins);
53 std::vector<std::complex<double>> fftBuffer (m_fftSize);
54 const SampleType* samples = buffer[channel];
55
56 for (size_t frame = 0; frame < numFrames; ++frame)
57 fftBuffer[frame] = static_cast<double> (samples[frame]);
58
59 for (size_t frame = numFrames; frame < m_fftSize; ++frame)
60 fftBuffer[frame] = 0.0;
61
62 performFFT (fftBuffer);
63
64 for (size_t bin = 0; bin < m_numBins; ++bin)
65 m_data[channel][bin] = fftBuffer[bin];
66 }
67 }
68
69 static Spectrum zeros (size_t numChannels, size_t signalDurationFrames, double sampleRateHz)
70 {
71 Spectrum spectrum;
72
73 if (sampleRateHz < 0)
74 HART_THROW_OR_RETURN_VOID (hart::SampleRateError, "Sample rate should not be negative");
75
76 spectrum.m_sampleRateHz = sampleRateHz;
77
78 // We can allow zero channels or zero frames at some point, but need to add
79 // guards to all methods that might get zero division errors as a result
80
81 if (numChannels == 0)
82 HART_THROW_OR_RETURN_VOID (hart::SizeError, "Number of channels should not be zero");
83
84 if (signalDurationFrames == 0)
85 HART_THROW_OR_RETURN_VOID (hart::SizeError, "Signal duration should not be zero");
86
87 spectrum.m_fftSize = nextPowerOfTwo (signalDurationFrames);
88 spectrum.m_numBins = spectrum.m_fftSize / 2 + 1;
89
90 spectrum.m_numChannels = numChannels;
91 spectrum.m_data.resize (
92 spectrum.m_numChannels,
93 std::vector<std::complex<double>> (spectrum.m_numBins, std::complex<double> (0.0, 0.0))
94 );
95
96 return spectrum;
97 }
98
99 /// @brief Returns number of channels
100 size_t getNumChannels() const
101 {
102 return m_numChannels;
103 }
104
105 /// @brief Returns number of frequency bins per channel
106 size_t getNumBins() const
107 {
108 return m_numBins;
109 }
110
111 /// @brief Returns FFT size
112 size_t getFFTSize() const
113 {
114 return m_fftSize;
115 }
116
117 /// @brief Returns sample rate in Hz
118 double getSampleRateHz() const
119 {
120 return m_sampleRateHz;
121 }
122
123 /// @brief Returns frequency corresponding to a bin index
124 double getBinFrequencyHz (size_t binIndex) const
125 {
126 if (binIndex >= m_numBins)
127 HART_THROW_OR_RETURN (hart::IndexError, "Bin index is out of range", hart::nan<double>());
128
129 return static_cast<double> (binIndex) * m_sampleRateHz / m_fftSize;
130 }
131
132 double getBinWidthHz() const
133 {
134 return m_sampleRateHz / m_fftSize;
135 }
136
137 /// @brief Returns complex value of a frequency bin, by bin index
138 std::complex<double> getBinValue (size_t channel, size_t binIndex) const
139 {
140 if (channel >= m_numChannels)
141 HART_THROW_OR_RETURN (hart::IndexError, "Channel index is out of range", hart::nan<double>());
142
143 if (binIndex >= m_numBins)
144 HART_THROW_OR_RETURN (hart::IndexError, "Bin index is out of range", hart::nan<double>());
145
146 return m_data[channel][binIndex];
147 }
148
149 /// @brief Returns complex value of a frequency bin, by frequency
150 std::complex<double> getBinValue (size_t channel, double frequencyHz) const
151 {
152 return getBinValue (channel, findClosestBin (frequencyHz));
153 }
154
155 /// @brief Returns magnitude of a frequency bin, by bin index
156 double getBinMagnitude (size_t channel, size_t binIndex) const
157 {
158 return std::abs (getBinValue (channel, binIndex));
159 }
160
161 /// @brief Returns magnitude of a frequency bin, by frequency
162 double getBinMagnitude (size_t channel, double frequencyHz) const
163 {
164 return getBinMagnitude (channel, findClosestBin (frequencyHz));
165 }
166
167 /// @brief Returns pointer to read-only magnitudes of a specific channel
168 const std::complex<double>* operator[] (size_t channel) const
169 {
170 if (channel >= m_numChannels)
171 HART_THROW_OR_RETURN (hart::IndexError, "Channel index is out of range", hart::nan<double>());
172
173 return m_data[channel].data();
174 }
175
176 /// @brief Returns pointer to mutable magnitudes of a specific channel
177 std::complex<double>* operator[] (size_t channel)
178 {
179 if (channel >= m_numChannels)
180 HART_THROW_OR_RETURN (hart::IndexError, "Channel index is out of range", hart::nan<double>());
181
182 return m_data[channel].data();
183 }
184
185 /// @brief Finds closest FFT bin to a given frequency
186 size_t findClosestBin (double frequencyHz) const
187 {
188 if (frequencyHz < 0.0)
189 HART_THROW_OR_RETURN (hart::ValueError, "This is one-sided spectrum, frequencyHz should be a positive value", 0);
190
191 const size_t bin = roundToSizeT (frequencyHz * m_fftSize / m_sampleRateHz);
192 return std::min (bin, m_numBins - 1);
193 }
194
195 /// @brief Returns a pair of indices representing a provided slice
196 /// @param slice A Slice instance. Valid slice types are `Slice::Type::whole`
197 /// `Slice::Type::bins` and `Slice::Type::freq`
198 /// @retval first Index representing the beginning of the range, inclusive
199 /// @retval second Index representing the end of the range, non-inclusive
200 std::pair<size_t, size_t> getBinIndices (const Slice& slice) const
201 {
202 const size_t numBins = getNumBins();
203
204 switch (slice.type)
205 {
207 {
208 return {0, numBins};
209 }
210
212 {
213 const size_t startBin = static_cast<size_t> (slice.start);
214 const size_t stopBin = static_cast<size_t> (slice.stop);
215
216 return {
217 std::min (startBin, numBins),
218 std::min (stopBin, numBins)
219 };
220 }
221
223 {
224 const size_t startBin = findClosestBin (slice.start);
225 const size_t stopBin = findClosestBin (slice.stop);
226
227 return {
228 std::min (startBin, numBins),
229 std::min (stopBin, numBins)
230 };
231 }
232
233 default:
234 {
235 HART_THROW_OR_RETURN (hart::UnitError, "Slice type cannot be interpreted as bin range", std::make_pair (0, numBins));
236 }
237 }
238 }
239
240private:
241 Spectrum() = default;
242
243 static size_t nextPowerOfTwo (size_t x)
244 {
245 size_t power = 1;
246
247 while (power < x)
248 power <<= 1;
249
250 return power;
251 }
252
253 static void performFFT (std::vector<std::complex<double>>& data)
254 {
255 const size_t n = data.size();
256
257 hassert (n != 0);
258 hassert ((n & (n - 1)) == 0); // Size should be a power of 2
259
260 size_t j = 0;
261
262 for (size_t i = 1; i < n; ++i)
263 {
264 size_t bit = n >> 1;
265
266 while (j & bit)
267 {
268 j ^= bit;
269 bit >>= 1;
270 }
271
272 j ^= bit;
273
274 if (i < j)
275 std::swap (data[i], data[j]);
276 }
277
278 for (size_t len = 2; len <= n; len <<= 1)
279 {
280 const double angle = -hart::twoPi / static_cast<double> (len);
281 const std::complex<double> wlen (std::cos (angle), std::sin (angle));
282
283 for (size_t i = 0; i < n; i += len)
284 {
285 std::complex<double> w (1.0, 0.0);
286
287 for (size_t jj = 0; jj < len / 2; ++jj)
288 {
289 const std::complex<double> u = data[i + jj];
290 const std::complex<double> v = data[i + jj + len / 2] * w;
291
292 data[i + jj] = u + v;
293 data[i + jj + len / 2] = u - v;
294
295 w *= wlen;
296 }
297 }
298 }
299 }
300
301 double m_sampleRateHz = 0.0;
302
303 size_t m_numChannels = 0;
304 size_t m_fftSize = 0;
305 size_t m_numBins = 0;
306
307 std::vector<std::vector<std::complex<double>>> m_data;
308};
309
310} // namespace hart
Container for audio data.
Thrown when a container index is out of range.
Thrown when sample rate is mismatched.
Thrown when an unexpected container size is encountered.
Frequency-domain representation of a multi-channel audio signal.
const std::complex< double > * operator[](size_t channel) const
Returns pointer to read-only magnitudes of a specific channel.
Spectrum(const AudioBuffer< SampleType > &buffer)
Constructs spectrum from an audio buffer.
static Spectrum zeros(size_t numChannels, size_t signalDurationFrames, double sampleRateHz)
std::complex< double > getBinValue(size_t channel, double frequencyHz) const
Returns complex value of a frequency bin, by frequency.
size_t getFFTSize() const
Returns FFT size.
double getSampleRateHz() const
Returns sample rate in Hz.
std::pair< size_t, size_t > getBinIndices(const Slice &slice) const
Returns a pair of indices representing a provided slice.
double getBinFrequencyHz(size_t binIndex) const
Returns frequency corresponding to a bin index.
double getBinMagnitude(size_t channel, size_t binIndex) const
Returns magnitude of a frequency bin, by bin index.
size_t getNumBins() const
Returns number of frequency bins per channel.
std::complex< double > getBinValue(size_t channel, size_t binIndex) const
Returns complex value of a frequency bin, by bin index.
size_t getNumChannels() const
Returns number of channels.
size_t findClosestBin(double frequencyHz) const
Finds closest FFT bin to a given frequency.
std::complex< double > * operator[](size_t channel)
Returns pointer to mutable magnitudes of a specific channel.
double getBinWidthHz() const
double getBinMagnitude(size_t channel, double frequencyHz) const
Returns magnitude of a frequency bin, by frequency.
Thrown when some metric is requested to return a value in an unsupported unit.
Thrown when an inappropriate value is encountered.
#define HART_THROW_OR_RETURN_VOID(ExceptionType, message)
Throws an exception if HART_DO_NOT_THROW_EXCEPTIONS is set, prints a message and returns otherwise.
#define hassert(condition)
Triggers a HartAssertException if the condition is false
#define HART_THROW_OR_RETURN(ExceptionType, message, returnValue)
Throws an exception if HART_DO_NOT_THROW_EXCEPTIONS is set, prints a message and returns a specified ...
constexpr double twoPi
2 * pi
static size_t roundToSizeT(SampleType x)
Rounds a floating point value to a size_t value.
Represents a slice of analysis data.