HART  0.2.0
High level Audio Regression and Testing
Loading...
Searching...
No Matches
hart_onset_latency_detector.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <vector>
4#include <utility> // pair
5
9#include "hart_utils.hpp" // floatsEqual(), make_unique(), decibelsToRatio()
10
11namespace hart
12{
13
14/// @brief Onset-based latency detector implementation for the hart::LatencyBelow class. For internal use.
15/// @private
16template <typename SampleType>
17class OnsetLatencyDetector :
18 public LatencyDetector<SampleType>
19{
20public:
21 OnsetLatencyDetector (double maxLatencySeconds, SilencePolicy silencePolicy, double absThresholdDb):
22 m_maxLatencySeconds (maxLatencySeconds),
23 m_silencePolicy (silencePolicy),
24 m_absThresholdLinear (decibelsToRatio (absThresholdDb))
25 {
26 }
27
28 void prepare (
29 double sampleRateHz,
30 size_t numChannels,
31 size_t /*maxBlockSizeFrames*/
32 ) override
33 {
34 m_sampleRateHz = sampleRateHz;
35 m_numChannels = numChannels;
36 }
37
38 void reset() override
39 {
40 m_noOnsetsDetected = false;
41 m_allChannelsFailed = false;
42 m_detectedLatencyFrames = 0;
43 m_detectedInputOnsetFrames = 0;
44 m_detectedOutputOnsetFrames = 0;
45 m_failureChannel = 0;
46 m_failureFrame = 0;
47 }
48
49 bool match (
50 const AudioBuffer<SampleType>& inputAudio,
51 const AudioBuffer<SampleType>& observedOutputAudio,
52 const std::function<bool (size_t)>& appliesToChannel
53 ) override
54 {
55 const size_t numChannels = inputAudio.getNumChannels();
56 std::vector<long long int> validLatenciesFrames;
57 std::vector<size_t> usedChannels;
58 std::vector<size_t> ignoredChannels;
59 std::vector<std::pair<size_t, size_t>> onsetsFrames (numChannels);
60
61 for (size_t channel = 0; channel < numChannels; ++channel)
62 {
63 if (! appliesToChannel (channel))
64 continue;
65
66 const OnsetDetails inputOnset = findOnsetFrame (inputAudio, channel);
67 const OnsetDetails outputOnset = findOnsetFrame (observedOutputAudio, channel);
68
69 if (inputOnset.found && outputOnset.found)
70 {
71 // It can be negative, in theory
72 const long long int latencyFrames =
73 static_cast<long long int> (outputOnset.frame) - inputOnset.frame;
74
75 validLatenciesFrames.push_back (latencyFrames);
76 usedChannels.push_back (channel);
77 onsetsFrames[channel] = { inputOnset.frame, outputOnset.frame };
78 }
79 else
80 {
81 ignoredChannels.push_back (channel);
82
83 if (m_silencePolicy == SilencePolicy::strict)
84 {
85 m_noOnsetsDetected = true;
86 m_allChannelsFailed = false;
87 m_failureChannel = channel;
88 m_detectedInputOnsetFrames = 0;
89 m_detectedOutputOnsetFrames = 0;
90 m_failureFrame = 0;
91
92 return false;
93 }
94 }
95 }
96
97 if (validLatenciesFrames.empty())
98 {
99 m_noOnsetsDetected = true;
100 m_allChannelsFailed = true;
101 m_failureChannel = 0;
102 m_detectedInputOnsetFrames = 0;
103 m_detectedOutputOnsetFrames = 0;
104 m_failureFrame = 0;
105
106 return false;
107 }
108
109 // Pick the worst-case latency
110 long long int latencyFrames = validLatenciesFrames[0];
111 size_t latencyChannel = usedChannels[0];
112
113 for (size_t i = 1; i < validLatenciesFrames.size(); ++i)
114 {
115 if (validLatenciesFrames[i] > latencyFrames)
116 {
117 latencyFrames = validLatenciesFrames[i];
118 latencyChannel = usedChannels[i];
119 }
120 }
121
122 m_detectedLatencyFrames = latencyFrames;
123 const double detectedLatencySeconds = latencyFrames / m_sampleRateHz;
124
125 if (detectedLatencySeconds <= m_maxLatencySeconds)
126 return true;
127
128 m_noOnsetsDetected = false;
129 m_failureChannel = latencyChannel;
130 m_detectedInputOnsetFrames = onsetsFrames[latencyChannel].first;
131 m_detectedOutputOnsetFrames = onsetsFrames[latencyChannel].second;
132 m_failureFrame = m_detectedOutputOnsetFrames;
133
134 return false;
135 }
136
137 MatcherFailureDetails getFailureDetails() const override
138 {
139 MatcherFailureDetails details;
140 details.frame = m_failureFrame; // This is an output audio's frame, at which the signal onset was detected
141 details.channel = m_failureChannel;
142 std::stringstream detailsStream;
143
144 if (m_noOnsetsDetected)
145 {
146 detailsStream
147 << "Latency could not be determined: "
148 << (m_allChannelsFailed ? "no channels exceeded threshold" : "one of the channels does not exceed threshold");
149 }
150 else
151 {
152 const double detectedLatencySeconds = m_detectedLatencyFrames / m_sampleRateHz;
153
154 detailsStream
155 << "Detected latency: "
156 << secPrecision << detectedLatencySeconds << " seconds ("
157 << m_detectedLatencyFrames << " frames)";
158
159 if (m_detectedInputOnsetFrames != 0)
160 {
161 const double detectedInputOnsetSeconds = m_detectedInputOnsetFrames / m_sampleRateHz;
162 const double detectedOutputOnsetSeconds = m_detectedOutputOnsetFrames / m_sampleRateHz;
163
164 detailsStream
165 << ",\nInput onset: "
166 << detectedInputOnsetSeconds << " seconds ("
167 << m_detectedInputOnsetFrames << " frames),\n"
168 << "Output onset: "
169 << detectedOutputOnsetSeconds << " seconds ("
170 << m_detectedOutputOnsetFrames << " frames)\n";
171 }
172
173 }
174
175 details.description = detailsStream.str();
176 return details;
177 }
178
179 std::unique_ptr<LatencyDetector<SampleType>> copy() const override
180 {
181 return hart::make_unique<OnsetLatencyDetector<SampleType>> (*this);
182 }
183
184private:
185 struct OnsetDetails
186 {
187 bool found;
188 size_t frame;
189 };
190
191 const double m_maxLatencySeconds;
192 const SilencePolicy m_silencePolicy;
193 const double m_absThresholdLinear;
194
195 double m_sampleRateHz = 0.0;
196 size_t m_numChannels = 0;
197
198 bool m_noOnsetsDetected = false;
199 bool m_allChannelsFailed = false;
200 long long int m_detectedLatencyFrames = 0;
201 size_t m_detectedInputOnsetFrames = 0;
202 size_t m_detectedOutputOnsetFrames = 0;
203 size_t m_failureChannel = 0;
204 size_t m_failureFrame = 0;
205
206 OnsetDetails findOnsetFrame (const AudioBuffer<SampleType>& buffer, size_t channel) const
207 {
208 const size_t numFrames = buffer.getNumFrames();
209
210 for (size_t frame = 0; frame < numFrames; ++frame)
211 if (std::abs (static_cast<double> (buffer[channel][frame])) > m_absThresholdLinear)
212 return { true, frame };
213
214 // TODO: Put "non applicable" frame value here
215 return { false, 0 };
216 }
217};
218
219} // namespace hart
Container for audio data.
std::ostream & secPrecision(std::ostream &stream)
Sets number of decimal places for values in seconds.
SilencePolicy
Defines how silence is handled in various algorithms.
static SampleType decibelsToRatio(SampleType valueDb)
Converts dB to linear value (ratio)
size_t channel
Index of channel at which the failure was detected.
std::string description
Readable description of why the match has failed.
size_t frame
Index of frame at which the match has failed.