Ocean
FrameChangeDetector.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_DETECTOR_FRAME_CHANGE_DETECTOR_H
9 #define META_OCEAN_CV_DETECTOR_FRAME_CHANGE_DETECTOR_H
10 
12 
13 #include "ocean/base/Frame.h"
14 #include "ocean/base/Worker.h"
15 
16 #include "ocean/math/Matrix.h"
17 #include "ocean/math/Quaternion.h"
18 
19 #include <array>
20 #include <vector>
21 
22 namespace Ocean
23 {
24 
25 namespace CV
26 {
27 
28 namespace Detector
29 {
30 
31 /**
32  * This class implements a simple detection algorithm to compute whether a camera's image content has significantly changed between a given frame and a registered keyframe.
33  * The implementation uses intensity histogram comparison over local image tiles and scores the overall difference across all tiles in the image space.
34  * The class also allows for accelerometer and gyroscope readings to be fed to the detector, to avoid keyframe selection when the image is blurry.
35  * @ingroup cvdetector
36  */
37 class OCEAN_CV_DETECTOR_EXPORT FrameChangeDetector
38 {
39  public:
40 
41  /// Number of histogram bins to use for intensity values.
42  static constexpr unsigned int kNumberIntensityBins = 16u;
43 
44  /// Number of values in the range [0, 255] covered by each intensity bin in the histogram.
45  static constexpr unsigned int kIntensityBinWidth = 16u;
46 
47  /// Histogram type for a one-channel image.
48  typedef std::array<uint32_t, kNumberIntensityBins> TileHistogram;
49 
50  /// A vector of histograms for tiles in the image.
51  typedef std::vector<TileHistogram> TileHistograms;
52 
53  /**
54  * Different possible results for detectFrameChange().
55  */
56  enum class FrameChangeResult
57  {
58  INVALID_INPUT = 0, ///< Error value: invalid input was provided to detectFrameChange()
59  NO_CHANGE_DETECTED, ///< The frame was processed and no frame change was reported
60  CHANGE_DETECTED ///< The frame was processed and a frame change was reported
61  };
62 
63  /**
64  * Options for the detector.
65  */
66  struct Options
67  {
68  /// Target frame width in pixels, with range (0, infinity). Input frames will be resized to this resolution.
69  unsigned int targetFrameWidth = 0u;
70 
71  /// Target frame height in pixels, with range (0, infinity). Input frames will be resized to this resolution.
72  unsigned int targetFrameHeight = 0u;
73 
74  /// Side length, in pixels, of each spatial bin used for local intensity histogram computation, with range (4, infinity).
75  unsigned int spatialBinSize = 40u;
76 
77  /// Threshold on the (vector magnitude of the) linear acceleration reading from the device's accelerometer, in m/s^2 with range [0, infinity).
78  /// If a value greater than this has been observed over the last two frames, we consider the device motion too large to make the current frame a good keyframe.
79  Scalar largeMotionAccelerationThreshold = Numeric::maxValue();
80 
81  /// Threshold on the (vector magnitude of the) unbiased rotation rate read from the device's gyroscope, in rad/s with range [0, infinity).
82  /// If a value greater than this has been observed over the last two frames, we consider the device motion too large to make the current frame a good keyframe.
83  Scalar largeMotionRotationRateThreshold = Numeric::maxValue();
84 
85  /// Threshold on maximum total device rotation since the last keyframe, based on IMU, in radians with range (0, pi).
86  /// If the device pose difference is more than this amount, then the current frame will be set as the keyframe. NOTE(review): the default below is pi, which lies outside the stated open range (0, pi) - confirm whether the intended range is (0, pi].
87  Scalar rotationThreshold = Numeric::pi();
88 
89  /// Threshold on the minimum amount of time between keyframes, in seconds, with range [0, preferredMaximumTimeBetweenKeyframes).
90  double minimumTimeBetweenKeyframes = 0.0;
91 
92  /// Preferred threshold on the maximum amount of time between keyframes, in seconds, with range (minimumTimeBetweenKeyframes, absoluteMaximumTimeBetweenKeyframes].
93  /// Keyframes will be set at least at this rate, regardless of whether a significant change in content has occurred.
94  /// This value will be ignored, however, if the current frame contains very strong motion.
95  double preferredMaximumTimeBetweenKeyframes = NumericD::maxValue();
96 
97  /// Absolute threshold on the maximum amount of time between keyframes, in seconds, with range [preferredMaximumTimeBetweenKeyframes, NumericD::maxValue()].
98  /// Keyframes will be set at most at this rate, regardless of whether a significant change in content has occurred, and regardless of whether the current frame contains very strong motion.
99  double absoluteMaximumTimeBetweenKeyframes = NumericD::maxValue();
100 
101  /// Minimum histogram distance between the keyframe and the current frame for a tile to be considered as having significant content change, with range [0, histogramDistanceThreshold).
102  Scalar minimumHistogramDistance = Scalar(25.0);
103 
104  /// Sets the maximum change considered when scoring a specific tile in the current image, with range (minimumHistogramDistance, infinity).
105  Scalar histogramDistanceThreshold = Scalar(100.0);
106 
107  /// Minimum "change detection" score for the current frame to be regarded as significantly different from the keyframe.
108  /// This score is computed as the weighted proportion of tiles having significant change.
109  /// Tile weights are computed as min(tileHistogramDistance / histogramDistanceThreshold, 1), or as 0 if tileHistogramDistance < minimumHistogramDistance.
110  Scalar changeDetectionThreshold = Scalar(0.05);
111 
112  /**
113  * Checks whether the specified options are valid for processing.
114  * @return True if the spatial bin size is nonzero and no larger than the frame dimensions, otherwise false
115  */
116  inline bool isValid() const;
117  };
118 
119  /**
120  * Creates an invalid frame change detector.
121  */
122  inline FrameChangeDetector();
123 
124  /**
125  * Creates a new frame change detector with the given parameters.
126  * @param options Set of parameters for the detector
127  */
128  explicit FrameChangeDetector(const Options& options);
129 
130  /**
131  * Copy constructor.
132  * @param other Frame change detector to copy
133  */
135 
136  /**
137  * Move constructor.
138  * @param other Frame change detector to move to this object
139  */
140  inline FrameChangeDetector(FrameChangeDetector&& other) noexcept;
141 
142  /**
143  * Records a new acceleration reading from an accelerometer.
144  * @param acceleration 3DOF acceleration values in the device's local frame, in units of m/s^2
145  * @param timestamp Timestamp of the accelerometer sample
146  */
147  void addAccelerationSample(const Vector3& acceleration, const Timestamp& timestamp);
148 
149  /**
150  * Records a new rotational motion reading from a gyroscope.
151  * @param rotationRate 3DOF rotation rate values in the device's local frame, in units of rad/s
152  * @param timestamp Timestamp of the gyroscope sample
153  */
154  void addGyroSample(const Vector3& rotationRate, const Timestamp& timestamp);
155 
156  /**
157  * Handles one frame of input and determines whether a significant change in visual content has occurred.
158  * @param yFrame Input frame; must be valid with 8-bit grayscale format; the frame size must not be smaller than (options().targetFrameWidth)x(options().targetFrameHeight).
159  * @param world_R_camera Optional prior on the device's 3DOF orientation as provided by the device's internal sensor fusion algorithm, may be invalid
160  * @param worker Optional worker to distribute the computation
161  * @return Indicator of whether a frame change occurred, or an error value if invalid input was provided
162  */
163  FrameChangeResult detectFrameChange(const Frame& yFrame, const Quaternion& world_R_camera, Worker* worker = nullptr);
164 
165  /**
166  * Returns the set of options that were specified when this detector was created.
167  * @return Options for the detector
168  */
169  inline const Options& options() const;
170 
171  /**
172  * Returns the number of rows in the associated tile matrix.
173  * @return Number of rows, equal to options_.targetFrameHeight / options_.spatialBinSize, or zero if the detector is invalid
174  */
175  inline unsigned int tileRows() const;
176 
177  /**
178  * Returns the number of columns in the associated tile matrix.
179  * @return Number of columns, equal to options_.targetFrameWidth / options_.spatialBinSize, or zero if the detector is invalid
180  */
181  inline unsigned int tileColumns() const;
182 
183  /**
184  * Returns the most recently computed set of histogram distances for this detector. These distances may not have been computed in the most recently processed frame.
185  * @return tileRows() x tileColumns() matrix of histogram distances, each associated with a specific tile in the image
186  */
187  inline const Matrix& tileScores() const;
188 
189  /**
190  * Checks whether the detector was created with valid parameters.
191  * @return True, if the detector was created successfully; otherwise, false
192  */
193  inline bool isValid() const;
194 
195  /**
196  * Move operator.
197  * @param other The detector to be moved
198  * @return Reference to this object
199  */
200  inline FrameChangeDetector& operator=(FrameChangeDetector&& other) noexcept;
201 
202  private:
203 
204  /**
205  * Computes the local intensity histograms for all tiles in the image.
206  * @param yFrame Uint8 grayscale image to process
207  * @param shouldComputeHistogramDistances If true, the histogram difference with the keyframe will be additionally computed for each tile; if false, this step is skipped
208  * @param worker Optional worker to distribute the computation
209  */
210  void computeTileHistograms(const Frame& yFrame, bool shouldComputeHistogramDistances, Worker* worker);
211 
212  /**
213  * Computes the local intensity histograms for a subset of image tiles.
214  * @param yFrame Pointer to uint8 grayscale image data
215  * @param yFrameStride Stride of the yFrame rows in bytes (i.e., 4-byte-aligned)
216  * @param shouldComputeHistogramDistances If true, the histogram difference with the keyframe will be additionally computed for each tile; if false, this step is skipped
217  * @param tileIndexStart Row-major linear index for the first tile to process
218  * @param numTilesToProcess Number of tiles that will be sequentially processed starting with tileIndexStart
219  */
220  void computeTileHistogramsSubset(const uint8_t* yFrame, const unsigned int yFrameStride, bool shouldComputeHistogramDistances, unsigned int tileIndexStart, unsigned int numTilesToProcess);
221 
222  /**
223  * Computes a distance score between two histograms.
224  * @param tileHistogram1 First histogram to compare
225  * @param tileHistogram2 Second histogram to compare
226  * @return Score reflecting the distance between the histograms, with lower values indicating higher similarity
227  */
228  static Scalar computeHistogramDistance(const TileHistogram& tileHistogram1, const TileHistogram& tileHistogram2);
229 
230  private:
231 
232  /// Set of options for the detector.
234 
235  /// Number of tile blocks in the vertical dimension.
236  unsigned int tileRows_;
237 
238  /// Number of tile blocks in the horizontal dimension.
239  unsigned int tileColumns_;
240 
241  /// Flattened grid of 2D histograms for the most recently processed frame, stored row-major.
243 
244  /// Flattened grid of 2D histograms for the current keyframe.
246 
247  /// Timestamp of the last frame that was processed.
249 
250  /// Timestamp of frame prior to the last frame that was processed.
252 
253  /// Timestamp of the current keyframe.
255 
256  /// Last timestamp at which large motion was detected from the accelerometer or gyro (if any).
258 
259  /// Flattened grid of 2D histogram distances between the most recently processed frame and the current keyframe.
261 
262  /// 3DOF rotation of the last keyframe relative to a world coordinate frame. If rotations are not available or if no frames have been processed, this will be set to invalid.
264 };
265 
267 {
269 }
270 
272  tileRows_(0u),
273  tileColumns_(0u),
274  lastTimestamp_(false),
275  priorLastTimestamp_(false),
276  keyframeTimestamp_(false),
278  world_R_keyframe_(false)
279 {
280  // nothing to do, here
281 }
282 
284  options_(other.options_),
285  tileRows_(other.tileRows_), // copied, not reset - NOTE(review): `other` keeps its tile counts, so other.isValid() still returns true after the move although its histogram containers below have been moved from; confirm this is intended
286  tileColumns_(other.tileColumns_),
287  tileHistograms_(std::move(other.tileHistograms_)),
288  keyframeTileHistograms_(std::move(other.keyframeTileHistograms_)),
289  lastTimestamp_(other.lastTimestamp_),
290  priorLastTimestamp_(other.priorLastTimestamp_),
291  keyframeTimestamp_(other.keyframeTimestamp_),
292  lastLargeMotionTimestamp_(other.lastLargeMotionTimestamp_),
293  histogramDistances_(std::move(other.histogramDistances_)),
294  world_R_keyframe_(std::move(other.world_R_keyframe_))
295 {
296  // nothing to do, here
297 }
298 
300 {
301  return options_;
302 }
303 
304 inline unsigned int FrameChangeDetector::tileRows() const
305 {
306  return tileRows_; // plain accessor; per the declaration's documentation this is zero for an invalid detector
307 }
308 
309 inline unsigned int FrameChangeDetector::tileColumns() const
310 {
311  return tileColumns_; // plain accessor; per the declaration's documentation this is zero for an invalid detector
312 }
313 
315 {
316  return histogramDistances_;
317 }
318 
319 inline bool FrameChangeDetector::isValid() const
320 {
321  return tileRows_ > 0u && tileColumns_ > 0u; // validity is defined purely by a non-empty tile grid: both counts must be non-zero
322 }
323 
325 {
326  if (&other != this) // guard against self-move, which would otherwise move the containers onto themselves
327  {
328  options_ = other.options_;
329  tileRows_ = other.tileRows_; // copied, not reset - NOTE(review): `other` still reports isValid() == true afterwards although its containers below were moved from; confirm this is intended
330  tileColumns_ = other.tileColumns_;
331  tileHistograms_ = std::move(other.tileHistograms_);
332  keyframeTileHistograms_ = std::move(other.keyframeTileHistograms_);
333  lastTimestamp_ = other.lastTimestamp_;
334  priorLastTimestamp_ = other.priorLastTimestamp_;
335  keyframeTimestamp_ = other.keyframeTimestamp_;
336  lastLargeMotionTimestamp_ = other.lastLargeMotionTimestamp_;
337  histogramDistances_ = std::move(other.histogramDistances_);
338  world_R_keyframe_ = std::move(other.world_R_keyframe_);
339  }
340 
341  return *this;
342 }
343 
344 } // namespace Detector
345 
346 } // namespace CV
347 
348 } // namespace Ocean
349 
350 #endif // META_OCEAN_CV_DETECTOR_FRAME_CHANGE_DETECTOR_H
This class implements a simple detection algorithm to compute whether a camera's image content has si...
Definition: FrameChangeDetector.h:38
void addAccelerationSample(const Vector3 &acceleration, const Timestamp &timestamp)
Records a new acceleration reading from an accelerometer.
Quaternion world_R_keyframe_
3DOF rotation of the last keyframe relative to a world coordinate frame. If rotations are not availab...
Definition: FrameChangeDetector.h:263
Timestamp lastTimestamp_
Timestamp of the last frame that was processed.
Definition: FrameChangeDetector.h:248
const Matrix & tileScores() const
Returns the most recently computed set of histogram distances for this detector.
Definition: FrameChangeDetector.h:314
void addGyroSample(const Vector3 &rotationRate, const Timestamp &timestamp)
Records a new rotational motion reading from a gyroscope.
Timestamp lastLargeMotionTimestamp_
Last timestamp at which large motion was detected from the accelerometer or gyro (if any).
Definition: FrameChangeDetector.h:257
unsigned int tileColumns_
Number of tile blocks in the horizontal dimension.
Definition: FrameChangeDetector.h:239
unsigned int tileRows_
Number of tile blocks in the vertical dimension.
Definition: FrameChangeDetector.h:236
Timestamp priorLastTimestamp_
Timestamp of frame prior to the last frame that was processed.
Definition: FrameChangeDetector.h:251
const Options & options() const
Returns the set of options that were specified when this detector was created.
Definition: FrameChangeDetector.h:299
TileHistograms tileHistograms_
Flattened grid of 2D histograms for the most recently processed frame, stored row-major.
Definition: FrameChangeDetector.h:242
std::array< uint32_t, kNumberIntensityBins > TileHistogram
Histogram type for a one-channel image.
Definition: FrameChangeDetector.h:48
void computeTileHistograms(const Frame &yFrame, bool shouldComputeHistogramDistances, Worker *worker)
Computes the local intensity histograms for all tiles in the image.
Timestamp keyframeTimestamp_
Timestamp of the current keyframe.
Definition: FrameChangeDetector.h:254
FrameChangeResult detectFrameChange(const Frame &yFrame, const Quaternion &world_R_camera, Worker *worker=nullptr)
Handles one frame of input and determines whether a significant change in visual content has occurred...
Matrix histogramDistances_
Flattened grid of 2D histogram distances between the most recently processed frame and the current ke...
Definition: FrameChangeDetector.h:260
unsigned int tileRows() const
Returns the number of rows in the associated tile matrix.
Definition: FrameChangeDetector.h:304
FrameChangeDetector(const Options &options)
Creates a new frame change detector with the given parameters.
FrameChangeResult
Different possible results for detectFrameChange().
Definition: FrameChangeDetector.h:57
Options options_
Set of options for the detector.
Definition: FrameChangeDetector.h:233
void computeTileHistogramsSubset(const uint8_t *yFrame, const unsigned int yFrameStride, bool shouldComputeHistogramDistances, unsigned int tileIndexStart, unsigned int numTilesToProcess)
Computes the local intensity histograms for a subset of image tiles.
TileHistograms keyframeTileHistograms_
Flattened grid of 2D histograms for the current keyframe.
Definition: FrameChangeDetector.h:245
FrameChangeDetector()
Creates an invalid frame change detector.
Definition: FrameChangeDetector.h:271
bool isValid() const
Checks whether the detector was created with valid parameters.
Definition: FrameChangeDetector.h:319
static Scalar computeHistogramDistance(const TileHistogram &tileHistogram1, const TileHistogram &tileHistogram2)
Computes a distance score between two histograms.
unsigned int tileColumns() const
Returns the number of columns in the associated tile matrix.
Definition: FrameChangeDetector.h:309
FrameChangeDetector(const FrameChangeDetector &other)
Copy constructor.
std::vector< TileHistogram > TileHistograms
A vector of histograms for tiles in the image.
Definition: FrameChangeDetector.h:51
FrameChangeDetector & operator=(FrameChangeDetector &&other) noexcept
Move operator.
Definition: FrameChangeDetector.h:324
This class implements Ocean's image class.
Definition: Frame.h:1792
static constexpr T pi()
Returns PI which is equivalent to 180 degree.
Definition: Numeric.h:926
static constexpr T maxValue()
Returns the max scalar value.
Definition: Numeric.h:3244
This class implements a timestamp.
Definition: Timestamp.h:36
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
float Scalar
Definition of a scalar type.
Definition: Math.h:128
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Definition: FrameChangeDetector.h:67
unsigned int targetFrameHeight
Target frame height in pixels, with range (0, infinity). Input frames will be resized to this resolut...
Definition: FrameChangeDetector.h:72
unsigned int targetFrameWidth
Target frame width in pixels, with range (0, infinity). Input frames will be resized to this resoluti...
Definition: FrameChangeDetector.h:69
double preferredMaximumTimeBetweenKeyframes
Preferred threshold on the maximum amount of time between keyframes, in seconds, with range (minimumT...
Definition: FrameChangeDetector.h:95
Scalar rotationThreshold
Threshold on maximum total device rotation since the last keyframe, based on IMU, in radians with ran...
Definition: FrameChangeDetector.h:87
bool isValid() const
Checks whether the specified options are valid for processing.
Definition: FrameChangeDetector.h:266
Scalar histogramDistanceThreshold
Sets the maximum change considered when scoring a specific tile in the current image,...
Definition: FrameChangeDetector.h:105
Scalar minimumHistogramDistance
Minimum histogram distance between the keyframe and the current frame for a tile to be considered as ...
Definition: FrameChangeDetector.h:102
unsigned int spatialBinSize
Side length, in pixels, of each spatial bin used for local intensity histogram computation,...
Definition: FrameChangeDetector.h:75
double minimumTimeBetweenKeyframes
Threshold on the minimum amount of time between keyframes, in seconds, with range [0,...
Definition: FrameChangeDetector.h:90
double absoluteMaximumTimeBetweenKeyframes
Absolute threshold on the maximum amount of time between keyframes, in seconds, with range [preferred...
Definition: FrameChangeDetector.h:99