Ocean
FrameFilterGaussian.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
9 #define META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
10 
11 #include "ocean/cv/CV.h"
13 
14 #include "ocean/base/Frame.h"
15 #include "ocean/base/Memory.h"
16 #include "ocean/base/ScopedValue.h"
17 
18 namespace Ocean
19 {
20 
21 namespace CV
22 {
23 
24 /**
25  * This class implements Gaussian image blur filters.
26  * @ingroup cv
27  */
28 class OCEAN_CV_EXPORT FrameFilterGaussian
29 {
30  public:
31 
32  /**
33  * This class holds re-usable memory for the filtering process.
34  */
36  {
37  friend class FrameFilterGaussian;
38 
39  public:
40 
41  /**
42  * Default constructor.
43  */
44  ReusableMemory() = default;
45 
46  protected:
47 
48  /// The reusable memory object for the separable filter.
50 
51  /// The reusable memory for horizontal filter factors.
53 
54  /// The reusable memory for vertical filter factors.
56 
57  /// The reusable memory for several response rows.
59  };
60 
61  public:
62 
63  /**
64  * Calculates the ideal size of a box filter for a specified sigma defining the shape of the Gauss distribution.
65  * @param sigma The sigma defining the shape of the Gauss distribution in pixel, with range (0, infinity)
66  * @return The ideal size of the box filter in pixel, with range [1, infinity], will be odd
67  * @tparam T The data type of sigma, should be 'float' or 'double'
68  */
69  template <typename T>
70  static inline unsigned int sigma2filterSize(const T sigma);
71 
72  /**
73  * Calculates the sigma corresponding to a specified box filter so that the Gauss distribution using the sigma represents the box filter.
74  * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
75  * @return The resulting sigma in pixel, with range (0, infinity)
76  * @tparam T The data type of sigma, should be 'float' or 'double'
77  */
78  template <typename T>
79  static inline T filterSize2sigma(const unsigned int filterSize);
80 
81  /**
82  * Determines 1D Gaussian blur filter factors for a given filter size.
83  * The resulting filter will be normalized for filter values with floating point precision and will not be normalized for filter values with integer precision.<br>
84  * This function will determine the sigma based on the specified size of the filter by using 'filterSize2sigma'.
85  * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
86  * @param filter The buffer receiving the resulting filter values, must be valid
87  * @param denominator Optional resulting denominator if the resulting filter values are not normalized
88  * @tparam T The data type of the filter elements, e.g., 'unsigned int', or 'float'
89  * @see filterSize2sigma(), determineFilterFactorsWithExplicitSigma().
90  */
91  template <typename T>
92  static void determineFilterFactors(const unsigned int filterSize, T* filter, T* denominator = nullptr);
93 
94  /**
95  * Determines 1D Gaussian blur filter factors for a given filter size.
96  * The resulting filter will be normalized for filter values with floating point precision and will not be normalized for filter values with integer precision.<br>
97  * Information: This function is the equivalent to OpenCV's cv::getGaussianKerne().
98  * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
99  * @param sigma The explicit sigma which will be used to determine the filter values, with range (0, infinity)
100  * @param filter The buffer receiving the resulting filter values, must be valid
101  * @param denominator Optional resulting denominator if the resulting filter values are not normalized
102  * @tparam T The data type of the filter elements, must be 'unsigned int', or 'float', or 'double'
103  * @see determineFilterFactors(), filterSize2sigma().
104  */
105  template <typename T>
106  static void determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T* filter, T* denominator = nullptr);
107 
108  /**
109  * Applies a Gaussian blur filter to a given source image and copies the resulting filter results to a given output frame.
110  * If the target frame type does not match the source frame type the target frame type will be adjusted.
111  * Information: This function is the equivalent to OpenCV's cv::GaussianBlur().
112  * @param source The source frame to which the blur filter will be applied, must be valid
113  * @param target The target frame receiving the blurred image content, will be set to the correct frame type if invalid or not matching
114  * @param filterSize The size of the filter to be applied, with range [1, min(source.width(), source.height())], must be odd
115  * @param worker Optional worker object to distribute the computational load
116  * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
117  * @return True, if succeeded
118  */
119  static bool filter(const Frame& source, Frame& target, const unsigned int filterSize, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr);
120 
121  /**
122  * Applies a Gaussian blur filter to a given frame.
123  * In case the given frame is a read-only frame, the frame will be replaced with a new frame owning the memory.<br>
124  * In case the given frame is a writable frame, the filter will be applied in place.
125  * @param frame The frame to which the blur filter will be applied, must be valid
126  * @param filterSize The size of the filter to be applied, with range [1, min(source.width(), source.height())], must be odd
127  * @param worker Optional worker object to distribute the computational load
128  * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
129  * @return True, if succeeded
130  */
131  static bool filter(Frame& frame, const unsigned int filterSize, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr);
132 
133  /**
134  * Applies a Gaussian blur filter to a given frame.
135  * @param source The source frame to be filtered, must be valid
136  * @param target The target frame receiving the filtered results, can be the same memory pointer as 'source', must be valid
137  * @param width The width of the source (and target) frame in pixel, with range [tFilterSize, infinity)
138  * @param height The height of the source (and target) frame in pixel, with range [tFilterSize, infinity)
139  * @param channels The number of channels the source frame (and target frame) has, with range [1, infinity)
140  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
141  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
142  * @param horizontalFilterSize The number of elements the horizontal filter has, with range [1, width], must be odd
143  * @param verticalFilterSize The number of elements the vertical filter has, with range [1, height], must be odd
144  * @param sigma The Optional sigma that is applied explicitly, with range (0, infinity), -1 to calculate the sigma automatically based on the filter sizes
145  * @param worker Optional worker object to distribute the computation
146  * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
147  * @param processorInstructions The set of available instructions, may be any combination of instructions
148  * @return True, if succeeded
149  * @tparam T The data type of each pixel channel of the source frame (and target frame) e.g., 'uint8_t', or 'float'
150  * @tparam TFilter The data type of each filter elements e.g., 'unsigned int', or 'float'
151  */
152  template <typename T, typename TFilter>
153  static bool filter(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma = -1.0f, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr, const ProcessorInstructions processorInstructions = Processor::get().instructions());
154 
155  /**
156  * Applies a Gaussian blur filter to a given frame.
157  * @param frame The frame to be filtered, must be valid
158  * @param width The width of the frame in pixel, with range [tFilterSize, infinity)
159  * @param height The height of the frame in pixel, with range [tFilterSize, infinity)
160  * @param channels The number of channels the source frame (and target frame) has, with range [1, infinity)
161  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
162  * @param horizontalFilterSize The number of elements the horizontal filter has, with range [1, width], must be odd
163  * @param verticalFilterSize The number of elements the vertical filter has, with range [1, height], must be odd
164  * @param sigma The Optional sigma that is applied explicitly, with range (0, infinity), -1 to calculate the sigma automatically based on the filter sizes
165  * @param worker Optional worker object to distribute the computation
166  * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
167  * @param processorInstructions The set of available instructions, may be any combination of instructions
168  * @return True, if succeeded
169  * @tparam T The data type of each pixel channel of the source frame (and target frame) e.g., 'uint8_t', or 'float'
170  * @tparam TFilter The data type of each filter elements e.g., 'unsigned int', or 'float'
171  */
172  template <typename T, typename TFilter>
173  static inline bool filter(T* frame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int framePaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma = -1.0f, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr, const ProcessorInstructions processorInstructions = Processor::get().instructions());
174 
175  protected:
176 
177 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
178 
179  /**
180  * Applies a horizontal and vertical filtering with a Gaussian kernel with size 3, applying a horizontal and vertical 121 filter kernel.
181  * The frame must be a 1 channel 8 bit per pixel image.<br>
182  * Instead of applying a separated horizontal and vertical filter, the function applies the 2D filter directly to speed up the process significantly.<br>
183  * This function applies NEON instructions and can handle frames with width >= 18 pixels only.
184  * @param source The source frame to be filtered, must be valid
185  * @param target The target frame receiving the filtered results, must be valid
186  * @param width The width of the source (and target) frame in pixel, with range [18, infinity)
187  * @param height The height of the source (and target) frame in pixel, with range [1, infinity)
188  * @param sourcePaddingElements Optional padding elements at the end of each source row, in elements, with range [0, infinity)
189  * @param targetPaddingElements Optional padding elements at the end of each target row, in elements, with range [0, infinity)
190  * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
191  */
192  static inline void filter1Channel8Bit121NEON(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory* reusableMemory);
193 
194 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
195 };
196 
197 template <typename T>
198 inline unsigned int FrameFilterGaussian::sigma2filterSize(const T sigma)
199 {
200  ocean_assert(sigma > NumericT<T>::eps());
201 
202  const unsigned int size = (unsigned int)NumericT<T>::ceil((sigma - T(0.8)) * T(6.666666666) + T(2.999)) | 0x01u; // bitwise or to create an odd size
203 
204  ocean_assert(size >= 1u);
205  ocean_assert(size % 2u == 1u);
206 
207  return size;
208 }
209 
210 template <typename T>
211 inline T FrameFilterGaussian::filterSize2sigma(const unsigned int filterSize)
212 {
213  ocean_assert(filterSize >= 1u && (filterSize % 2u) == 1u);
214 
215  return T(0.3) * (T(filterSize / 2u) - T(1)) + T(0.8);
216 }
217 
218 template <>
219 inline void FrameFilterGaussian::determineFilterFactorsWithExplicitSigma<unsigned int>(const unsigned int filterSize, const float sigma, unsigned int* filter, unsigned int* denominator)
220 {
221  ocean_assert(filterSize % 2u == 1u);
222  ocean_assert(filter != nullptr);
223 
224  std::vector<float> floatFilter(filterSize);
225  determineFilterFactorsWithExplicitSigma<float>(filterSize, sigma, floatFilter.data());
226 
227  const float factor = 1.0f / floatFilter[0];
228 
229  unsigned int filterSum = 0u;
230 
231  for (unsigned int n = 0u; n < filterSize; ++n)
232  {
233  filter[n] = (unsigned int)(floatFilter[n] * factor + 0.5f);
234  filterSum += filter[n];
235  }
236 
237  if (denominator)
238  {
239  *denominator = filterSum;
240  }
241 }
242 
243 template <typename T>
244 void FrameFilterGaussian::determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T* filter, T* denominator)
245 {
246  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, "Invalid data type for a filter!");
247 
248  ocean_assert(filterSize % 2u == 1u);
249  ocean_assert(sigma > NumericF::eps());
250 
251  ocean_assert(filter != nullptr);
252 
253  // we calculate e ^ -(x^2 / 2 * sigma^2)
254  // while x = i - (filterSize / 2)
255 
256  const unsigned int filterSize_2 = filterSize / 2u;
257 
258  const T scaleFactor = T(-0.5f / (sigma * sigma));
259 
260  T filterSum = T(0);
261 
262  for (unsigned int n = 0u; n < filterSize; ++n)
263  {
264  const int i = int(n - filterSize_2);
265 
266  filter[n] = NumericT<T>::exp(scaleFactor * T(i) * T(i));
267 
268  filterSum += filter[n];
269  }
270 
271  const T invFilterSum = T(1) / filterSum;
272 
273  for (unsigned int n = 0u; n < filterSize; ++n)
274  {
275  filter[n] *= invFilterSum;
276  }
277 
278 #ifdef OCEAN_DEBUG
279  {
280  T debugFilterSum = T(0);
281  for (unsigned int n = 0u; n < filterSize; ++n)
282  {
283  debugFilterSum += filter[n];
284  }
285  ocean_assert(NumericT<T>::isEqual(debugFilterSum, T(1)));
286  }
287 #endif
288 
289  if (denominator)
290  {
291  *denominator = T(1);
292  }
293 }
294 
295 template <>
296 inline void FrameFilterGaussian::determineFilterFactors<unsigned int>(const unsigned int filterSize, unsigned int* filter, unsigned int* denominator)
297 {
298  ocean_assert(filterSize % 2u == 1u);
299  ocean_assert(filter != nullptr);
300 
301  if (filterSize <= 7u)
302  {
303  static constexpr std::array<unsigned int, 16> predefinedFilters =
304  {
305  1u,
306  1u, 2u, 1u,
307  1u, 4u, 6u, 4u, 1u,
308  1u, 4u, 7u, 9u, 7u, 4u, 1u
309  };
310 
311  static constexpr std::array<unsigned int, 4> predefinedDenominators =
312  {
313  1u,
314  4u,
315  16u,
316  33u
317  };
318 
319  static constexpr std::array<unsigned int, 4> offsets =
320  {
321  0u,
322  1u,
323  4u,
324  9u
325  };
326 
327  ocean_assert(filterSize / 2u < offsets.size());
328  const unsigned int filterOffset = offsets[filterSize / 2u];
329 
330  for (unsigned int n = 0u; n < filterSize; ++n)
331  {
332  ocean_assert(filterOffset + n < predefinedFilters.size());
333  filter[n] = predefinedFilters[filterOffset + n];
334  }
335 
336 
337  if (denominator != nullptr)
338  {
339  ocean_assert(filterSize / 2u < predefinedDenominators.size());
340  *denominator = predefinedDenominators[filterSize / 2u];
341  }
342 
343  return;
344  }
345 
346  const float sigma = filterSize2sigma<float>(filterSize);
347 
348  determineFilterFactorsWithExplicitSigma<unsigned int>(filterSize, sigma, filter, denominator);
349 }
350 
351 template <typename T>
352 void FrameFilterGaussian::determineFilterFactors(const unsigned int filterSize, T* filter, T* denominator)
353 {
354  ocean_assert(filterSize % 2u == 1u);
355  ocean_assert(filter != nullptr);
356 
357  if (filterSize <= 7u)
358  {
359  static constexpr std::array<float, 16> predefinedFilters =
360  {
361  1.0f,
362  0.25f, 0.5f, 0.25f,
363  0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f,
364  0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f,
365 
366  };
367 
368  static constexpr std::array<unsigned int, 4> offsets =
369  {
370  0u,
371  1u,
372  4u,
373  9u
374  };
375 
376  ocean_assert(filterSize / 2u < offsets.size());
377  const unsigned int filterOffset = offsets[filterSize / 2u];
378 
379  for (unsigned int n = 0u; n < filterSize; ++n)
380  {
381  ocean_assert(filterOffset + n < predefinedFilters.size());
382  filter[n] = T(predefinedFilters[filterOffset + n]);
383  }
384 
385  if (denominator != nullptr)
386  {
387  *denominator = T(1);
388  }
389 
390  return;
391  }
392 
393  const float sigma = filterSize2sigma<float>(filterSize);
394 
395  determineFilterFactorsWithExplicitSigma<T>(filterSize, sigma, filter, denominator);
396 }
397 
398 template <typename T, typename TFilter>
399 bool FrameFilterGaussian::filter(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma, Worker* worker, ReusableMemory* reusableMemory, const ProcessorInstructions processorInstructions)
400 {
401  ocean_assert(source != nullptr && target != nullptr);
402  ocean_assert(width >= horizontalFilterSize && height >= verticalFilterSize);
403 
404  ocean_assert(horizontalFilterSize >= 1u && horizontalFilterSize % 2u == 1u);
405  ocean_assert(verticalFilterSize >= 1u && verticalFilterSize % 2u == 1u);
406  if (horizontalFilterSize == 0u || horizontalFilterSize % 2u != 1u || verticalFilterSize == 0u || verticalFilterSize % 2u != 1u)
407  {
408  return false;
409  }
410 
411 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
412 
413  // we have a special implementation for small filter kernels
414 
415  if (std::is_same<T, uint8_t>::value && std::is_same<TFilter, unsigned int>::value)
416  {
417  if (width >= 18u && channels == 1u && horizontalFilterSize == 3u && verticalFilterSize == 3u && sigma <= 0.0f)
418  {
419  filter1Channel8Bit121NEON((const uint8_t*)(source), (uint8_t*)(target), width, height, sourcePaddingElements, targetPaddingElements, reusableMemory);
420  return true;
421  }
422  }
423 
424 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
425 
426  FrameFilterSeparable::ReusableMemory* separableReusableMemory = reusableMemory != nullptr ? &reusableMemory->separableReusableMemory_ : nullptr;
427 
428  std::vector<TFilter> localHorizontalFilter;
429  TFilter* horizontalFilter = nullptr;
430 
431  if (reusableMemory != nullptr)
432  {
433  if (reusableMemory->horizontalFilterMemory_.size() != horizontalFilterSize * sizeof(TFilter))
434  {
435  reusableMemory->horizontalFilterMemory_ = Memory::create<TFilter>(horizontalFilterSize);
436  }
437 
438  horizontalFilter = reusableMemory->horizontalFilterMemory_.data<TFilter>();
439  }
440  else
441  {
442  localHorizontalFilter.resize(horizontalFilterSize);
443  horizontalFilter = localHorizontalFilter.data();
444  }
445 
446  if (sigma <= 0.0f)
447  {
448  determineFilterFactors(horizontalFilterSize, horizontalFilter);
449  }
450  else
451  {
452  determineFilterFactorsWithExplicitSigma(horizontalFilterSize, sigma, horizontalFilter);
453  }
454 
455  if (horizontalFilterSize == verticalFilterSize)
456  {
457  return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, horizontalFilter, horizontalFilterSize, worker, separableReusableMemory, processorInstructions);
458  }
459  else
460  {
461  std::vector<TFilter> localVerticalFilter;
462  TFilter* verticalFilter = nullptr;
463 
464  if (reusableMemory != nullptr)
465  {
466  if (reusableMemory->verticalFilterMemory_.size() != verticalFilterSize * sizeof(TFilter))
467  {
468  reusableMemory->verticalFilterMemory_ = Memory::create<TFilter>(verticalFilterSize);
469  }
470 
471  verticalFilter = reusableMemory->verticalFilterMemory_.data<TFilter>();
472  }
473  else
474  {
475  localVerticalFilter.resize(verticalFilterSize);
476  verticalFilter = localVerticalFilter.data();
477  }
478 
479  if (sigma <= 0.0f)
480  {
481  determineFilterFactors(verticalFilterSize, verticalFilter);
482  }
483  else
484  {
485  determineFilterFactorsWithExplicitSigma(verticalFilterSize, sigma, verticalFilter);
486  }
487 
488  return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, verticalFilter, verticalFilterSize, worker, separableReusableMemory, processorInstructions);
489  }
490 }
491 
492 template <typename T, typename TFilter>
493 inline bool FrameFilterGaussian::filter(T* frame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int framePaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma, Worker* worker, ReusableMemory* reusableMemory, const ProcessorInstructions processorInstructions)
494 {
495  return filter<T, TFilter>(frame, frame, width, height, channels, framePaddingElements, framePaddingElements, horizontalFilterSize, verticalFilterSize, sigma, worker, reusableMemory, processorInstructions);
496 }
497 
498 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
499 
500 inline void FrameFilterGaussian::filter1Channel8Bit121NEON(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory* reusableMemory)
501 {
502  ocean_assert(source != nullptr);
503  ocean_assert(target != nullptr);
504  ocean_assert(width >= 18u);
505  ocean_assert(height >= 1u);
506 
507  // [2, 2, 2, 2, 2, 2, 2, 2]
508  const uint8x8_t constant_2_u_8x8 = vdup_n_u8(2u);
509  const uint16x8_t constant_2_u_16x8 = vdupq_n_u16(2u);
510 
511  const unsigned int sourceStrideElements = width * 1u + sourcePaddingElements;
512  const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
513 
514  const unsigned int innerPixels = width - 2u;
515 
516  Memory memoryResponseRows; // memory for three response rows, each row contains 'innerPixels' uint16_t elements
517  uint16_t* responseRows = nullptr;
518 
519  const unsigned int reusableMemoryNecessaryElements = width * 4u;
520 
521  if (reusableMemory != nullptr)
522  {
523  if (reusableMemory->responseRowsMemory_.size() != reusableMemoryNecessaryElements * sizeof(uint16_t))
524  {
525  reusableMemory->responseRowsMemory_ = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
526  }
527 
528  responseRows = reusableMemory->responseRowsMemory_.data<uint16_t>();
529  }
530  else
531  {
532  memoryResponseRows = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
533  responseRows = memoryResponseRows.data<uint16_t>();
534  }
535 
536  ocean_assert(responseRows != nullptr);
537 
538  uint16_t* responseTopRow = responseRows + width * 0u;
539 
540  // first, we determine the horizontal filter response for the 1D filter [1 2 1]
541 
542  responseTopRow[0] = source[0] * 3u + source[1]; // special handling for first pixel response
543 
544  for (unsigned int n = 0u; n < innerPixels; n += 16u)
545  {
546  if (n + 16u > innerPixels)
547  {
548  ocean_assert(n >= 16u && innerPixels > 16u);
549  const unsigned int newN = innerPixels - 16u;
550 
551  const unsigned int offset = n - newN;
552  ocean_assert_and_suppress_unused(offset < innerPixels, offset);
553 
554  ocean_assert(n > newN);
555 
556  n = newN;
557 
558  // the for loop will stop after this iteration
559  ocean_assert(n + 16u == innerPixels);
560  ocean_assert(!(n + 16u < innerPixels));
561  }
562 
563  const uint8x16_t source_0_u_8x16 = vld1q_u8(source + n + 0u);
564  const uint8x16_t source_1_u_8x16 = vld1q_u8(source + n + 1u);
565  const uint8x16_t source_2_u_8x16 = vld1q_u8(source + n + 2u);
566 
567  // result = source0 + source2
568  uint16x8_t low_u_16x8 = vaddl_u8(vget_low_u8(source_0_u_8x16), vget_low_u8(source_2_u_8x16));
569  uint16x8_t high_u_16x8 = vaddl_u8(vget_high_u8(source_0_u_8x16), vget_high_u8(source_2_u_8x16));
570 
571  // result += 2 * source1
572  low_u_16x8 = vmlal_u8(low_u_16x8, vget_low_u8(source_1_u_8x16), constant_2_u_8x8);
573  high_u_16x8 = vmlal_u8(high_u_16x8, vget_high_u8(source_1_u_8x16), constant_2_u_8x8);
574 
575  vst1q_u16(responseTopRow + 1u + n + 0u, low_u_16x8);
576  vst1q_u16(responseTopRow + 1u + n + 8u, high_u_16x8);
577  }
578 
579  responseTopRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u; // special handling for last pixel response
580 
581  // due to border mirroring, our top and center row is identical for the first iteration
582  uint16_t* responseCenterRow = responseTopRow;
583  uint16_t* responseBottomRow = responseRows + width * 2u;
584  uint8_t* const sourceExtraCopy = (uint8_t*)(responseRows + width * 3u);
585 
586  source += sourceStrideElements;
587 
588  for (unsigned int y = 0u; y < height; ++y)
589  {
590  if (y == height - 2u)
591  {
592  // we need to make a copy of the last source row for in-place filtering
593  memcpy(sourceExtraCopy, source, width * sizeof(uint8_t));
594  }
595 
596  // for each iteration, we have a pre-calculated (horizontal) response for the top and center row already
597 
598  responseBottomRow[0u] = source[0] * 3u + source[1];
599 
600  // handle left pixel: (outside) (inside)
601  // | 3 1 1 | 2 1
602  // | [6] 2 2 | [4] 2
603  // | 3 1 the filter factors are based on: 1 | 2 1
604 
605  // using scoped value for intermediate storage as source and target can be identical e.g., for in-place filtering
606  const ScopedValueT<uint8_t> firstPixelValue(*target, uint8_t((responseTopRow[0] + responseCenterRow[0] * 2u + responseBottomRow[0] + 8u) / 16u));
607 
608  for (unsigned int n = 0u; n < innerPixels; n += 16u)
609  {
610  if (n + 16u > innerPixels)
611  {
612  ocean_assert(n >= 16u && innerPixels > 16u);
613  const unsigned int newN = innerPixels - 16u;
614 
615  const unsigned int offset = n - newN;
616  ocean_assert_and_suppress_unused(offset < innerPixels, offset);
617 
618  ocean_assert(n > newN);
619 
620  n = newN;
621 
622  // the for loop will stop after this iteration
623  ocean_assert(n + 16u == innerPixels);
624  ocean_assert(!(n + 16u < innerPixels));
625  }
626 
627  const uint8x16_t sourceBottom_0_u_8x16 = vld1q_u8(source + n + 0u);
628  const uint8x16_t sourceBottom_1_u_8x16 = vld1q_u8(source + n + 1u);
629  const uint8x16_t sourceBottom_2_u_8x16 = vld1q_u8(source + n + 2u);
630 
631  // bottomResult = bottomSource0 + bottomSource2
632  uint16x8_t bottomLow_u_16x8 = vaddl_u8(vget_low_u8(sourceBottom_0_u_8x16), vget_low_u8(sourceBottom_2_u_8x16));
633  uint16x8_t bottomHigh_u_16x8 = vaddl_u8(vget_high_u8(sourceBottom_0_u_8x16), vget_high_u8(sourceBottom_2_u_8x16));
634 
635  // bottomResult += 2 * bottomSource1
636  bottomLow_u_16x8 = vmlal_u8(bottomLow_u_16x8, vget_low_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
637  bottomHigh_u_16x8 = vmlal_u8(bottomHigh_u_16x8, vget_high_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
638 
639 
640  // load the pre-calculated values for top
641  const uint16x8_t topLow_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 0u);
642  const uint16x8_t topHigh_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 8u);
643 
644  // load the pre-calculated values for bottom
645  const uint16x8_t centerLow_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 0u);
646  const uint16x8_t centerHigh_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 8u);
647 
648  // result = top + bottom
649  uint16x8_t resultLow_u_16x8 = vaddq_u16(topLow_u_16x8, bottomLow_u_16x8);
650  uint16x8_t resultHigh_u_16x8 = vaddq_u16(topHigh_u_16x8, bottomHigh_u_16x8);
651 
652  // result += 2 * center
653  resultLow_u_16x8 = vmlaq_u16(resultLow_u_16x8, centerLow_u_16x8, constant_2_u_16x8);
654  resultHigh_u_16x8 = vmlaq_u16(resultHigh_u_16x8, centerHigh_u_16x8, constant_2_u_16x8);
655 
656  // write the results for the bottom row so that we can use them as new pre-calculated values in the next iteration
657  // as we may re-calculate the last 16 pixels once again in the very last iteration, we cannot simply write the results to the center row
658  vst1q_u16(responseBottomRow + 1u + n + 0u, bottomLow_u_16x8);
659  vst1q_u16(responseBottomRow + 1u + n + 8u, bottomHigh_u_16x8);
660 
661  // result = (result + 8) / 16
662  const uint8x16_t result_u_8x16 = vcombine_u8(vrshrn_n_u16(resultLow_u_16x8, 4), vrshrn_n_u16(resultHigh_u_16x8, 4));
663 
664  vst1q_u8(target + 1u + n, result_u_8x16);
665  }
666 
667  responseBottomRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u;
668 
669  // handle right pixel: (inside) (outside)
670  // 1 3 | 1 2 | 1
671  // 2 [6] | 2 [4] | 2
672  // 1 3 | 1 2 | 1
673 
674  target[width - 1u] = uint8_t((responseTopRow[width - 1u] + responseCenterRow[width - 1u] * 2u + responseBottomRow[width - 1u] + 8u) / 16u);
675 
676  source += sourceStrideElements;
677  target += targetStrideElements;
678 
679  std::swap(responseTopRow, responseCenterRow);
680 
681  if (y == 0u)
682  {
683  // the next row will not have any border mirroring anymore
684 
685  responseCenterRow = responseRows + width * 1u;
686  }
687  else if (y == height - 2u)
688  {
689  // the next iteration will handle the last row in the frame
690  // the bottom row will be mirrored which is actually the last row again
691 
692  source = sourceExtraCopy;
693  }
694 
695  std::swap(responseCenterRow, responseBottomRow);
696  }
697 }
698 
699 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
700 
701 }
702 
703 }
704 
705 #endif // META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
This class holds re-usable memory for the filtering process.
Definition: FrameFilterGaussian.h:36
ReusableMemory()=default
Default constructor.
Memory horizontalFilterMemory_
The reusable memory for horizontal filter factors.
Definition: FrameFilterGaussian.h:52
Memory verticalFilterMemory_
The reusable memory for vertical filter factors.
Definition: FrameFilterGaussian.h:55
FrameFilterSeparable::ReusableMemory separableReusableMemory_
The reusable memory object for the separable filter.
Definition: FrameFilterGaussian.h:49
Memory responseRowsMemory_
The reusable memory for several response rows.
Definition: FrameFilterGaussian.h:58
This class implements Gaussian image blur filters.
Definition: FrameFilterGaussian.h:29
static T filterSize2sigma(const unsigned int filterSize)
Calculates the sigma corresponding to a specified box filter so that the Gauss distribution using the...
Definition: FrameFilterGaussian.h:211
static void determineFilterFactors(const unsigned int filterSize, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition: FrameFilterGaussian.h:352
static bool filter(const Frame &source, Frame &target, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given source image and copies the resulting filter results to a g...
static bool filter(Frame &frame, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given frame.
static unsigned int sigma2filterSize(const T sigma)
Calculates the ideal size of a box filter for a specified sigma defining the shape of the Gauss distr...
Definition: FrameFilterGaussian.h:198
static void determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition: FrameFilterGaussian.h:244
static void filter1Channel8Bit121NEON(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory *reusableMemory)
Applies a horizontal and vertical filtering with a Gaussian kernel with size 3, applying a horizontal...
Definition: FrameFilterGaussian.h:500
This class holds re-usable memory for the filtering process.
Definition: FrameFilterSeparable.h:40
This class implements Ocean's image class.
Definition: Frame.h:1760
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
size_t size() const
Returns the size of the memory in bytes.
Definition: base/Memory.h:386
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition: base/Memory.h:303
This class provides basic numeric functionalities.
Definition: Numeric.h:57
static T exp(const T value)
Returns the base-e exponential function of a given value.
Definition: Numeric.h:1643
static constexpr T eps()
Returns a small epsilon.
This class implements a scoped value that allows to change a specified value at the end of a scope.
Definition: ScopedValue.h:23
static Processor & get()
Returns a reference to the unique object.
Definition: Singleton.h:115
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
ProcessorInstructions
Definition of individual processor instruction types.
Definition: base/Processor.h:22
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15