Ocean
Loading...
Searching...
No Matches
FrameFilterGaussian.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
9#define META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
10
11#include "ocean/cv/CV.h"
13
14#include "ocean/base/Frame.h"
15#include "ocean/base/Memory.h"
17
18namespace Ocean
19{
20
21namespace CV
22{
23
24/**
25 * This class implements Gaussian image blur filters.
26 * @ingroup cv
27 */
28class OCEAN_CV_EXPORT FrameFilterGaussian
29{
30 public:
31
32 /**
33 * This class holds re-usable memory for the filtering process.
34 */
36 {
37 friend class FrameFilterGaussian;
38
39 public:
40
41 /**
42 * Default constructor.
43 */
44 ReusableMemory() = default;
45
46 protected:
47
48 /// The reusable memory object for the separable filter.
50
51 /// The reusable memory for horizontal filter factors.
53
54 /// The reusable memory for vertical filter factors.
56
57 /// The reusable memory for several response rows.
59 };
60
61 public:
62
63 /**
64 * Calculates the ideal size of a box filter for a specified sigma defining the shape of the Gauss distribution.
65 * @param sigma The sigma defining the shape of the Gauss distribution in pixel, with range (0, infinity)
66 * @return The ideal size of the box filter in pixel, with range [1, infinity], will be odd
67 * @tparam T The data type of sigma, should be 'float' or 'double'
68 */
69 template <typename T>
70 static inline unsigned int sigma2filterSize(const T sigma);
71
72 /**
73 * Calculates the sigma corresponding to a specified box filter so that the Gauss distribution using the sigma represents the box filter.
74 * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
75 * @return The resulting sigma in pixel, with range (0, infinity)
76 * @tparam T The data type of sigma, should be 'float' or 'double'
77 */
78 template <typename T>
79 static inline T filterSize2sigma(const unsigned int filterSize);
80
81 /**
82 * Determines 1D Gaussian blur filter factors for a given filter size.
83 * The resulting filter will be normalized for filter values with floating point precision and will not be normalized for filter values with integer precision.<br>
84 * This function will determine the sigma based on the specified size of the filter by using 'filterSize2sigma'.
85 * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
86 * @param filter The buffer receiving the resulting filter values, must be valid
87 * @param denominator Optional resulting denominator if the resulting filter values are not normalized
88 * @tparam T The data type of the filter elements, e.g., 'unsigned int', or 'float'
89 * @see filterSize2sigma(), determineFilterFactorsWithExplicitSigma().
90 */
91 template <typename T>
92 static void determineFilterFactors(const unsigned int filterSize, T* filter, T* denominator = nullptr);
93
94 /**
95 * Determines 1D Gaussian blur filter factors for a given filter size.
96 * The resulting filter will be normalized for filter values with floating point precision and will not be normalized for filter values with integer precision.<br>
97 * Information: This function is the equivalent to OpenCV's cv::getGaussianKernel().
98 * @param filterSize The size of the filter in pixel, with range [1, infinity), must be odd
99 * @param sigma The explicit sigma which will be used to determine the filter values, with range (0, infinity)
100 * @param filter The buffer receiving the resulting filter values, must be valid
101 * @param denominator Optional resulting denominator if the resulting filter values are not normalized
102 * @tparam T The data type of the filter elements, must be 'unsigned int', or 'float', or 'double'
103 * @see determineFilterFactors(), filterSize2sigma().
104 */
105 template <typename T>
106 static void determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T* filter, T* denominator = nullptr);
107
108 /**
109 * Applies a Gaussian blur filter to a given source image and copies the resulting filter results to a given output frame.
110 * If the target frame type does not match the source frame type the target frame type will be adjusted.
111 * Information: This function is the equivalent to OpenCV's cv::GaussianBlur().
112 * @param source The source frame to which the blur filter will be applied, must be valid
113 * @param target The target frame receiving the blurred image content, will be set to the correct frame type if invalid or not matching
114 * @param filterSize The size of the filter to be applied, with range [1, min(source.width(), source.height())], must be odd
115 * @param worker Optional worker object to distribute the computational load
116 * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
117 * @return True, if succeeded
118 */
119 static bool filter(const Frame& source, Frame& target, const unsigned int filterSize, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr);
120
121 /**
122 * Applies a Gaussian blur filter to a given frame.
123 * In case the given frame is a read-only frame, the frame will be replaced with a new frame owning the memory.<br>
124 * In case the given frame is a writable frame, the filter will be applied in place.
125 * @param frame The frame to which the blur filter will be applied, must be valid
126 * @param filterSize The size of the filter to be applied, with range [1, min(frame.width(), frame.height())], must be odd
127 * @param worker Optional worker object to distribute the computational load
128 * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
129 * @return True, if succeeded
130 */
131 static bool filter(Frame& frame, const unsigned int filterSize, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr);
132
133 /**
134 * Applies a Gaussian blur filter to a given frame.
135 * @param source The source frame to be filtered, must be valid
136 * @param target The target frame receiving the filtered results, can be the same memory pointer as 'source', must be valid
137 * @param width The width of the source (and target) frame in pixel, with range [horizontalFilterSize, infinity)
138 * @param height The height of the source (and target) frame in pixel, with range [verticalFilterSize, infinity)
139 * @param channels The number of channels the source frame (and target frame) has, with range [1, infinity)
140 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
141 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
142 * @param horizontalFilterSize The number of elements the horizontal filter has, with range [1, width], must be odd
143 * @param verticalFilterSize The number of elements the vertical filter has, with range [1, height], must be odd
144 * @param sigma The optional sigma that is applied explicitly, with range (0, infinity), -1 to calculate the sigma automatically based on the filter sizes
145 * @param worker Optional worker object to distribute the computation
146 * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
147 * @param processorInstructions The set of available instructions, may be any combination of instructions
148 * @return True, if succeeded
149 * @tparam T The data type of each pixel channel of the source frame (and target frame) e.g., 'uint8_t', or 'float'
150 * @tparam TFilter The data type of each filter elements e.g., 'unsigned int', or 'float'
151 */
152 template <typename T, typename TFilter>
153 static bool filter(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma = -1.0f, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr, const ProcessorInstructions processorInstructions = Processor::get().instructions());
154
155 /**
156 * Applies a Gaussian blur filter to a given frame.
157 * @param frame The frame to be filtered, must be valid
158 * @param width The width of the frame in pixel, with range [horizontalFilterSize, infinity)
159 * @param height The height of the frame in pixel, with range [verticalFilterSize, infinity)
160 * @param channels The number of channels the source frame (and target frame) has, with range [1, infinity)
161 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
162 * @param horizontalFilterSize The number of elements the horizontal filter has, with range [1, width], must be odd
163 * @param verticalFilterSize The number of elements the vertical filter has, with range [1, height], must be odd
164 * @param sigma The optional sigma that is applied explicitly, with range (0, infinity), -1 to calculate the sigma automatically based on the filter sizes
165 * @param worker Optional worker object to distribute the computation
166 * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
167 * @param processorInstructions The set of available instructions, may be any combination of instructions
168 * @return True, if succeeded
169 * @tparam T The data type of each pixel channel of the source frame (and target frame) e.g., 'uint8_t', or 'float'
170 * @tparam TFilter The data type of each filter elements e.g., 'unsigned int', or 'float'
171 */
172 template <typename T, typename TFilter>
173 static inline bool filter(T* frame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int framePaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma = -1.0f, Worker* worker = nullptr, ReusableMemory* reusableMemory = nullptr, const ProcessorInstructions processorInstructions = Processor::get().instructions());
174
175 protected:
176
177#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
178
179 /**
180 * Applies a horizontal and vertical filtering with a Gaussian kernel with size 3, applying a horizontal and vertical 121 filter kernel.
181 * The frame must be a 1 channel 8 bit per pixel image.<br>
182 * Instead of applying a separated horizontal and vertical filter, the function applies the 2D filter directly to speed up the process significantly.<br>
183 * This function applies NEON instructions and can handle frames with width >= 18 pixels only.
184 * @param source The source frame to be filtered, must be valid
185 * @param target The target frame receiving the filtered results, must be valid
186 * @param width The width of the source (and target) frame in pixel, with range [18, infinity)
187 * @param height The height of the source (and target) frame in pixel, with range [1, infinity)
188 * @param sourcePaddingElements Optional padding elements at the end of each source row, in elements, with range [0, infinity)
189 * @param targetPaddingElements Optional padding elements at the end of each target row, in elements, with range [0, infinity)
190 * @param reusableMemory An optional object holding reusable memory which can be used during filtering, nullptr otherwise
191 */
192 static inline void filter1Channel8Bit121NEON(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory* reusableMemory);
193
194#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
195};
196
197template <typename T>
198inline unsigned int FrameFilterGaussian::sigma2filterSize(const T sigma)
199{
200 ocean_assert(sigma > NumericT<T>::eps());
201
202 const unsigned int size = (unsigned int)NumericT<T>::ceil((sigma - T(0.8)) * T(6.666666666) + T(2.999)) | 0x01u; // bitwise or to create an odd size
203
204 ocean_assert(size >= 1u);
205 ocean_assert(size % 2u == 1u);
206
207 return size;
208}
209
210template <typename T>
211inline T FrameFilterGaussian::filterSize2sigma(const unsigned int filterSize)
212{
213 ocean_assert(filterSize >= 1u && (filterSize % 2u) == 1u);
214
215 return T(0.3) * (T(filterSize / 2u) - T(1)) + T(0.8);
216}
217
218template <>
219inline void FrameFilterGaussian::determineFilterFactorsWithExplicitSigma<unsigned int>(const unsigned int filterSize, const float sigma, unsigned int* filter, unsigned int* denominator)
220{
221 ocean_assert(filterSize % 2u == 1u);
222 ocean_assert(filter != nullptr);
223
224 std::vector<float> floatFilter(filterSize);
225 determineFilterFactorsWithExplicitSigma<float>(filterSize, sigma, floatFilter.data());
226
227 const float factor = 1.0f / floatFilter[0];
228
229 unsigned int filterSum = 0u;
230
231 for (unsigned int n = 0u; n < filterSize; ++n)
232 {
233 filter[n] = (unsigned int)(floatFilter[n] * factor + 0.5f);
234 filterSum += filter[n];
235 }
236
237 if (denominator)
238 {
239 *denominator = filterSum;
240 }
241}
242
243template <typename T>
244void FrameFilterGaussian::determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T* filter, T* denominator)
245{
246 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, "Invalid data type for a filter!");
247
248 ocean_assert(filterSize % 2u == 1u);
249 ocean_assert(sigma > NumericF::eps());
250
251 ocean_assert(filter != nullptr);
252
253 // we calculate e ^ -(x^2 / 2 * sigma^2)
254 // while x = i - (filterSize / 2)
255
256 const unsigned int filterSize_2 = filterSize / 2u;
257
258 const T scaleFactor = T(-0.5f / (sigma * sigma));
259
260 T filterSum = T(0);
261
262 for (unsigned int n = 0u; n < filterSize; ++n)
263 {
264 const int i = int(n - filterSize_2);
265
266 filter[n] = NumericT<T>::exp(scaleFactor * T(i) * T(i));
267
268 filterSum += filter[n];
269 }
270
271 const T invFilterSum = T(1) / filterSum;
272
273 for (unsigned int n = 0u; n < filterSize; ++n)
274 {
275 filter[n] *= invFilterSum;
276 }
277
278#ifdef OCEAN_DEBUG
279 {
280 T debugFilterSum = T(0);
281 for (unsigned int n = 0u; n < filterSize; ++n)
282 {
283 debugFilterSum += filter[n];
284 }
285 ocean_assert(NumericT<T>::isEqual(debugFilterSum, T(1)));
286 }
287#endif
288
289 if (denominator)
290 {
291 *denominator = T(1);
292 }
293}
294
295template <>
296inline void FrameFilterGaussian::determineFilterFactors<unsigned int>(const unsigned int filterSize, unsigned int* filter, unsigned int* denominator)
297{
298 ocean_assert(filterSize % 2u == 1u);
299 ocean_assert(filter != nullptr);
300
301 if (filterSize <= 7u)
302 {
303 static constexpr std::array<unsigned int, 16> predefinedFilters =
304 {
305 1u,
306 1u, 2u, 1u,
307 1u, 4u, 6u, 4u, 1u,
308 1u, 4u, 7u, 9u, 7u, 4u, 1u
309 };
310
311 static constexpr std::array<unsigned int, 4> predefinedDenominators =
312 {
313 1u,
314 4u,
315 16u,
316 33u
317 };
318
319 static constexpr std::array<unsigned int, 4> offsets =
320 {
321 0u,
322 1u,
323 4u,
324 9u
325 };
326
327 ocean_assert(filterSize / 2u < offsets.size());
328 const unsigned int filterOffset = offsets[filterSize / 2u];
329
330 for (unsigned int n = 0u; n < filterSize; ++n)
331 {
332 ocean_assert(filterOffset + n < predefinedFilters.size());
333 filter[n] = predefinedFilters[filterOffset + n];
334 }
335
336
337 if (denominator != nullptr)
338 {
339 ocean_assert(filterSize / 2u < predefinedDenominators.size());
340 *denominator = predefinedDenominators[filterSize / 2u];
341 }
342
343 return;
344 }
345
346 const float sigma = filterSize2sigma<float>(filterSize);
347
348 determineFilterFactorsWithExplicitSigma<unsigned int>(filterSize, sigma, filter, denominator);
349}
350
351template <typename T>
352void FrameFilterGaussian::determineFilterFactors(const unsigned int filterSize, T* filter, T* denominator)
353{
354 ocean_assert(filterSize % 2u == 1u);
355 ocean_assert(filter != nullptr);
356
357 if (filterSize <= 7u)
358 {
359 static constexpr std::array<float, 16> predefinedFilters =
360 {
361 1.0f,
362 0.25f, 0.5f, 0.25f,
363 0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f,
364 0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f,
365
366 };
367
368 static constexpr std::array<unsigned int, 4> offsets =
369 {
370 0u,
371 1u,
372 4u,
373 9u
374 };
375
376 ocean_assert(filterSize / 2u < offsets.size());
377 const unsigned int filterOffset = offsets[filterSize / 2u];
378
379 for (unsigned int n = 0u; n < filterSize; ++n)
380 {
381 ocean_assert(filterOffset + n < predefinedFilters.size());
382 filter[n] = T(predefinedFilters[filterOffset + n]);
383 }
384
385 if (denominator != nullptr)
386 {
387 *denominator = T(1);
388 }
389
390 return;
391 }
392
393 const float sigma = filterSize2sigma<float>(filterSize);
394
395 determineFilterFactorsWithExplicitSigma<T>(filterSize, sigma, filter, denominator);
396}
397
398template <typename T, typename TFilter>
399bool FrameFilterGaussian::filter(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma, Worker* worker, ReusableMemory* reusableMemory, const ProcessorInstructions processorInstructions)
400{
401 ocean_assert(source != nullptr && target != nullptr);
402 ocean_assert(width >= horizontalFilterSize && height >= verticalFilterSize);
403
404 ocean_assert(horizontalFilterSize >= 1u && horizontalFilterSize % 2u == 1u);
405 ocean_assert(verticalFilterSize >= 1u && verticalFilterSize % 2u == 1u);
406 if (horizontalFilterSize == 0u || horizontalFilterSize % 2u != 1u || verticalFilterSize == 0u || verticalFilterSize % 2u != 1u)
407 {
408 return false;
409 }
410
411#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
412
413 // we have a special implementation for small filter kernels
414
415 if (std::is_same<T, uint8_t>::value && std::is_same<TFilter, unsigned int>::value)
416 {
417 if (width >= 18u && channels == 1u && horizontalFilterSize == 3u && verticalFilterSize == 3u && sigma <= 0.0f)
418 {
419 filter1Channel8Bit121NEON((const uint8_t*)(source), (uint8_t*)(target), width, height, sourcePaddingElements, targetPaddingElements, reusableMemory);
420 return true;
421 }
422 }
423
424#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
425
426 FrameFilterSeparable::ReusableMemory* separableReusableMemory = reusableMemory != nullptr ? &reusableMemory->separableReusableMemory_ : nullptr;
427
428 std::vector<TFilter> localHorizontalFilter;
429 TFilter* horizontalFilter = nullptr;
430
431 if (reusableMemory != nullptr)
432 {
433 if (reusableMemory->horizontalFilterMemory_.size() != horizontalFilterSize * sizeof(TFilter))
434 {
435 reusableMemory->horizontalFilterMemory_ = Memory::create<TFilter>(horizontalFilterSize);
436 }
437
438 horizontalFilter = reusableMemory->horizontalFilterMemory_.data<TFilter>();
439 }
440 else
441 {
442 localHorizontalFilter.resize(horizontalFilterSize);
443 horizontalFilter = localHorizontalFilter.data();
444 }
445
446 if (sigma <= 0.0f)
447 {
448 determineFilterFactors(horizontalFilterSize, horizontalFilter);
449 }
450 else
451 {
452 determineFilterFactorsWithExplicitSigma(horizontalFilterSize, sigma, horizontalFilter);
453 }
454
455 if (horizontalFilterSize == verticalFilterSize)
456 {
457 return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, horizontalFilter, horizontalFilterSize, worker, separableReusableMemory, processorInstructions);
458 }
459 else
460 {
461 std::vector<TFilter> localVerticalFilter;
462 TFilter* verticalFilter = nullptr;
463
464 if (reusableMemory != nullptr)
465 {
466 if (reusableMemory->verticalFilterMemory_.size() != verticalFilterSize * sizeof(TFilter))
467 {
468 reusableMemory->verticalFilterMemory_ = Memory::create<TFilter>(verticalFilterSize);
469 }
470
471 verticalFilter = reusableMemory->verticalFilterMemory_.data<TFilter>();
472 }
473 else
474 {
475 localVerticalFilter.resize(verticalFilterSize);
476 verticalFilter = localVerticalFilter.data();
477 }
478
479 if (sigma <= 0.0f)
480 {
481 determineFilterFactors(verticalFilterSize, verticalFilter);
482 }
483 else
484 {
485 determineFilterFactorsWithExplicitSigma(verticalFilterSize, sigma, verticalFilter);
486 }
487
488 return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, verticalFilter, verticalFilterSize, worker, separableReusableMemory, processorInstructions);
489 }
490}
491
492template <typename T, typename TFilter>
493inline bool FrameFilterGaussian::filter(T* frame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int framePaddingElements, const unsigned int horizontalFilterSize, const unsigned int verticalFilterSize, const float sigma, Worker* worker, ReusableMemory* reusableMemory, const ProcessorInstructions processorInstructions)
494{
495 return filter<T, TFilter>(frame, frame, width, height, channels, framePaddingElements, framePaddingElements, horizontalFilterSize, verticalFilterSize, sigma, worker, reusableMemory, processorInstructions);
496}
497
498#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
499
500inline void FrameFilterGaussian::filter1Channel8Bit121NEON(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory* reusableMemory)
501{
502 ocean_assert(source != nullptr);
503 ocean_assert(target != nullptr);
504 ocean_assert(width >= 18u);
505 ocean_assert(height >= 1u);
506
507 // [2, 2, 2, 2, 2, 2, 2, 2]
508 const uint8x8_t constant_2_u_8x8 = vdup_n_u8(2u);
509 const uint16x8_t constant_2_u_16x8 = vdupq_n_u16(2u);
510
511 const unsigned int sourceStrideElements = width * 1u + sourcePaddingElements;
512 const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
513
514 const unsigned int innerPixels = width - 2u;
515
516 Memory memoryResponseRows; // memory for three response rows, each row contains 'innerPixels' uint16_t elements
517 uint16_t* responseRows = nullptr;
518
519 const unsigned int reusableMemoryNecessaryElements = width * 4u;
520
521 if (reusableMemory != nullptr)
522 {
523 if (reusableMemory->responseRowsMemory_.size() != reusableMemoryNecessaryElements * sizeof(uint16_t))
524 {
525 reusableMemory->responseRowsMemory_ = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
526 }
527
528 responseRows = reusableMemory->responseRowsMemory_.data<uint16_t>();
529 }
530 else
531 {
532 memoryResponseRows = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
533 responseRows = memoryResponseRows.data<uint16_t>();
534 }
535
536 ocean_assert(responseRows != nullptr);
537
538 uint16_t* responseTopRow = responseRows + width * 0u;
539
540 // first, we determine the horizontal filter response for the 1D filter [1 2 1]
541
542 responseTopRow[0] = source[0] * 3u + source[1]; // special handling for first pixel response
543
544 for (unsigned int n = 0u; n < innerPixels; n += 16u)
545 {
546 if (n + 16u > innerPixels)
547 {
548 ocean_assert(n >= 16u && innerPixels > 16u);
549 const unsigned int newN = innerPixels - 16u;
550
551 const unsigned int offset = n - newN;
552 ocean_assert_and_suppress_unused(offset < innerPixels, offset);
553
554 ocean_assert(n > newN);
555
556 n = newN;
557
558 // the for loop will stop after this iteration
559 ocean_assert(n + 16u == innerPixels);
560 ocean_assert(!(n + 16u < innerPixels));
561 }
562
563 const uint8x16_t source_0_u_8x16 = vld1q_u8(source + n + 0u);
564 const uint8x16_t source_1_u_8x16 = vld1q_u8(source + n + 1u);
565 const uint8x16_t source_2_u_8x16 = vld1q_u8(source + n + 2u);
566
567 // result = source0 + source2
568 uint16x8_t low_u_16x8 = vaddl_u8(vget_low_u8(source_0_u_8x16), vget_low_u8(source_2_u_8x16));
569 uint16x8_t high_u_16x8 = vaddl_u8(vget_high_u8(source_0_u_8x16), vget_high_u8(source_2_u_8x16));
570
571 // result += 2 * source1
572 low_u_16x8 = vmlal_u8(low_u_16x8, vget_low_u8(source_1_u_8x16), constant_2_u_8x8);
573 high_u_16x8 = vmlal_u8(high_u_16x8, vget_high_u8(source_1_u_8x16), constant_2_u_8x8);
574
575 vst1q_u16(responseTopRow + 1u + n + 0u, low_u_16x8);
576 vst1q_u16(responseTopRow + 1u + n + 8u, high_u_16x8);
577 }
578
579 responseTopRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u; // special handling for last pixel response
580
581 // due to border mirroring, our top and center row is identical for the first iteration
582 uint16_t* responseCenterRow = responseTopRow;
583 uint16_t* responseBottomRow = responseRows + width * 2u;
584 uint8_t* const sourceExtraCopy = (uint8_t*)(responseRows + width * 3u);
585
586 source += sourceStrideElements;
587
588 for (unsigned int y = 0u; y < height; ++y)
589 {
590 if (y == height - 2u)
591 {
592 // we need to make a copy of the last source row for in-place filtering
593 memcpy(sourceExtraCopy, source, width * sizeof(uint8_t));
594 }
595
596 // for each iteration, we have a pre-calculated (horizontal) response for the top and center row already
597
598 responseBottomRow[0u] = source[0] * 3u + source[1];
599
600 // handle left pixel: (outside) (inside)
601 // | 3 1 1 | 2 1
602 // | [6] 2 2 | [4] 2
603 // | 3 1 the filter factors are based on: 1 | 2 1
604
605 // using scoped value for intermediate storage as source and target can be identical e.g., for in-place filtering
606 const ScopedValueT<uint8_t> firstPixelValue(*target, uint8_t((responseTopRow[0] + responseCenterRow[0] * 2u + responseBottomRow[0] + 8u) / 16u));
607
608 for (unsigned int n = 0u; n < innerPixels; n += 16u)
609 {
610 if (n + 16u > innerPixels)
611 {
612 ocean_assert(n >= 16u && innerPixels > 16u);
613 const unsigned int newN = innerPixels - 16u;
614
615 const unsigned int offset = n - newN;
616 ocean_assert_and_suppress_unused(offset < innerPixels, offset);
617
618 ocean_assert(n > newN);
619
620 n = newN;
621
622 // the for loop will stop after this iteration
623 ocean_assert(n + 16u == innerPixels);
624 ocean_assert(!(n + 16u < innerPixels));
625 }
626
627 const uint8x16_t sourceBottom_0_u_8x16 = vld1q_u8(source + n + 0u);
628 const uint8x16_t sourceBottom_1_u_8x16 = vld1q_u8(source + n + 1u);
629 const uint8x16_t sourceBottom_2_u_8x16 = vld1q_u8(source + n + 2u);
630
631 // bottomResult = bottomSource0 + bottomSource2
632 uint16x8_t bottomLow_u_16x8 = vaddl_u8(vget_low_u8(sourceBottom_0_u_8x16), vget_low_u8(sourceBottom_2_u_8x16));
633 uint16x8_t bottomHigh_u_16x8 = vaddl_u8(vget_high_u8(sourceBottom_0_u_8x16), vget_high_u8(sourceBottom_2_u_8x16));
634
635 // bottomResult += 2 * bottomSource1
636 bottomLow_u_16x8 = vmlal_u8(bottomLow_u_16x8, vget_low_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
637 bottomHigh_u_16x8 = vmlal_u8(bottomHigh_u_16x8, vget_high_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
638
639
640 // load the pre-calculated values for top
641 const uint16x8_t topLow_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 0u);
642 const uint16x8_t topHigh_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 8u);
643
644 // load the pre-calculated values for bottom
645 const uint16x8_t centerLow_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 0u);
646 const uint16x8_t centerHigh_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 8u);
647
648 // result = top + bottom
649 uint16x8_t resultLow_u_16x8 = vaddq_u16(topLow_u_16x8, bottomLow_u_16x8);
650 uint16x8_t resultHigh_u_16x8 = vaddq_u16(topHigh_u_16x8, bottomHigh_u_16x8);
651
652 // result += 2 * center
653 resultLow_u_16x8 = vmlaq_u16(resultLow_u_16x8, centerLow_u_16x8, constant_2_u_16x8);
654 resultHigh_u_16x8 = vmlaq_u16(resultHigh_u_16x8, centerHigh_u_16x8, constant_2_u_16x8);
655
656 // write the results for the bottom row so that we can use them as new pre-calculated values in the next iteration
657 // as we may re-calculate the last 16 pixels once again in the very last iteration, we cannot simply write the results to the center row
658 vst1q_u16(responseBottomRow + 1u + n + 0u, bottomLow_u_16x8);
659 vst1q_u16(responseBottomRow + 1u + n + 8u, bottomHigh_u_16x8);
660
661 // result = (result + 8) / 16
662 const uint8x16_t result_u_8x16 = vcombine_u8(vrshrn_n_u16(resultLow_u_16x8, 4), vrshrn_n_u16(resultHigh_u_16x8, 4));
663
664 vst1q_u8(target + 1u + n, result_u_8x16);
665 }
666
667 responseBottomRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u;
668
669 // handle right pixel: (inside) (outside)
670 // 1 3 | 1 2 | 1
671 // 2 [6] | 2 [4] | 2
672 // 1 3 | 1 2 | 1
673
674 target[width - 1u] = uint8_t((responseTopRow[width - 1u] + responseCenterRow[width - 1u] * 2u + responseBottomRow[width - 1u] + 8u) / 16u);
675
676 source += sourceStrideElements;
677 target += targetStrideElements;
678
679 std::swap(responseTopRow, responseCenterRow);
680
681 if (y == 0u)
682 {
683 // the next row will not have any border mirroring anymore
684
685 responseCenterRow = responseRows + width * 1u;
686 }
687 else if (y == height - 2u)
688 {
689 // the next iteration will handle the last row in the frame
690 // the bottom row will be mirrored which is actually the last row again
691
692 source = sourceExtraCopy;
693 }
694
695 std::swap(responseCenterRow, responseBottomRow);
696 }
697}
698
699#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
700
701}
702
703}
704
705#endif // META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
This class holds re-usable memory for the filtering process.
Definition FrameFilterGaussian.h:36
ReusableMemory()=default
Default constructor.
Memory horizontalFilterMemory_
The reusable memory for horizontal filter factors.
Definition FrameFilterGaussian.h:52
Memory verticalFilterMemory_
The reusable memory for vertical filter factors.
Definition FrameFilterGaussian.h:55
FrameFilterSeparable::ReusableMemory separableReusableMemory_
The reusable memory object for the separable filter.
Definition FrameFilterGaussian.h:49
Memory responseRowsMemory_
The reusable memory for several response rows.
Definition FrameFilterGaussian.h:58
This class implements Gaussian image blur filters.
Definition FrameFilterGaussian.h:29
static T filterSize2sigma(const unsigned int filterSize)
Calculates the sigma corresponding to a specified box filter so that the Gauss distribution using the...
Definition FrameFilterGaussian.h:211
static void determineFilterFactors(const unsigned int filterSize, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition FrameFilterGaussian.h:352
static bool filter(const Frame &source, Frame &target, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given source image and copies the resulting filter results to a g...
static bool filter(Frame &frame, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given frame.
static unsigned int sigma2filterSize(const T sigma)
Calculates the ideal size of a box filter for a specified sigma defining the shape of the Gauss distr...
Definition FrameFilterGaussian.h:198
static void determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition FrameFilterGaussian.h:244
static void filter1Channel8Bit121NEON(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory *reusableMemory)
Applies a horizontal and vertical filtering with a Gaussian kernel with size 3, applying a horizontal...
Definition FrameFilterGaussian.h:500
This class holds re-usable memory for the filtering process.
Definition FrameFilterSeparable.h:40
This class implements Ocean's image class.
Definition Frame.h:1808
This class implements an object able to allocate memory.
Definition base/Memory.h:22
size_t size() const
Returns the size of the memory in bytes.
Definition base/Memory.h:386
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static T exp(const T value)
Returns the base-e exponential function of a given value.
Definition Numeric.h:1643
static constexpr T eps()
Returns a small epsilon.
This class implements a scoped value that allows to change a specified value at the end of a scope.
Definition ScopedValue.h:23
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
ProcessorInstructions
Definition of individual processor instruction types.
Definition base/Processor.h:22
The namespace covering the entire Ocean framework.
Definition Accessor.h:15