8 #ifndef META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
9 #define META_OCEAN_CV_FRAME_FILTER_GAUSSIAN_H
// Converts a Gaussian sigma into a filter size.
// The definition in this file computes ceil((sigma - 0.8) * 6.666666666 + 2.999) and then sets the
// lowest bit, so the returned size is always odd.
70 static inline unsigned int sigma2filterSize(
const T sigma);
// Converts a filter size into a Gaussian sigma.
// The definition in this file asserts that filterSize is odd and >= 1 and returns
// 0.3 * (filterSize / 2 - 1) + 0.8, where filterSize / 2 is an integer division.
79 static inline T filterSize2sigma(
const unsigned int filterSize);
// Writes `filterSize` 1D Gaussian blur factors to `filter`.
// The definitions in this file use predefined tables for filterSize <= 7 and otherwise derive a sigma
// via filterSize2sigma<float>() and delegate to determineFilterFactorsWithExplicitSigma().
// `filter` must not be null (asserted in the definitions); `denominator` is optional and defaults to nullptr.
92 static void determineFilterFactors(
const unsigned int filterSize, T* filter, T* denominator =
nullptr);
// Writes `filterSize` 1D Gaussian blur factors to `filter`, using the caller-provided `sigma`.
// The generic definition only accepts float or double (static_assert) and normalizes the factors by their sum;
// the `unsigned int` specialization rescales the float factors by 1 / firstFactor and rounds them to integers.
// `filter` must not be null (asserted in the definitions); `denominator` is optional and defaults to nullptr.
105 template <
typename T>
106 static void determineFilterFactorsWithExplicitSigma(
const unsigned int filterSize,
const float sigma, T* filter, T* denominator =
nullptr);
// Applies a Gaussian blur from `source` to `target`.
// T is the element type of the frame data, TFilter the element type of the filter factors.
// The definition asserts non-null pointers, width >= horizontalFilterSize, height >= verticalFilterSize,
// and odd, non-zero filter sizes; the filter sizes are additionally checked at runtime.
// The filtering itself is forwarded to FrameFilterSeparable::filter(), whose bool result is returned.
// NOTE(review): `sigma` defaults to -1; presumably a non-positive sigma means "derive sigma from the filter size"
// - the branch that decides this is not visible in this listing, confirm against the full header.
152 template <
typename T,
typename TFilter>
153 static bool filter(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int horizontalFilterSize,
const unsigned int verticalFilterSize,
const float sigma = -1.0f,
Worker* worker =
nullptr,
ReusableMemory* reusableMemory =
nullptr,
const ProcessorInstructions processorInstructions =
Processor::get().instructions());
// In-place variant of the Gaussian blur.
// The definition forwards to the source/target overload with `frame` used as both source and target and
// `framePaddingElements` used for both paddings; all remaining arguments are passed through unchanged.
172 template <
typename T,
typename TFilter>
173 static inline bool filter(T* frame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int framePaddingElements,
const unsigned int horizontalFilterSize,
const unsigned int verticalFilterSize,
const float sigma = -1.0f,
Worker* worker =
nullptr,
ReusableMemory* reusableMemory =
nullptr,
const ProcessorInstructions processorInstructions =
Processor::get().instructions());
177 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON implementation of a 3x3 blur for 1-channel 8-bit data using a 1-2-1 kernel horizontally and vertically.
// The definition asserts non-null source/target, width >= 18 and height >= 1.
// It is only declared when OCEAN_HARDWARE_NEON_VERSION >= 10 (see the surrounding #if).
// `reusableMemory` may be null; the definition then allocates its own temporary row buffer.
192 static inline void filter1Channel8Bit121NEON(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
ReusableMemory* reusableMemory);
// Definition fragment of sigma2filterSize(); the signature line is not present in this listing
// (the reference index at the end of the listing places the definition at FrameFilterGaussian.h:198).
197 template <
typename T>
// The cast to unsigned int binds tighter than `|`, so this is (unsigned int)(ceil(...)) | 1:
// the rounded-up value with its lowest bit forced to 1, i.e. always an odd size.
202 const unsigned int size = (
unsigned int)
NumericT<T>::ceil((sigma - T(0.8)) * T(6.666666666) + T(2.999)) | 0x01u;
// Sanity checks on the computed size: non-zero and odd.
204 ocean_assert(size >= 1u);
205 ocean_assert(size % 2u == 1u);
// Definition fragment of filterSize2sigma(); the signature line is not present in this listing
// (the reference index at the end of the listing places the definition at FrameFilterGaussian.h:211).
210 template <
typename T>
// The filter size must be odd and at least 1.
213 ocean_assert(filterSize >= 1u && (filterSize % 2u) == 1u);
// filterSize / 2u is an integer division performed before the conversion to T,
// e.g. filterSize 1 -> 0.3 * (0 - 1) + 0.8 = 0.5, filterSize 3 -> 0.8, filterSize 5 -> 1.1.
215 return T(0.3) * (T(filterSize / 2u) - T(1)) + T(0.8);
// Specialization for integer filter factors: computes the floating-point Gaussian factors first and
// then converts them to unsigned integers.
219 inline void FrameFilterGaussian::determineFilterFactorsWithExplicitSigma<unsigned int>(
const unsigned int filterSize,
const float sigma,
unsigned int* filter,
unsigned int* denominator)
221 ocean_assert(filterSize % 2u == 1u);
222 ocean_assert(
filter !=
nullptr);
// Temporary buffer receiving the float factors from the float instantiation.
224 std::vector<float> floatFilter(filterSize);
225 determineFilterFactorsWithExplicitSigma<float>(filterSize, sigma, floatFilter.data());
// Scale so that the first (outermost) factor becomes 1 after rounding.
227 const float factor = 1.0f / floatFilter[0];
229 unsigned int filterSum = 0u;
231 for (
unsigned int n = 0u; n < filterSize; ++n)
// Round to nearest: add 0.5 and truncate via the cast.
233 filter[n] = (
unsigned int)(floatFilter[n] * factor + 0.5f);
// NOTE(review): the statement accumulating filterSum and any null check guarding `denominator`
// are not visible in this listing - confirm against the full header.
239 *denominator = filterSum;
// Generic (floating-point) definition fragment of determineFilterFactorsWithExplicitSigma();
// the signature line is not present in this listing.
243 template <
typename T>
// Only float and double factors are supported by this generic implementation.
246 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
"Invalid data type for a filter!");
248 ocean_assert(filterSize % 2u == 1u);
251 ocean_assert(
filter !=
nullptr);
// Index of the center element of the (odd-sized) filter.
256 const unsigned int filterSize_2 = filterSize / 2u;
// Coefficient -1 / (2 * sigma^2); presumably multiplied with the squared offset inside an exp() call
// in the elided loop body - confirm against the full header.
258 const T scaleFactor = T(-0.5f / (sigma * sigma));
262 for (
unsigned int n = 0u; n < filterSize; ++n)
// Signed offset of element n from the filter center; the subtraction is done in unsigned arithmetic
// and then converted to int.
264 const int i = int(n - filterSize_2);
// Normalization: every factor is multiplied by 1 / filterSum.
// NOTE(review): the accumulation of filterSum is not visible in this listing.
271 const T invFilterSum = T(1) / filterSum;
273 for (
unsigned int n = 0u; n < filterSize; ++n)
275 filter[n] *= invFilterSum;
// Re-sums the normalized factors; presumably debug-only verification that they add up to one
// (the surrounding guard and the check itself are not visible here).
280 T debugFilterSum = T(0);
281 for (
unsigned int n = 0u; n < filterSize; ++n)
283 debugFilterSum +=
filter[n];
// Specialization for integer filter factors.
// Filter sizes up to 7 are served from predefined tables; larger sizes derive a sigma from the filter size
// and delegate to determineFilterFactorsWithExplicitSigma<unsigned int>().
296 inline void FrameFilterGaussian::determineFilterFactors<unsigned int>(
const unsigned int filterSize,
unsigned int* filter,
unsigned int* denominator)
298 ocean_assert(filterSize % 2u == 1u);
299 ocean_assert(
filter !=
nullptr);
301 if (filterSize <= 7u)
// All predefined filters stored back to back: 16 entries = 1 + 3 + 5 + 7 factors.
303 static constexpr std::array<unsigned int, 16> predefinedFilters =
// Factors of the 7-element filter (they sum to 33).
308 1u, 4u, 7u, 9u, 7u, 4u, 1u
// One denominator per predefined filter, indexed by filterSize / 2.
311 static constexpr std::array<unsigned int, 4> predefinedDenominators =
// Start index of each predefined filter inside predefinedFilters, indexed by filterSize / 2.
319 static constexpr std::array<unsigned int, 4> offsets =
327 ocean_assert(filterSize / 2u < offsets.size());
328 const unsigned int filterOffset = offsets[filterSize / 2u];
// Copy the selected predefined factors to the output.
330 for (
unsigned int n = 0u; n < filterSize; ++n)
332 ocean_assert(filterOffset + n < predefinedFilters.size());
333 filter[n] = predefinedFilters[filterOffset + n];
// The denominator is optional and only written when requested.
337 if (denominator !=
nullptr)
339 ocean_assert(filterSize / 2u < predefinedDenominators.size());
340 *denominator = predefinedDenominators[filterSize / 2u];
// Larger filters: compute the factors from the sigma implied by the filter size.
346 const float sigma = filterSize2sigma<float>(filterSize);
348 determineFilterFactorsWithExplicitSigma<unsigned int>(filterSize, sigma,
filter, denominator);
// Generic definition fragment of determineFilterFactors(); the signature line is not present in this listing
// (the reference index at the end of the listing places the definition at FrameFilterGaussian.h:352).
// Filter sizes up to 7 are served from a predefined float table; larger sizes derive a sigma from the
// filter size and delegate to determineFilterFactorsWithExplicitSigma<T>().
351 template <
typename T>
354 ocean_assert(filterSize % 2u == 1u);
355 ocean_assert(
filter !=
nullptr);
357 if (filterSize <= 7u)
// All predefined filters stored back to back: 16 entries = 1 + 3 + 5 + 7 factors.
359 static constexpr std::array<float, 16> predefinedFilters =
// 5-element filter: 1/16, 4/16, 6/16, 4/16, 1/16.
363 0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f,
// 7-element filter: 1/32, 3.5/32, 7/32, 9/32, 7/32, 3.5/32, 1/32 (sums to 1).
364 0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f,
// Start index of each predefined filter inside predefinedFilters, indexed by filterSize / 2.
368 static constexpr std::array<unsigned int, 4> offsets =
376 ocean_assert(filterSize / 2u < offsets.size());
377 const unsigned int filterOffset = offsets[filterSize / 2u];
// Copy the selected predefined factors to the output, converted to T.
379 for (
unsigned int n = 0u; n < filterSize; ++n)
381 ocean_assert(filterOffset + n < predefinedFilters.size());
382 filter[n] = T(predefinedFilters[filterOffset + n]);
// NOTE(review): the value written to *denominator in this branch is not visible in this listing.
385 if (denominator !=
nullptr)
// Larger filters: compute the factors from the sigma implied by the filter size.
393 const float sigma = filterSize2sigma<float>(filterSize);
395 determineFilterFactorsWithExplicitSigma<T>(filterSize, sigma,
filter, denominator);
// Applies the Gaussian blur from `source` to `target`.
// Determines the horizontal (and, if the sizes differ, vertical) 1D filter factors and forwards the actual
// filtering to FrameFilterSeparable::filter(), returning its result.
398 template <
typename T,
typename TFilter>
399 bool FrameFilterGaussian::filter(
const T* source, T* target,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int horizontalFilterSize,
const unsigned int verticalFilterSize,
const float sigma,
Worker* worker,
ReusableMemory* reusableMemory,
const ProcessorInstructions processorInstructions)
401 ocean_assert(source !=
nullptr && target !=
nullptr);
// The frame must be at least as large as the filter in both dimensions (debug check only).
402 ocean_assert(width >= horizontalFilterSize && height >= verticalFilterSize);
404 ocean_assert(horizontalFilterSize >= 1u && horizontalFilterSize % 2u == 1u);
405 ocean_assert(verticalFilterSize >= 1u && verticalFilterSize % 2u == 1u);
// Runtime validation of the filter sizes (also active when asserts are compiled out).
// NOTE(review): the action taken on failure is not visible in this listing - presumably `return false`.
406 if (horizontalFilterSize == 0u || horizontalFilterSize % 2u != 1u || verticalFilterSize == 0u || verticalFilterSize % 2u != 1u)
411 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Fast path: 8-bit data with integer filter factors.
// This is a regular `if`, so the branch is compiled for every T; hence the explicit pointer casts below.
415 if (std::is_same<T, uint8_t>::value && std::is_same<TFilter, unsigned int>::value)
// The dedicated NEON kernel needs a 1-channel frame of width >= 18, a 3x3 filter and no explicit sigma.
417 if (width >= 18u && channels == 1u && horizontalFilterSize == 3u && verticalFilterSize == 3u && sigma <= 0.0f)
419 filter1Channel8Bit121NEON((
const uint8_t*)(source), (uint8_t*)(target), width, height, sourcePaddingElements, targetPaddingElements, reusableMemory);
// Storage for the horizontal filter factors.
// NOTE(review): the local vector is presumably used only when no reusable memory is provided; the
// reusable-memory branch and the code computing the factors are not visible in this listing.
428 std::vector<TFilter> localHorizontalFilter;
429 TFilter* horizontalFilter =
nullptr;
431 if (reusableMemory !=
nullptr)
442 localHorizontalFilter.resize(horizontalFilterSize);
443 horizontalFilter = localHorizontalFilter.data();
// Identical filter sizes: the horizontal factors are reused for the vertical pass.
455 if (horizontalFilterSize == verticalFilterSize)
457 return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, horizontalFilter, horizontalFilterSize, worker, separableReusableMemory, processorInstructions);
// Different filter sizes: a separate set of vertical factors is needed (same storage pattern as above).
461 std::vector<TFilter> localVerticalFilter;
462 TFilter* verticalFilter =
nullptr;
464 if (reusableMemory !=
nullptr)
475 localVerticalFilter.resize(verticalFilterSize);
476 verticalFilter = localVerticalFilter.data();
488 return FrameFilterSeparable::filter<T, TFilter>(source, target, width, height, channels, sourcePaddingElements, targetPaddingElements, horizontalFilter, horizontalFilterSize, verticalFilter, verticalFilterSize, worker, separableReusableMemory, processorInstructions);
// In-place Gaussian blur: thin wrapper around the source/target overload.
492 template <
typename T,
typename TFilter>
493 inline bool FrameFilterGaussian::filter(T* frame,
const unsigned int width,
const unsigned int height,
const unsigned int channels,
const unsigned int framePaddingElements,
const unsigned int horizontalFilterSize,
const unsigned int verticalFilterSize,
const float sigma,
Worker* worker,
ReusableMemory* reusableMemory,
const ProcessorInstructions processorInstructions)
// `frame` serves as both source and target, and the same padding is used for both.
495 return filter<T, TFilter>(frame, frame, width, height, channels, framePaddingElements, framePaddingElements, horizontalFilterSize, verticalFilterSize, sigma, worker, reusableMemory, processorInstructions);
498 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Definition fragment of filter1Channel8Bit121NEON(); the signature line is not present in this listing
// (the reference index at the end of the listing places the definition at FrameFilterGaussian.h:500).
// Per row, a horizontal 1-2-1 response is computed as 16-bit values; three such rows are then combined
// vertically as top + 2 * center + bottom and divided by 16 with rounding.
502 ocean_assert(source !=
nullptr);
503 ocean_assert(target !=
nullptr);
504 ocean_assert(width >= 18u);
505 ocean_assert(height >= 1u);
// Constant multipliers (2) for the center weight of the 1-2-1 kernel, in 8-bit and 16-bit lanes.
508 const uint8x8_t constant_2_u_8x8 = vdup_n_u8(2u);
509 const uint16x8_t constant_2_u_16x8 = vdupq_n_u16(2u);
// Row strides in elements (one channel, so one element per pixel).
511 const unsigned int sourceStrideElements = width * 1u + sourcePaddingElements;
512 const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
// Number of pixels per row excluding the first and the last pixel, which are handled separately.
514 const unsigned int innerPixels = width - 2u;
516 Memory memoryResponseRows;
517 uint16_t* responseRows =
nullptr;
// Four rows of `width` uint16_t elements: three response rows plus one row whose memory is used as a
// byte buffer for a copy of a source row (see sourceExtraCopy below).
519 const unsigned int reusableMemoryNecessaryElements = width * 4u;
// NOTE(review): the condition deciding when the reusable buffer is (re)allocated and the else-branch
// structure are not visible in this listing.
521 if (reusableMemory !=
nullptr)
525 reusableMemory->
responseRowsMemory_ = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
532 memoryResponseRows = Memory::create<uint16_t>(reusableMemoryNecessaryElements);
533 responseRows = memoryResponseRows.
data<uint16_t>();
536 ocean_assert(responseRows !=
nullptr);
// Horizontal response of the very first source row goes into response row 0.
538 uint16_t* responseTopRow = responseRows + width * 0u;
// Left border pixel: 3 * s[0] + s[1] equals the 1-2-1 response with s[0] standing in for the missing left neighbor.
542 responseTopRow[0] = source[0] * 3u + source[1];
// Inner pixels are processed in blocks of 16.
544 for (
unsigned int n = 0u; n < innerPixels; n += 16u)
// Last block would run past the inner pixels: it is moved back so that it ends exactly at innerPixels
// (the asserts below document this; the assignment of n itself is not visible in this listing).
546 if (n + 16u > innerPixels)
548 ocean_assert(n >= 16u && innerPixels > 16u);
549 const unsigned int newN = innerPixels - 16u;
551 const unsigned int offset = n - newN;
552 ocean_assert_and_suppress_unused(offset < innerPixels, offset);
554 ocean_assert(n > newN);
559 ocean_assert(n + 16u == innerPixels);
560 ocean_assert(!(n + 16u < innerPixels));
// Three overlapping loads: left neighbors, centers and right neighbors of 16 consecutive pixels.
563 const uint8x16_t source_0_u_8x16 = vld1q_u8(source + n + 0u);
564 const uint8x16_t source_1_u_8x16 = vld1q_u8(source + n + 1u);
565 const uint8x16_t source_2_u_8x16 = vld1q_u8(source + n + 2u);
// left + right, widened to 16 bit
568 uint16x8_t low_u_16x8 = vaddl_u8(vget_low_u8(source_0_u_8x16), vget_low_u8(source_2_u_8x16));
569 uint16x8_t high_u_16x8 = vaddl_u8(vget_high_u8(source_0_u_8x16), vget_high_u8(source_2_u_8x16));
// ... + 2 * center
572 low_u_16x8 = vmlal_u8(low_u_16x8, vget_low_u8(source_1_u_8x16), constant_2_u_8x8);
573 high_u_16x8 = vmlal_u8(high_u_16x8, vget_high_u8(source_1_u_8x16), constant_2_u_8x8);
// Stored with an offset of one element because index 0 holds the left border response.
575 vst1q_u16(responseTopRow + 1u + n + 0u, low_u_16x8);
576 vst1q_u16(responseTopRow + 1u + n + 8u, high_u_16x8);
// Right border pixel, mirrored version of the left border formula.
579 responseTopRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u;
// For the first output row the center row aliases the top row, so the first source row also acts as its own upper neighbor.
582 uint16_t* responseCenterRow = responseTopRow;
583 uint16_t* responseBottomRow = responseRows + width * 2u;
// The fourth row of the buffer, reinterpreted as bytes, holds a copy of one source row.
584 uint8_t*
const sourceExtraCopy = (uint8_t*)(responseRows + width * 3u);
// `source` now points to the row below the current output row.
586 source += sourceStrideElements;
588 for (
unsigned int y = 0u; y < height; ++y)
// Second-to-last iteration: `source` points to the last source row, which is copied aside.
// NOTE(review): presumably so the row can be reused as its own lower neighbor in the final iteration
// even when filtering in place - confirm against the full header.
590 if (y == height - 2u)
593 memcpy(sourceExtraCopy, source, width *
sizeof(uint8_t));
// Left border response of the bottom row.
598 responseBottomRow[0u] = source[0] * 3u + source[1];
// Result for the first pixel: (top + 2 * center + bottom + 8) / 16. According to the ScopedValueT description in
// the reference index, the value is assigned to *target at the end of the scope rather than immediately.
606 const ScopedValueT<uint8_t> firstPixelValue(*target, uint8_t((responseTopRow[0] + responseCenterRow[0] * 2u + responseBottomRow[0] + 8u) / 16u));
608 for (
unsigned int n = 0u; n < innerPixels; n += 16u)
// Same handling of the last block as in the first-row loop above.
610 if (n + 16u > innerPixels)
612 ocean_assert(n >= 16u && innerPixels > 16u);
613 const unsigned int newN = innerPixels - 16u;
615 const unsigned int offset = n - newN;
616 ocean_assert_and_suppress_unused(offset < innerPixels, offset);
618 ocean_assert(n > newN);
623 ocean_assert(n + 16u == innerPixels);
624 ocean_assert(!(n + 16u < innerPixels));
// Horizontal 1-2-1 response of the bottom row (same scheme as for the first row).
627 const uint8x16_t sourceBottom_0_u_8x16 = vld1q_u8(source + n + 0u);
628 const uint8x16_t sourceBottom_1_u_8x16 = vld1q_u8(source + n + 1u);
629 const uint8x16_t sourceBottom_2_u_8x16 = vld1q_u8(source + n + 2u);
632 uint16x8_t bottomLow_u_16x8 = vaddl_u8(vget_low_u8(sourceBottom_0_u_8x16), vget_low_u8(sourceBottom_2_u_8x16));
633 uint16x8_t bottomHigh_u_16x8 = vaddl_u8(vget_high_u8(sourceBottom_0_u_8x16), vget_high_u8(sourceBottom_2_u_8x16));
636 bottomLow_u_16x8 = vmlal_u8(bottomLow_u_16x8, vget_low_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
637 bottomHigh_u_16x8 = vmlal_u8(bottomHigh_u_16x8, vget_high_u8(sourceBottom_1_u_8x16), constant_2_u_8x8);
// Previously computed responses of the top and center rows.
641 const uint16x8_t topLow_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 0u);
642 const uint16x8_t topHigh_u_16x8 = vld1q_u16(responseTopRow + 1u + n + 8u);
645 const uint16x8_t centerLow_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 0u);
646 const uint16x8_t centerHigh_u_16x8 = vld1q_u16(responseCenterRow + 1u + n + 8u);
// Vertical 1-2-1: top + bottom ...
649 uint16x8_t resultLow_u_16x8 = vaddq_u16(topLow_u_16x8, bottomLow_u_16x8);
650 uint16x8_t resultHigh_u_16x8 = vaddq_u16(topHigh_u_16x8, bottomHigh_u_16x8);
// ... + 2 * center
653 resultLow_u_16x8 = vmlaq_u16(resultLow_u_16x8, centerLow_u_16x8, constant_2_u_16x8);
654 resultHigh_u_16x8 = vmlaq_u16(resultHigh_u_16x8, centerHigh_u_16x8, constant_2_u_16x8);
// Keep the bottom response; it is reused for the following output rows.
658 vst1q_u16(responseBottomRow + 1u + n + 0u, bottomLow_u_16x8);
659 vst1q_u16(responseBottomRow + 1u + n + 8u, bottomHigh_u_16x8);
// Rounding right shift by 4 with narrowing to 8 bit, matching the scalar (x + 8) / 16 used for the border pixels.
662 const uint8x16_t result_u_8x16 = vcombine_u8(vrshrn_n_u16(resultLow_u_16x8, 4), vrshrn_n_u16(resultHigh_u_16x8, 4));
664 vst1q_u8(target + 1u + n, result_u_8x16);
// Right border response of the bottom row and the result for the last pixel of the row.
667 responseBottomRow[width - 1u] = source[width - 2u] + source[width - 1u] * 3u;
674 target[width - 1u] = uint8_t((responseTopRow[width - 1u] + responseCenterRow[width - 1u] * 2u + responseBottomRow[width - 1u] + 8u) / 16u);
676 source += sourceStrideElements;
677 target += targetStrideElements;
// Rotation of the three response-row pointers for the next output row.
// NOTE(review): the conditions around the following statements are only partly visible in this listing;
// the assignment to response row 1 presumably belongs to the first iteration, where center still aliases top.
679 std::swap(responseTopRow, responseCenterRow);
685 responseCenterRow = responseRows + width * 1u;
// After the second-to-last row, continue reading from the saved copy of the last source row.
687 else if (y == height - 2u)
692 source = sourceExtraCopy;
695 std::swap(responseCenterRow, responseBottomRow);
This class holds re-usable memory for the filtering process.
Definition: FrameFilterGaussian.h:36
ReusableMemory()=default
Default constructor.
Memory horizontalFilterMemory_
The reusable memory for horizontal filter factors.
Definition: FrameFilterGaussian.h:52
Memory verticalFilterMemory_
The reusable memory for vertical filter factors.
Definition: FrameFilterGaussian.h:55
FrameFilterSeparable::ReusableMemory separableReusableMemory_
The reusable memory object for the separable filter.
Definition: FrameFilterGaussian.h:49
Memory responseRowsMemory_
The reusable memory for several response rows.
Definition: FrameFilterGaussian.h:58
This class implements Gaussian image blur filters.
Definition: FrameFilterGaussian.h:29
static T filterSize2sigma(const unsigned int filterSize)
Calculates the sigma corresponding to a specified box filter size so that the Gauss distribution using that sigma fits the filter; the result is 0.3 * (filterSize / 2 - 1) + 0.8.
Definition: FrameFilterGaussian.h:211
static void determineFilterFactors(const unsigned int filterSize, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition: FrameFilterGaussian.h:352
static bool filter(const Frame &source, Frame &target, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given source image and copies the resulting filter results to a given target frame.
static bool filter(Frame &frame, const unsigned int filterSize, Worker *worker=nullptr, ReusableMemory *reusableMemory=nullptr)
Applies a Gaussian blur filter to a given frame.
static unsigned int sigma2filterSize(const T sigma)
Calculates the ideal size of a box filter for a specified sigma defining the shape of the Gauss distribution.
Definition: FrameFilterGaussian.h:198
static void determineFilterFactorsWithExplicitSigma(const unsigned int filterSize, const float sigma, T *filter, T *denominator=nullptr)
Determines 1D Gaussian blur filter factors for a given filter size.
Definition: FrameFilterGaussian.h:244
static void filter1Channel8Bit121NEON(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, ReusableMemory *reusableMemory)
Applies a horizontal and vertical filtering with a Gaussian kernel with size 3, applying a horizontal and vertical 1-2-1 kernel to a 1-channel 8-bit frame.
Definition: FrameFilterGaussian.h:500
This class holds re-usable memory for the filtering process.
Definition: FrameFilterSeparable.h:40
This class implements Ocean's image class.
Definition: Frame.h:1792
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
size_t size() const
Returns the size of the memory in bytes.
Definition: base/Memory.h:386
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition: base/Memory.h:303
This class provides basic numeric functionalities.
Definition: Numeric.h:57
static T exp(const T value)
Returns the base-e exponential function of a given value.
Definition: Numeric.h:1643
static constexpr T eps()
Returns a small epsilon.
This class implements a scoped value that allows to change a specified value at the end of a scope.
Definition: ScopedValue.h:23
static Processor & get()
Returns a reference to the unique object.
Definition: Singleton.h:115
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
ProcessorInstructions
Definition of individual processor instruction types.
Definition: base/Processor.h:22
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15