8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
// Forward declaration of Test::TestCV::TestFrameInterpolatorBilinearNEON so the test class can be
// named in this header without including its definition (presumably for a friend declaration — confirm).
37namespace Test {
namespace TestCV {
class TestFrameInterpolatorBilinearNEON; } }
// Resizes a Frame to the requested `width` x `height`.
// `frame` is a mutable reference and therefore serves as both input and output.
// `worker` is optional (defaults to nullptr).
// Returns bool — presumably true on success; confirm against the definition.
85 static inline bool resize(
Frame& frame,
const unsigned int width,
const unsigned int height,
Worker* worker =
nullptr);
// Rotates `source` into `target` by `angle` around the anchor
// (`horizontalAnchorPosition`, `verticalAnchorPosition`).
// NOTE(review): the unit and direction of `angle` are not visible in this declaration — confirm.
// `worker` and `borderColor` are both optional (default nullptr); how a null border color is treated
// is not visible here.
283 static bool rotate(
const Frame& source,
Frame& target,
const Scalar horizontalAnchorPosition,
const Scalar verticalAnchorPosition,
const Scalar angle,
Worker* worker =
nullptr,
const uint8_t* borderColor =
nullptr);
// Interpolates a single pixel of an 8-bit-per-channel frame at the sub-pixel `position` and writes it to `result`.
// This overload takes the channel count and the pixel-center convention as runtime arguments.
// TScalar is the coordinate type of `position` (defaults to Scalar).
// `framePaddingElements` describes padding of `frame` — presumably elements at the end of each row; confirm.
// Returns bool — presumably false for unsupported arguments; confirm against the definition.
316 template <
typename TScalar = Scalar>
317 static bool interpolatePixel8BitPerChannel(
const uint8_t* frame,
const unsigned int channels,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
const PixelCenter pixelCenter,
const VectorT2<TScalar>& position, uint8_t* result);
// Generic-type counterpart of the runtime-channel pixel interpolation: reads TSource elements from `frame`
// and writes TTarget elements to `result`.
// TScalar is the coordinate type of `position` (default Scalar); TIntermediate (default TScalar) is the type
// of `resultBias`, which defaults to TIntermediate(0).
// NOTE(review): how `resultBias` is applied (e.g. added before conversion to TTarget) is not visible here.
337 template <
typename TSource,
typename TTarget,
typename TScalar = Scalar,
typename TIntermediate = TScalar>
338 static bool interpolatePixel(
const TSource* frame,
const unsigned int channels,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
const PixelCenter pixelCenter,
const VectorT2<TScalar>& position, TTarget* result,
const TIntermediate& resultBias = TIntermediate(0));
// Resizes a raw image buffer with element type T and tChannels channels from
// sourceWidth x sourceHeight to targetWidth x targetHeight.
// The definition in this file computes the factors sourceWidth / targetWidth and
// sourceHeight / targetHeight (as doubles) and forwards to scale<T, tChannels>().
// `worker` is optional (defaults to nullptr).
388 template <
typename T,
unsigned int tChannels>
389 static inline void resize(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
// Rescales a raw image buffer using explicitly provided scale factors.
// `sourceX_s_targetX` / `sourceY_s_targetY` are double factors; the definition asserts both are > 0.
// The naming suggests sourceX = s * targetX — confirm.
// The definition asserts non-null buffers and dimensions >= 1.
// `worker` is optional (defaults to nullptr).
410 template <
typename T,
unsigned int tChannels>
411 static inline void scale(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
// Rotates an 8-bit image with tChannels channels around the given anchor position by `angle`.
// Source and target share the same `width` and `height` (only one pair of dimensions is passed).
// `worker` and `borderColor` are optional (both default to nullptr).
// NOTE(review): angle unit/direction and null-border-color behavior are not visible here — confirm.
429 template <
unsigned int tChannels>
430 static inline void rotate8BitPerChannel(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const Scalar horizontalAnchorPosition,
const Scalar verticalAnchorPosition,
const Scalar angle,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr,
const uint8_t* borderColor =
nullptr);
// Warps an 8-bit image with tChannels channels using the 3x3 matrix `source_A_target`.
// The naming suggests the matrix maps target coordinates to source coordinates — confirm.
// `borderColor` has no default in this declaration; `targetOrigin` is a pixel position applied to the
// target frame (exact semantics not visible here).
// `worker` is optional (defaults to nullptr).
459 template <
unsigned int tChannels>
460 static inline void affine8BitPerChannel(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3& source_A_target,
const uint8_t* borderColor, uint8_t* target,
const PixelPositionI& targetOrigin,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
// Warps an image with element type T and tChannels channels using the 3x3 matrix `input_H_output`.
// The naming suggests the homography maps output coordinates to input coordinates — confirm.
// `borderColor` points to elements of type T and has no default here.
// `worker` is optional (defaults to nullptr).
483 template <
typename T,
unsigned int tChannels>
484 static inline void homography(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output,
const T* borderColor, T* output,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker =
nullptr);
// Warps an 8-bit image with tChannels channels using four homographies (`homographies[4]`).
// `outputQuadrantCenter` presumably selects which homography applies per output quadrant — confirm
// against the definition; the quadrant-to-index mapping is not visible here.
// `worker` is optional (defaults to nullptr).
508 template <
unsigned int tChannels>
509 static inline void homographies8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3 homographies[4],
const uint8_t* borderColor, uint8_t* output,
const Vector2& outputQuadrantCenter,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker =
nullptr);
// Homography warp of an 8-bit image that writes both the warped image (`output`) and a separate
// mask buffer (`outputMask`) instead of taking a border color.
// `maskValue` has no default in this declaration (unlike the sibling mask functions, where it defaults
// to 0xFF); presumably it marks output pixels with a valid input correspondence — confirm.
// `worker` is optional (defaults to nullptr).
533 template <
unsigned int tChannels>
534 static inline void homographyMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const uint8_t maskValue ,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker =
nullptr);
// Four-homography warp of an 8-bit image that writes the warped image and a separate mask buffer.
// `worker` is optional (defaults to nullptr); `maskValue` defaults to 0xFF.
// NOTE(review): the quadrant selection via `outputQuadrantCenter` and the exact meaning of the mask
// values are not visible in this declaration — confirm against the definition.
560 template <
unsigned int tChannels>
561 static inline void homographiesMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask,
const Vector2& outputQuadrantCenter,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker =
nullptr,
const uint8_t maskValue = 0xFF);
// Homography warp of an 8-bit image between two pinhole cameras (`inputCamera`, `outputCamera`).
// No explicit frame dimensions are passed; presumably they are taken from the camera profiles — confirm.
// `useDistortionParameters` toggles use of the cameras' distortion parameters (details not visible here).
// `worker` is optional (defaults to nullptr).
581 template <
unsigned int tChannels>
582 static inline void homographyWithCamera8BitPerChannel(
const PinholeCamera& inputCamera,
const PinholeCamera& outputCamera,
const uint8_t* input,
const SquareMatrix3& homography,
const bool useDistortionParameters,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker =
nullptr);
// Camera-based homography warp of an 8-bit image that writes the warped image and a separate mask buffer.
// Unlike the border-color variant, this declaration has no `useDistortionParameters` argument.
// `worker` is optional (defaults to nullptr); `maskValue` defaults to 0xFF.
603 template <
unsigned int tChannels>
604 static inline void homographyWithCameraMask8BitPerChannel(
const PinholeCamera& inputCamera,
const PinholeCamera& outputCamera,
const uint8_t* input,
const unsigned int inputPaddingElements,
const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker =
nullptr,
const uint8_t maskValue = 0xFF);
// Warps an image with element type T and tChannels channels using the lookup table `input_LT_output`.
// `offset` presumably selects whether table entries are offsets or absolute positions — confirm.
// No output dimensions are passed; presumably they come from the lookup table — confirm.
// `worker` is optional (defaults to nullptr); the three `useOptimized...` flags all default to false
// and select optimized code paths whose details are not visible in this declaration.
623 template <
typename T,
unsigned int tChannels>
624 static inline void lookup(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable& input_LT_output,
const bool offset,
const T* borderColor, T* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker =
nullptr,
const bool useOptimizedNEON =
false,
const bool useOptimizedBilinearValuesAndFactorCalculation =
false,
const bool useOptimizedNEONFactorReplication =
false);
// Lookup-table based warp of an 8-bit image that writes the warped image and a separate mask buffer
// instead of using a border color.
// `worker` is optional (defaults to nullptr); `maskValue` defaults to 0xFF.
// NOTE(review): semantics of `offset` are not visible here — confirm against the definition.
645 template <
unsigned int tChannels>
646 static inline void lookupMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable& input_LT_output,
const bool offset, uint8_t* output, uint8_t* outputMask,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker =
nullptr,
const uint8_t maskValue = 0xFF);
// Resamples an image captured with `sourceCamera` as if seen by `targetCamera`, using the rotation
// `source_R_target` between the two (naming suggests target-to-source — confirm).
// Optional arguments and their defaults:
//   source_OLT_target  nullptr — presumably an optional output receiving the lookup table used; confirm
//   worker             nullptr
//   binSizeInPixel     8u — presumably the lookup table bin size; confirm
//   borderColor        nullptr
666 template <
typename T,
unsigned int tChannels>
667 static void resampleCameraImage(
const T* sourceFrame,
const AnyCamera& sourceCamera,
const SquareMatrix3& source_R_target,
const AnyCamera& targetCamera, T* targetFrame,
const unsigned int sourceFramePaddingElements,
const unsigned int targetFramePaddingElements,
LookupCorner2<Vector2>* source_OLT_target =
nullptr,
Worker* worker =
nullptr,
const unsigned int binSizeInPixel = 8u,
const T* borderColor =
nullptr);
// Compile-time-specialized pixel interpolation for 8-bit frames: channel count (tChannels) and
// pixel-center convention (tPixelCenter, default PC_TOP_LEFT) are template parameters.
// The dispatch code visible further down in this file instantiates this template for 1..8 channels with
// PC_TOP_LEFT and PC_CENTER.
// TScalar (default Scalar) is the coordinate type of `position`; the result is written to `result`.
683 template <
unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT,
typename TScalar = Scalar>
684 static inline void interpolatePixel8BitPerChannel(
const uint8_t* frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
const VectorT2<TScalar>& position, uint8_t* result);
// Compile-time-specialized generic pixel interpolation: reads TSource elements, writes TTarget elements,
// with channel count and pixel-center convention (default PC_TOP_LEFT) as template parameters.
// `resultBias` defaults to TIntermediate(0); how it is applied is not visible in this declaration.
704 template <
typename TSource,
typename TTarget,
unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT,
typename TScalar = Scalar,
typename TIntermediate = TScalar>
705 static inline void interpolatePixel(
const TSource* frame,
const unsigned int width,
const unsigned int height,
const unsigned int framePaddingElements,
const VectorT2<TScalar>& position, TTarget* result,
const TIntermediate& resultBias = TIntermediate(0));
// Interpolates one pixel of an 8-bit frame that carries an alpha channel.
// tAlphaAtFront / tTransparentIs0xFF are compile-time flags; by name they describe the position of the
// alpha channel and the value encoding full transparency — confirm against the definition.
// NOTE(review): the "full alpha border" handling for positions near/outside the frame is not visible here.
// Note that `framePaddingElements` is the last parameter here, after `result`.
722 template <
unsigned int tChannels,
bool tAlphaAtFront,
bool tTransparentIs0xFF>
723 static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(
const uint8_t* frame,
const unsigned int width,
const unsigned int height,
const Vector2& position, uint8_t* result,
const unsigned int framePaddingElements);
// Returns a Scalar intensity sum for a patch of patchWidth x patchHeight around the sub-pixel `center`,
// read from a 32-bit integral frame (`linedIntegralFrame`).
// NOTE(review): the exact layout of the lined integral frame (extra border row/column) and any
// constraints on the patch size are not visible in this declaration — confirm.
737 static Scalar patchIntensitySum1Channel(
const uint32_t* linedIntegralFrame,
const unsigned int frameWidth,
const unsigned int frameHeight,
const unsigned int lineIntegralFramePaddingElements,
const Vector2& center,
const CV::PixelCenter pixelCenter,
const unsigned int patchWidth,
const unsigned int patchHeight);
// Boolean predicate relating an input frame, an output frame and the homography `input_H_output`.
// By name it presumably reports whether the transformed output frame lies entirely inside the input
// frame — confirm.
// `outputOriginX` / `outputOriginY` are signed and default to 0.
750 static bool coversHomographyInputFrame(
const unsigned int inputWidth,
const unsigned int inputHeight,
const unsigned int outputWidth,
const unsigned int outputHeight,
const SquareMatrix3& input_H_output,
const int outputOriginX = 0,
const int outputOriginY = 0);
// 8-bit specialization of the homography warp; same parameter list as homography<T, tChannels>() with
// T fixed to uint8_t.
// `worker` is optional (defaults to nullptr).
774 template <
unsigned int tChannels>
775 static inline void homography8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output,
const uint8_t* borderColor, uint8_t* output,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker =
nullptr);
// 8-bit specialization of scale(); the generic scale<T, tChannels>() definition in this file forwards
// here when T is uint8_t.
// `worker` is optional (defaults to nullptr).
794 template <
unsigned int tChannels>
795 static inline void scale8BitPerChannel(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker =
nullptr);
// Row-subset worker for 8-bit scaling: handles `numberTargetRows` target rows starting at `firstTargetRow`.
// Takes no Worker argument itself; presumably invoked per thread by a worker — confirm.
813 template <
unsigned int tChannels>
814 static void scale8BitPerChannelSubset(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows);
// Blends two source rows (`sourceRowTop`, `sourceRowBottom`) into `targetRow` over `elements` elements.
// `factorBottom` is the float weight associated with the bottom row; presumably the top row receives
// (1 - factorBottom) — confirm against the definition.
840 template <
typename T>
841 static void interpolateRowVertical(
const T* sourceRowTop,
const T* sourceRowBottom, T* targetRow,
const unsigned int elements,
const float factorBottom);
// Horizontally interpolates one row: for each of the `targetWidth` target pixels, a source location and a
// right-hand weight are supplied via `interpolationLocations` and `interpolationFactorsRight`.
// NOTE(review): the function takes both the template parameter tChannels and a runtime `channels`
// argument; their relationship (presumably equal) is not visible here — confirm.
// NOTE(review): what "extended" means for `extendedSourceRow` (e.g. an extra trailing pixel) is not
// visible here.
856 template <
typename T,
unsigned int tChannels>
857 static void interpolateRowHorizontal(
const T* extendedSourceRow, T* targetRow,
const unsigned int targetWidth,
const unsigned int channels,
const unsigned int* interpolationLocations,
const float* interpolationFactorsRight);
859#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON counterpart of interpolateRowVertical() with the same parameter list.
// Declared after an `#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10`
// line; the matching #endif is not visible in this excerpt.
884 template <
typename T>
885 static void interpolateRowVerticalNEON(
const T* sourceRowTop,
const T* sourceRowBottom, T* targetRow,
const unsigned int elements,
const float factorBottom);
899 template <
unsigned int tChannels>
// NEON counterpart of interpolateRowHorizontal() with the same parameter list.
// NOTE(review): as with the non-NEON version, both tChannels and a runtime `channels` are present —
// confirm they are expected to match.
915 template <
typename T,
unsigned int tChannels>
916 static void interpolateRowHorizontalNEON(
const T* extendedSourceRow, T* targetRow,
const unsigned int targetWidth,
const unsigned int channels,
const unsigned int* interpolationLocations,
const float* interpolationFactorsRight);
// Row-subset worker for 8-bit scaling; not a template — the channel count is the runtime argument `channels`.
// By name it is a NEON implementation using 7-bit interpolation precision; the precision trade-off is
// not visible in this declaration — confirm against the definition.
936 static void scale8BitPerChannelSubset7BitPrecisionNEON(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int channels,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows);
// Row-subset worker for generic scaling: handles `numberTargetRows` target rows starting at `firstTargetRow`.
// The scale() definition in this file calls it either directly with the range (0u, targetHeight) or
// through Worker::executeFunction.
// TScale is an additional type parameter — presumably the floating-point type used for the
// interpolation arithmetic; confirm.
958 template <
typename T,
typename TScale,
unsigned int tChannels>
959 static void scaleSubset(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows);
// Row-subset worker for 8-bit rotation: handles `numberTargetRows` target rows starting at `firstTargetRow`.
// Unlike rotate8BitPerChannel(), `borderColor` has no default here and precedes the padding arguments.
977 template <
unsigned int tChannels>
978 static void rotate8BitPerChannelSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const Scalar horizontalAnchorPosition,
const Scalar verticalAnchorPosition,
const Scalar angle,
const uint8_t* borderColor,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows);
// Row-subset worker for the 8-bit affine warp (plain C++ path).
// The matrix is passed by pointer rather than by reference.
// Parameter order note: `firstTargetRow` / `numberTargetRows` come BEFORE the padding arguments
// (positions 8 and 9, zero-based), matching the indices 8u, 9u passed to Worker::executeFunction in
// affine8BitPerChannel().
1006 template <
unsigned int tChannels>
1007 static inline void affine8BitPerChannelSubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements);
// Row-subset worker for the 8-bit homography warp (plain C++ path): handles `numberOutputRows` output
// rows starting at `firstOutputRow`. The homography is passed by pointer.
// Unlike the affine subset workers, the row-range arguments come AFTER the padding arguments here.
1028 template <
unsigned int tChannels>
1029 static inline void homography8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// Row-subset worker for the generic-type homography warp (element type T): handles `numberOutputRows`
// output rows starting at `firstOutputRow`. The homography is passed by pointer.
1050 template <
typename T,
unsigned int tChannels>
1051 static inline void homographySubset(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const T* borderColor, T* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
1053#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// SSE row-subset worker for the 8-bit affine warp; same parameter list as affine8BitPerChannelSubset().
// Declared after an `#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41` line.
// affine8BitPerChannel() only selects this path when targetWidth >= 4u.
1082 template <
unsigned int tChannels>
1083 static inline void affine8BitPerChannelSSESubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements);
// SSE row-subset worker for the 8-bit homography warp; same parameter list as
// homography8BitPerChannelSubset().
// NOTE(review): any minimum-width requirement for the SSE path is not visible in this declaration.
1104 template <
unsigned int tChannels>
1105 static inline void homography8BitPerChannelSSESubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// SSE helper interpolating four pixels at once from `source`.
// For each of the four pixels, the four neighbor offsets (top-left, top-right, bottom-left, bottom-right)
// and a validity flag are supplied as 4-element arrays; `borderColor` is a packed pixel value of
// DataType<uint8_t, tChannels>::Type, presumably used where validPixels[i] is zero — confirm.
// `m128_factorsRight` / `m128_factorsBottom` hold the interpolation factors; their fixed-point scale is
// not visible in this declaration.
// Results are written to `targetPositionPixels`.
1122 template <
unsigned int tChannels>
1123 static OCEAN_FORCE_INLINE
void interpolate4Pixels8BitPerChannelSSE(
const uint8_t* source,
const unsigned int offsetsTopLeft[4],
const unsigned int offsetsTopRight[4],
const unsigned int offsetsBottomLeft[4],
const unsigned int offsetsBottomRight[4],
const unsigned int validPixels[4],
const typename DataType<uint8_t, tChannels>::Type& borderColor,
const __m128i& m128_factorsRight,
const __m128i& m128_factorsBottom,
typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
// Register-level overload of the SSE four-pixel interpolation: takes the four neighbor pixel sets and
// the four per-corner factor registers directly and returns the interpolated result as an __m128i.
// NOTE(review): lane layout of sources/factors and the normalization of the factors are not visible
// in this declaration — confirm against the definition.
1139 template <
unsigned int tChannels>
1140 static OCEAN_FORCE_INLINE __m128i
interpolate4Pixels8BitPerChannelSSE(
const __m128i& m128_sourcesTopLeft,
const __m128i& m128_sourcesTopRight,
const __m128i& m128_sourcesBottomLeft,
const __m128i& m128_sourcesBottomRight,
const __m128i& m128_factorsTopLeft,
const __m128i& m128_factorsTopRight,
const __m128i& m128_factorsBottomLeft,
const __m128i& m128_factorsBottomRight);
1144#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON row-subset worker for the 8-bit affine warp; same parameter list as affine8BitPerChannelSubset().
// Declared after an `#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10` line.
// affine8BitPerChannel() only selects this path when targetWidth >= 4u.
1173 template <
unsigned int tChannels>
1174 static inline void affine8BitPerChannelNEONSubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements);
// NEON row-subset worker for the 8-bit homography warp; same parameter list as
// homography8BitPerChannelSubset().
// NOTE(review): any minimum-width requirement for the NEON path is not visible in this declaration.
1195 template <
unsigned int tChannels>
1196 static inline void homography8BitPerChannelNEONSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// NEON helper interpolating four pixels at once from `source`; mirrors the SSE variant but uses
// uint32x4_t factor registers.
// The offset arrays are named "...Elements", i.e. presumably expressed in elements rather than
// pixels — confirm.
// `borderColor` is a packed pixel value, presumably used where validPixels[i] is zero — confirm.
1213 template <
unsigned int tChannels>
1214 static OCEAN_FORCE_INLINE
void interpolate4Pixels8BitPerChannelNEON(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const typename DataType<uint8_t, tChannels>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
// NEON helper interpolating eight single-channel 8-bit pixels: the four neighbor values of each pixel
// arrive as uint8x8_t registers, and both factor sets are packed into one uint8x16_t.
// By the parameter name the packed factors are presumably scaled to a range of 128, with right factors
// in one half and bottom factors in the other — confirm lane order against the definition.
// Results are written to `targetPositionPixels`.
1225 static OCEAN_FORCE_INLINE
void interpolate8Pixels1Channel8BitNEON(
const uint8x8_t& topLeft_u_8x8,
const uint8x8_t& topRight_u_8x8,
const uint8x8_t& bottomLeft_u_8x8,
const uint8x8_t& bottomRight_u_8x8,
const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
// NEON helper interpolating four 4-channel 8-bit pixels whose neighbor pixels are already loaded into
// uint8x16_t registers (16 bytes = 4 pixels x 4 channels).
// `useOptimizedNEONFactorReplication` defaults to false and selects an alternative way of replicating
// the factors; details are not visible in this declaration.
1241 static OCEAN_FORCE_INLINE
void interpolate4Pixels4Channel8BitPerChannelNEON(
const uint8x16_t& topLeftPixels_u8x16,
const uint8x16_t& topRightPixels_u8x16,
const uint8x16_t& bottomLeftPixels_u8x16,
const uint8x16_t& bottomRightPixels_u8x16,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, 4u>::Type* targetPositionPixels,
const bool useOptimizedNEONFactorReplication =
false);
// Row-subset worker for the four-homography 8-bit warp.
// Compared with homographies8BitPerChannel(), the homographies arrive as a pointer, and the quadrant
// center and output origin are split into scalar x/y components.
1265 template <
unsigned int tChannels>
1266 static inline void homographies8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* homographies,
const uint8_t* borderColor, uint8_t* output,
const Scalar outputQuadrantCenterX,
const Scalar outputQuadrantCenterY,
const int outputOriginX,
const int outputOriginY,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// Row-subset worker for the masked 8-bit homography warp: writes `output` and `outputMask` for
// `numberOutputRows` rows starting at `firstOutputRow`.
// No output-origin argument appears in this signature, unlike homographyMask8BitPerChannel();
// NOTE(review): presumably the origin is folded into the homography by the caller — confirm.
1286 template <
unsigned int tChannels>
1287 static inline void homographyMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// Row-subset worker for the masked four-homography 8-bit warp.
// Quadrant center and output origin are passed as scalar x/y components; `maskValue` has no default here.
1311 template <
unsigned int tChannels>
1312 static inline void homographiesMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const Scalar outputQuadrantCenterX,
const Scalar outputQuadrantCenterY,
const int outputOriginX,
const int outputOriginY,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows);
// Row-subset worker for the camera-based 8-bit homography warp: handles `numberRows` rows starting at
// `firstRow`.
// Cameras and the homography are passed by pointer; an additional distortion lookup for the output
// camera is supplied via `outputCameraDistortionLookup`.
// The matrix parameter is named `normalizedHomography`;
// NOTE(review): the normalization applied by the caller is not visible here — confirm.
1330 template <
unsigned int tChannels>
1331 static void homographyWithCamera8BitPerChannelSubset(
const PinholeCamera* inputCamera,
const PinholeCamera* outputCamera,
const PinholeCamera::DistortionLookup* outputCameraDistortionLookup,
const uint8_t* input,
const SquareMatrix3* normalizedHomography,
const bool useDistortionParameters,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
// Row-subset worker for the masked camera-based 8-bit homography warp: writes `output` and
// `outputMask` for `numberRows` rows starting at `firstRow`.
// `maskValue` has no default here and precedes the row-range arguments.
1350 template <
unsigned int tChannels>
1351 static void homographyWithCameraMask8BitPerChannelSubset(
const PinholeCamera* inputCamera,
const PinholeCamera* outputCamera,
const PinholeCamera::DistortionLookup* outputCameraDistortionLookup,
const uint8_t* input,
const unsigned int inputPaddingElements,
const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const uint8_t maskValue,
const unsigned int firstRow,
const unsigned int numberRows);
// Row-subset worker for the 8-bit lookup-table warp (plain C++ path): handles `numberRows` rows starting
// at `firstRow`. The lookup table is passed by pointer.
1369 template <
unsigned int tChannels>
1370 static void lookup8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
// Row-subset worker for the generic-type lookup-table warp (element type T): handles `numberRows` rows
// starting at `firstRow`. The lookup table is passed by pointer.
1389 template <
typename T,
unsigned int tChannels>
1390 static void lookupSubset(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const T* borderColor, T* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1392#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON row-subset worker for the 8-bit lookup-table warp.
// Declared after an `#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10` line.
// Carries the same three `useOptimized...` flags as lookup(), all defaulting to false; the code paths
// they enable are not visible in this declaration.
1410 template <
unsigned int tChannels>
1411 static void lookup8BitPerChannelSubsetNEON(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows,
const bool useOptimizedNEON =
false,
const bool useOptimizedBilinearValuesAndFactorCalculation =
false,
const bool useOptimizedNEONFactorReplication =
false);
// Row-subset worker for the masked 8-bit lookup-table warp: writes `output` and `outputMask` for
// `numberRows` rows starting at `firstRow`.
// `maskValue` has no default here and precedes the padding arguments.
1434 template <
unsigned int tChannels>
1435 static void lookupMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows);
1440 ocean_assert(frame.
isValid());
1441 ocean_assert(width >= 1u && height >= 1u);
1445 if (!
resize(frame, target, worker))
1453 frame = std::move(target);
1457template <
typename TScalar>
1460 ocean_assert(frame !=
nullptr);
1461 ocean_assert(channels >= 1u && channels <= 8u);
1468 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1472 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1476 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1480 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1484 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1488 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1492 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1496 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1510 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1514 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1518 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1522 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1526 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1530 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1534 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1538 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1546 ocean_assert(
false &&
"Invalid channel number");
1550template <
typename TSource,
typename TTarget,
typename TScalar,
typename TIntermediate>
1553 ocean_assert(frame !=
nullptr);
1554 ocean_assert(channels >= 1u && channels <= 8u);
1561 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1565 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1569 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1573 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1577 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1581 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1585 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1589 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1603 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1607 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1611 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1615 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1619 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1623 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1627 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1631 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1639 ocean_assert(
false &&
"Invalid channel number");
// Definition of FrameInterpolatorBilinear::resize<T, tChannels>().
// Resizes a raw buffer by deriving the per-axis scale factors from the source and target dimensions
// and delegating all work to scale<T, tChannels>().
// Preconditions (checked with ocean_assert only): non-null buffers, all dimensions >= 1.
// NOTE(review): the enclosing braces of the function body are not present in this excerpt.
1643template <
typename T,
unsigned int tChannels>
1644inline void FrameInterpolatorBilinear::resize(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
1646 ocean_assert(source !=
nullptr && target !=
nullptr);
1647 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1648 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
// scale factors are source size divided by target size, computed in double precision;
// a factor > 1 therefore means the target is smaller than the source
1650 const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1651 const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
// forward everything (including the optional worker) to the explicit-factor implementation
1653 scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
// Definition of FrameInterpolatorBilinear::scale<T, tChannels>().
// Rescales a raw buffer with explicit scale factors and dispatches between three visible paths:
// equal source/target resolution, the 8-bit specialization, and the generic scaleSubset() implementation
// (run either through a Worker or directly).
// Preconditions (checked with ocean_assert only): non-null buffers, all dimensions >= 1, both factors > 0.
// NOTE(review): braces, `else`/`return` statements and the definition of `TScale` are missing from this
// excerpt, so the exact branch structure and the condition selecting the worker path cannot be confirmed
// here.
1656template <
typename T,
unsigned int tChannels>
1657inline void FrameInterpolatorBilinear::scale(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
1659 ocean_assert(source !=
nullptr && target !=
nullptr);
1660 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1661 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1662 ocean_assert(sourceX_s_targetX > 0.0);
1663 ocean_assert(sourceY_s_targetY > 0.0);
// identical resolution: the whole frame is handed to FrameConverter::subFrame with zero offsets in both
// frames (presumably a plain row-wise copy honoring the two padding values — confirm)
1665 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1667 FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
// T == uint8_t: use the dedicated 8-bit implementation; the pointer casts only serve to satisfy the
// type system for other instantiations of T, for which this branch is not taken
1671 if (std::is_same<T, uint8_t>::value)
1675 scale8BitPerChannel<tChannels>((
const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
// worker path: the two trailing 0u arguments of createStatic occupy the firstTargetRow/numberTargetRows
// slots of scaleSubset; presumably the worker replaces them with per-thread sub-ranges of
// [0u, targetHeight) — confirm against Worker::executeFunction
1683 worker->
executeFunction(
Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
// direct path: process all target rows, 0u .. targetHeight, in the calling thread
1687 scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
// Dispatcher for the affine warp of an 8-bit frame with tChannels channels.
// The row-wise work is handed to the SSE subset (SSE >= 4.1), the NEON subset (NEON >= 10) or the generic affine8BitPerChannelSubset.
// NOTE(review): `adjustedAffineTransform` is defined on a line not visible here — presumably source_A_target adjusted by targetOrigin; confirm.
// Braces, #else/#endif lines and (presumably) the worker null check are also not visible in this excerpt.
1692template <
unsigned int tChannels>
1693inline void FrameInterpolatorBilinear::affine8BitPerChannel(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3& source_A_target,
const uint8_t* borderColor, uint8_t* target,
const CV::PixelPositionI& targetOrigin,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
// Calls routed through the worker. The SIMD subsets are only selected for targets at least 4 pixels wide.
// 8u / 9u are the zero-based positions of the two `0u` placeholders in the createStatic argument list
// (first target row / number of rows in the subset signature); 20u is presumably a minimal-iterations hint — confirm against Worker.
1700 if (targetWidth >= 4u)
1702#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1703 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1705#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1706 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
// Generic (non-SIMD) subset executed through the worker.
1711 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
// Direct calls: same implementation selection, each call covers all rows [0, targetHeight).
1715 if (targetWidth >= 4u)
1717#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1718 affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1720#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1721 affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1726 affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
// Dispatcher for the homography warp of an 8-bit frame with tChannels channels.
// The row-wise work is handed to the SSE subset (SSE >= 4.1), the NEON subset (NEON >= 10) or the generic homography8BitPerChannelSubset.
// NOTE(review): `input_H_shiftedOutput` is defined on a line not visible here — presumably input_H_output combined with outputOrigin; confirm.
// Braces, #else/#endif lines and (presumably) the worker null check are also not visible in this excerpt.
1730template <
unsigned int tChannels>
1731inline void FrameInterpolatorBilinear::homography8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output,
const uint8_t* borderColor, uint8_t* output,
const CV::PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker)
// Calls routed through the worker. The SIMD subsets are only selected for outputs at least 4 pixels wide.
// 10u / 11u are the zero-based positions of the two trailing `0u` placeholders in the createStatic argument list
// (first output row / number of rows in the subset signature); 20u is presumably a minimal-iterations hint — confirm against Worker.
1738 if (outputWidth >= 4u)
1740#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1741 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1743#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1744 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
// Generic (non-SIMD) subset executed through the worker.
1749 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
// Direct calls: same implementation selection, each call covers all rows [0, outputHeight).
1753 if (outputWidth >= 4u)
1755#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1756 homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1758#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1759 homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1764 homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
// Homography warp for frames with arbitrary element type T and tChannels channels.
// uint8_t frames are forwarded to homography8BitPerChannel; all other element types use homographySubset.
// NOTE(review): `input_H_shiftedOutput` is defined on a line not visible here (presumably derived from input_H_output and outputOrigin);
// braces and (presumably) the worker null check are also not visible.
1768template <
typename T,
unsigned int tChannels>
1769inline void FrameInterpolatorBilinear::homography(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output,
const T* borderColor, T* output,
const CV::PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker)
// NOTE(review): this is a plain `if` on a compile-time trait, whereas lookup() uses `if constexpr` for the same check.
1771 if (std::is_same<T, uint8_t>::value)
1773 homography8BitPerChannel<tChannels>((
const uint8_t*)input, inputWidth, inputHeight, input_H_output, (
const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
// Generic path through the worker; 10u / 11u are the zero-based positions of the two trailing `0u` placeholders
// (first output row / number of rows in homographySubset's signature).
1783 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
// Direct call covering all rows [0, outputHeight).
1787 homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
// Dispatcher for the warp of an 8-bit frame that takes an array of four homographies and a quadrant center in the output frame.
// Everything is forwarded to homographies8BitPerChannelSubset; the quadrant center and output origin are passed as separate x/y values.
// NOTE(review): braces and (presumably) the worker null check are not visible in this excerpt.
1792template <
unsigned int tChannels>
1793inline void FrameInterpolatorBilinear::homographies8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3 homographies[4],
const uint8_t* borderColor, uint8_t* output,
const Vector2& outputQuadrantCenter,
const PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker)
// Worker path over the rows [0, outputHeight); 14u / 15u are the zero-based positions of the two trailing `0u` placeholders
// in the createStatic argument list (they match `0u, outputHeight` in the direct call below).
1797 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.
x(), outputQuadrantCenter.
y(), outputOrigin.
x(), outputOrigin.
y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
// Direct call covering all output rows.
1801 homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.
x(), outputQuadrantCenter.
y(), outputOrigin.
x(), outputOrigin.
y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
// Dispatcher for the homography warp of an 8-bit frame that additionally writes an output mask (`outputMask`, using `maskValue`).
// Everything is forwarded to homographyMask8BitPerChannelSubset.
// NOTE(review): `input_H_shiftedOutput` is defined on a line not visible here (presumably derived from input_H_output and outputOrigin);
// braces and (presumably) the worker null check are also not visible.
1805template <
unsigned int tChannels>
1806inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask,
const CV::PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const uint8_t maskValue,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker)
// Worker path over the rows [0, outputHeight); 12u / 13u are the zero-based positions of the two trailing `0u` placeholders.
1813 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
// Direct call covering all output rows.
1817 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
// Dispatcher for the four-homography warp of an 8-bit frame that additionally writes an output mask (`outputMask`, using `maskValue`).
// Everything is forwarded to homographiesMask8BitPerChannelSubset; the quadrant center and output origin are passed as separate x/y values.
// NOTE(review): braces and (presumably) the worker null check are not visible in this excerpt.
1821template <
unsigned int tChannels>
1822inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask,
const Vector2& outputQuadrantCenter,
const CV::PixelPositionI& outputOrigin,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker,
const uint8_t maskValue)
// Worker path over the rows [0, outputHeight). Unlike the sibling dispatchers no explicit index arguments are passed to executeFunction;
// the two `0u` placeholders are the last two createStatic arguments — presumably the default picked by Worker, confirm.
1826 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.
x(), outputQuadrantCenter.
y(), outputOrigin.
x(), outputOrigin.
y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
// Direct call covering all output rows.
1830 homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.
x(), outputQuadrantCenter.
y(), outputOrigin.
x(), outputOrigin.
y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
// Dispatcher that forwards to homographyWithCamera8BitPerChannelSubset for an input and an output pinhole camera.
// NOTE(review): the function signature, the definition of `normalizedHomography`, braces and (presumably) the worker null check
// are not visible in this excerpt.
1834template <
unsigned int tChannels>
// Distortion lookup for the output camera, constructed with the argument 10u (presumably a bin size in pixels — confirm).
1839 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
// Worker path over the rows [0, outputCamera.height()); no explicit index arguments are given, the `0u, 0u` placeholders are the last two arguments.
1843 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.
height());
// Direct call covering all rows of the output camera frame.
1847 homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.
height());
// Dispatcher that forwards to homographyWithCameraMask8BitPerChannelSubset for an input and an output pinhole camera,
// additionally writing an output mask (`outputMask`, using `maskValue`).
// NOTE(review): the definition of `normalizedHomography`, braces and (presumably) the worker null check are not visible in this excerpt.
1851template <
unsigned int tChannels>
1852inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(
const PinholeCamera& inputCamera,
const PinholeCamera& outputCamera,
const uint8_t* input,
const unsigned int inputPaddingElements,
const SquareMatrix3&
homography, uint8_t* output, uint8_t* outputMask,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker,
const uint8_t maskValue)
// Distortion lookup for the output camera, constructed with the argument 10u (presumably a bin size in pixels — confirm).
1856 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
// Worker path over the rows [0, outputCamera.height()); 11u / 12u are the zero-based positions of the two trailing `0u` placeholders.
// The last argument is 10u here, whereas most sibling dispatchers pass 20u.
1860 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.
height(), 11u, 12u, 10u);
// Direct call covering all rows of the output camera frame.
1864 homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.
height());
// Warps `input` into `output` using the lookup table input_LT_output; the number of processed output rows is input_LT_output.sizeY().
// uint8_t frames use lookup8BitPerChannelSubsetNEON (when NEON >= 10 is available and the table is wide enough) or lookup8BitPerChannelSubset;
// every other element type uses lookupSubset.
// NOTE(review): braces, #else/#endif lines and (presumably) the worker null checks are not visible in this excerpt.
1868template <
typename T,
unsigned int tChannels>
1869inline void FrameInterpolatorBilinear::lookup(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable& input_LT_output,
const bool offset,
const T* borderColor, T* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
Worker* worker,
const bool useOptimizedNEON,
const bool useOptimizedBilinearValuesAndFactorCalculation, [[maybe_unused]]
const bool useOptimizedNEONFactorReplication)
// Compile-time selection of the 8-bit implementations.
1871 if constexpr (std::is_same<T, uint8_t>::value)
1873#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// NEON path requires a table width of at least 8 for any channel count, or at least 4 for two or more channels.
1874 if ((tChannels >= 1u && input_LT_output.
sizeX() >= 8) || (tChannels >= 2u && input_LT_output.
sizeX() >= 4))
// NEON subset through the worker; 9u / 10u are the zero-based positions of the two `0u` placeholders
// (they match `0u, sizeY()` in the direct call below). The three optimization flags are passed after the placeholders.
1880 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u, useOptimizedNEON, useOptimizedBilinearValuesAndFactorCalculation, useOptimizedNEONFactorReplication), 0u, (
unsigned int)(input_LT_output.
sizeY()), 9u, 10u, 20u);
// NEON subset, direct call covering all rows.
1884 lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (
unsigned int)(input_LT_output.
sizeY()), useOptimizedNEON, useOptimizedBilinearValuesAndFactorCalculation, useOptimizedNEONFactorReplication);
// Non-NEON 8-bit subset through the worker.
1893 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (
unsigned int)input_LT_output.
sizeY(), 9u, 10u, 20u);
// Non-NEON 8-bit subset, direct call covering all rows.
1897 lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (
unsigned int)(input_LT_output.
sizeY()));
// Generic element types (anything but uint8_t).
1902 ocean_assert((!std::is_same<T, uint8_t>::value));
1906 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (
unsigned int)(input_LT_output.
sizeY()), 9u, 10u, 20u);
// Generic subset, direct call covering all rows.
1910 lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (
unsigned int)(input_LT_output.
sizeY()));
// Dispatcher for the lookup-table warp of an 8-bit frame that additionally writes an output mask (`outputMask`, using `maskValue`).
// Everything is forwarded to lookupMask8BitPerChannelSubset; the number of processed rows is input_LT_output.sizeY().
// NOTE(review): braces and (presumably) the worker null check are not visible in this excerpt.
1915template <
unsigned int tChannels>
1916inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable& input_LT_output,
const bool offset, uint8_t* output, uint8_t* outputMask,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
Worker* worker,
const uint8_t maskValue)
// Worker path; 11u / 12u are the zero-based positions of the two trailing `0u` placeholders in the createStatic argument list.
1920 worker->
executeFunction(
Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (
unsigned int)(input_LT_output.
sizeY()), 11u, 12u, 20u);
// Direct call covering all rows.
1924 lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (
unsigned int)(input_LT_output.
sizeY()));
// Re-samples a frame captured with `sourceCamera` into the frame of `targetCamera`, using the rotation source_R_target between both cameras.
// A binned lookup table is filled for the target frame and then applied with lookup<T, tChannels>();
// if `source_OLT_target` is not null, the lookup table is moved into it afterwards.
// NOTE(review): the declaration of `lookupTable`, the computation of `cornerPosition`, the statements between the ray computation and
// the lookup() call (original lines 1953-1968) and all braces are not visible in this excerpt.
1928template <
typename T,
unsigned int tChannels>
1929void FrameInterpolatorBilinear::resampleCameraImage(
const T* sourceFrame,
const AnyCamera& sourceCamera,
const SquareMatrix3& source_R_target,
const AnyCamera& targetCamera, T* targetFrame,
const unsigned int sourceFramePaddingElements,
const unsigned int targetFramePaddingElements,
LookupCorner2<Vector2>* source_OLT_target,
Worker* worker,
const unsigned int binSizeInPixel,
const T* borderColor)
1931 static_assert(tChannels >= 1u,
"Invalid channel number!");
1933 ocean_assert(sourceFrame !=
nullptr);
1934 ocean_assert(sourceCamera.
isValid());
1936 ocean_assert(targetCamera.
isValid());
1937 ocean_assert(targetFrame !=
nullptr);
1938 ocean_assert(binSizeInPixel >= 1u);
// Number of bins per axis: target resolution divided by the bin size, but at least one bin.
1940 const size_t binsX = std::max(1u, targetCamera.
width() / binSizeInPixel);
1941 const size_t binsY = std::max(1u, targetCamera.
height() / binSizeInPixel);
// Both loops use an inclusive upper bound, i.e. they visit (binsY() + 1) x (binsX() + 1) positions.
1944 for (
size_t yBin = 0; yBin <= lookupTable.
binsY(); ++yBin)
1946 for (
size_t xBin = 0; xBin <= lookupTable.
binsX(); ++xBin)
1950 constexpr bool makeUnitVector =
false;
// Viewing vector of the target camera for the current position, rotated with source_R_target.
1952 const Vector3 rayI = source_R_target * targetCamera.
vector(cornerPosition, makeUnitVector);
// Applies the table; the literal `true` is the `offset` argument of lookup().
1969 lookup<T, tChannels>(sourceFrame, sourceCamera.
width(), sourceCamera.
height(), lookupTable,
true , borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
// Optionally hands the lookup table to the caller.
1971 if (source_OLT_target)
1973 *source_OLT_target = std::move(lookupTable);
// Dispatcher for the rotation of an 8-bit frame with tChannels channels by `angle` around the anchor
// (horizontalAnchorPosition, verticalAnchorPosition); source and target share the same width and height.
// Everything is forwarded to rotate8BitPerChannelSubset.
// NOTE(review): braces and (presumably) the worker null check are not visible in this excerpt.
1977template <
unsigned int tChannels>
1978void FrameInterpolatorBilinear::rotate8BitPerChannel(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const Scalar horizontalAnchorPosition,
const Scalar verticalAnchorPosition,
const Scalar angle,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker,
const uint8_t* borderColor)
1980 static_assert(tChannels != 0u,
"Invalid channel number!");
1982 ocean_assert(source !=
nullptr && target !=
nullptr);
1983 ocean_assert(width >= 1u && height >= 1u);
// Worker path over the rows [0, height); no explicit index arguments are given, the `0u, 0u` placeholders are the last two arguments.
1987 worker->
executeFunction(
Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
// Direct call covering all rows.
1991 rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
// Bilinear interpolation of one pixel of an 8-bit frame with tChannels channels, using 7-bit fixed-point interpolation factors.
// Two variants follow: one for tPixelCenter == PC_TOP_LEFT and one for PC_CENTER.
// NOTE(review): the function signature, the branch statement selecting between the two variants (presumably an
// `if constexpr` on tPixelCenter) and all braces are not visible in this excerpt.
1995template <
unsigned int tChannels, PixelCenter tPixelCenter,
typename TScalar>
1998 static_assert(tChannels != 0u,
"Invalid channel number!");
1999 static_assert(tPixelCenter ==
PC_TOP_LEFT || tPixelCenter ==
PC_CENTER,
"Invalid pixel center!");
2001 ocean_assert(frame !=
nullptr && result !=
nullptr);
2002 ocean_assert(width != 0u && height != 0u);
// Number of elements between the starts of two consecutive rows.
2004 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2006 ocean_assert(position.
x() >= TScalar(0));
2007 ocean_assert(position.
y() >= TScalar(0));
// --- Variant with valid positions in [0, width - 1] x [0, height - 1] ---
2011 ocean_assert(position.
x() <= TScalar(width - 1u));
2012 ocean_assert(position.
y() <= TScalar(height - 1u));
// Truncation yields the top-left neighbor pixel (the position is non-negative).
2014 const unsigned int left = (
unsigned int)(position.
x());
2015 const unsigned int top = (
unsigned int)(position.
y());
2016 ocean_assert(left < width && top < height);
// Fractional parts converted to rounded fixed-point factors in [0, 128]; txi + txi_ == 128 and tyi + tyi_ == 128.
2018 const TScalar tx = position.
x() - TScalar(left);
2019 ocean_assert(tx >= 0 && tx <= 1);
2020 const unsigned int txi = (
unsigned int)(tx * TScalar(128) + TScalar(0.5));
2021 const unsigned int txi_ = 128u - txi;
2023 const TScalar ty = position.
y() - TScalar(top);
2024 ocean_assert(ty >= 0 && ty <= 1);
2025 const unsigned int tyi = (
unsigned int)(ty * TScalar(128) + TScalar(0.5));
2026 const unsigned int tyi_ = 128u - tyi;
// In the last column / row the offset is 0, so the right / bottom neighbor coincides with the left / top pixel and no out-of-frame element is read.
2028 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2029 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2031 const uint8_t*
const topLeft = frame + top * frameStrideElements + tChannels * left;
// The four weights sum to 128 * 128 = 16384 = 2^14.
2033 const unsigned int txty = txi * tyi;
2034 const unsigned int txty_ = txi * tyi_;
2035 const unsigned int tx_ty = txi_ * tyi;
2036 const unsigned int tx_ty_ = txi_ * tyi_;
// Weighted sum per channel; + 8192 (half of 2^14) rounds to nearest before the normalizing shift by 14 bits.
2038 for (
unsigned int n = 0u; n < tChannels; ++n)
2040 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
// --- Variant for PC_CENTER with valid positions in [0, width] x [0, height] ---
2045 ocean_assert(tPixelCenter ==
PC_CENTER);
2047 ocean_assert(position.
x() <= TScalar(width));
2048 ocean_assert(position.
y() <= TScalar(height));
// The position is shifted by half a pixel and clamped at 0.
2050 const TScalar xShifted = std::max(TScalar(0.0), position.
x() - TScalar(0.5));
2051 const TScalar yShifted = std::max(TScalar(0.0), position.
y() - TScalar(0.5));
2053 const unsigned int left = (
unsigned int)(xShifted);
2054 const unsigned int top = (
unsigned int)(yShifted);
2056 ocean_assert(left < width);
2057 ocean_assert(top < height);
2059 const TScalar tx = xShifted - TScalar(left);
2060 const TScalar ty = yShifted - TScalar(top);
2062 ocean_assert(tx >= 0 && tx <= 1);
2063 ocean_assert(ty >= 0 && ty <= 1);
// Same fixed-point factors, border handling and weighted sum as in the first variant.
2065 const unsigned int txi = (
unsigned int)(tx * TScalar(128) + TScalar(0.5));
2066 const unsigned int txi_ = 128u - txi;
2068 const unsigned int tyi = (
unsigned int)(ty * TScalar(128) + TScalar(0.5));
2069 const unsigned int tyi_ = 128u - tyi;
2071 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2072 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2074 const uint8_t*
const topLeft = frame + top * frameStrideElements + left * tChannels;
2076 const unsigned int txty = txi * tyi;
2077 const unsigned int txty_ = txi * tyi_;
2078 const unsigned int tx_ty = txi_ * tyi;
2079 const unsigned int tx_ty_ = txi_ * tyi_;
2081 for (
unsigned int n = 0u; n < tChannels; ++n)
2083 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
// Bilinear interpolation of one pixel for arbitrary source / target element types, with interpolation weights of type TIntermediate.
// `resultBias` is added to every interpolated channel before the conversion to TTarget.
// Two variants follow: one for tPixelCenter == PC_TOP_LEFT and one for PC_CENTER.
// NOTE(review): the function signature, the branch statement selecting between the two variants (presumably an
// `if constexpr` on tPixelCenter) and all braces are not visible in this excerpt.
2088template <
typename TSource,
typename TTarget,
unsigned int tChannels, PixelCenter tPixelCenter,
typename TScalar,
typename TIntermediate>
2091 static_assert(tChannels != 0u,
"Invalid channel number!");
2092 static_assert(tPixelCenter ==
PC_TOP_LEFT || tPixelCenter ==
PC_CENTER,
"Invalid pixel center!");
2094 ocean_assert(frame !=
nullptr && result !=
nullptr);
2095 ocean_assert(width != 0u && height != 0u);
// Number of elements between the starts of two consecutive rows.
2097 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2099 ocean_assert(position.
x() >= TScalar(0));
2100 ocean_assert(position.
y() >= TScalar(0));
// --- Variant with valid positions in [0, width - 1] x [0, height - 1] ---
2104 ocean_assert(position.
x() <= TScalar(width - 1u));
2105 ocean_assert(position.
y() <= TScalar(height - 1u));
// Truncation yields the top-left neighbor pixel (the position is non-negative).
2107 const unsigned int left = (
unsigned int)(position.
x());
2108 const unsigned int top = (
unsigned int)(position.
y());
// Fractional parts of the position, each within [0, 1].
2110 const TScalar tx = position.
x() - TScalar(left);
2111 ocean_assert(tx >= 0 && tx <= 1);
2113 const TScalar ty = position.
y() - TScalar(top);
2114 ocean_assert(ty >= 0 && ty <= 1);
// In the last column / row the offset is 0, so the right / bottom neighbor coincides with the left / top pixel and no out-of-frame element is read.
2116 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2117 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2119 const TSource*
const topLeft = frame + top * frameStrideElements + tChannels * left;
// Weights of the four neighbors: a trailing underscore denotes the complement (1 - t) of the respective factor.
2121 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2122 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2123 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2124 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
// Weighted sum per channel plus bias, converted to the target element type.
2128 for (
unsigned int n = 0u; n < tChannels; ++n)
2130 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
// --- Variant for PC_CENTER with valid positions in [0, width] x [0, height] ---
2135 ocean_assert(tPixelCenter ==
PC_CENTER);
2137 ocean_assert(position.
x() <= TScalar(width));
2138 ocean_assert(position.
y() <= TScalar(height));
// The position is shifted by half a pixel and clamped at 0.
2140 const TScalar xShifted = std::max(TScalar(0.0), position.
x() - TScalar(0.5));
2141 const TScalar yShifted = std::max(TScalar(0.0), position.
y() - TScalar(0.5));
2143 const unsigned int left = (
unsigned int)(xShifted);
2144 const unsigned int top = (
unsigned int)(yShifted);
2146 ocean_assert(left < width);
2147 ocean_assert(top < height);
2149 const TScalar tx = xShifted - TScalar(left);
2150 const TScalar ty = yShifted - TScalar(top);
2152 ocean_assert(tx >= 0 && tx <= 1);
2153 ocean_assert(ty >= 0 && ty <= 1);
// Same border handling, weights and weighted sum as in the first variant.
2155 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2156 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2158 const TSource*
const topLeft = frame + top * frameStrideElements + tChannels * left;
2160 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2161 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2162 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2163 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2167 for (
unsigned int n = 0u; n < tChannels; ++n)
2169 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + 8192u) >> 14u);
// Single-pixel bilinear interpolation for an 8-bit frame, templated on the alpha channel position (tAlphaAtFront) and
// on the transparency convention (tTransparentIs0xFF).
// NOTE(review): the function name and signature, the definitions of `pos`, `left` and `top`, the body of the first loop,
// the loop header of the border branch, the code following original line 2251 and all braces are not visible in this excerpt;
// how the two alpha template parameters are used cannot be seen from here.
2174template <
unsigned int tChannels,
bool tAlphaAtFront,
bool tTransparentIs0xFF>
2177 static_assert(tChannels != 0u,
"Invalid channel number!");
2179 ocean_assert(frame && result);
// Loop over all channels but one (body not visible in this excerpt).
2186 for (
unsigned int n = 0u; n < tChannels - 1u; ++n)
// Number of elements between the starts of two consecutive rows.
2196 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
// The top-left neighbor may lie one pixel outside of the frame (index -1).
2201 ocean_assert(left >= -1 && left <
int(width));
2202 ocean_assert(top >= -1 && top <
int(height));
// Interior case: all four neighbors are inside the frame. The unsigned casts turn -1 into a huge value, so negative indices fail this test.
2204 if ((
unsigned int)left < width - 1u && (
unsigned int)top < height - 1u)
// Rounded fixed-point interpolation factors in [0, 128]; factor and complement sum to 128.
2208 const unsigned int txi = (
unsigned int)((pos.
x() -
Scalar(left)) *
Scalar(128) +
Scalar(0.5));
2209 const unsigned int txi_ = 128u - txi;
2211 const unsigned int tyi = (
unsigned int)((pos.
y() -
Scalar(top)) *
Scalar(128) +
Scalar(0.5));
2212 const unsigned int tyi_ = 128u - tyi;
2214 const uint8_t*
const topLeft = frame + top * frameStrideElements + left * tChannels;
// The four weights sum to 128 * 128 = 2^14.
2216 const unsigned int txty = txi * tyi;
2217 const unsigned int txty_ = txi * tyi_;
2218 const unsigned int tx_ty = txi_ * tyi;
2219 const unsigned int tx_ty_ = txi_ * tyi_;
// Weighted sum per channel; + 8192 rounds to nearest before the normalizing shift by 14 bits.
2221 for (
unsigned int n = 0u; n < tChannels; ++n)
2223 result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2224 + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
// Border case: at least one neighbor lies outside of the frame.
2231 const unsigned int txi = (
unsigned int)((pos.
x() -
Scalar(left)) *
Scalar(128) +
Scalar(0.5));
2232 const unsigned int txi_ = 128u - txi;
2234 const unsigned int tyi = (
unsigned int)((pos.
y() -
Scalar(top)) *
Scalar(128) +
Scalar(0.5));
2235 const unsigned int tyi_ = 128u - tyi;
// Offsets are 0 whenever the right / bottom neighbor would leave the frame or the top-left index is negative.
2237 const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2238 const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2240 ocean_assert(left <
int(width) && top <
int(height));
// Negative indices are clamped to 0 so that the base pointer stays inside the frame.
2241 const uint8_t*
const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2243 const unsigned int txty = txi * tyi;
2244 const unsigned int txty_ = txi * tyi_;
2245 const unsigned int tx_ty = txi_ * tyi;
2246 const unsigned int tx_ty_ = txi_ * tyi_;
// Same rounded weighted sum as in the interior case, using the clamped offsets.
2250 result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2251 + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
// Generic (non-SIMD) affine warp of the target rows [firstTargetRow, firstTargetRow + numberOutputRows) of an 8-bit frame.
// Target pixels whose source position lies outside [0, sourceWidth - 1] x [0, sourceHeight - 1] receive the border color
// (all channels zero if `borderColor` is null); all other pixels are interpolated bilinearly.
// NOTE(review): the PixelType alias, the computation of `sourcePosition`, the advancing of `targetRow` and all braces
// are not visible in this excerpt.
2263template <
unsigned int tChannels>
2264void FrameInterpolatorBilinear::affine8BitPerChannelSubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberOutputRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements)
2266 static_assert(tChannels >= 1u,
"Invalid channel number!");
2268 ocean_assert(source !=
nullptr && target !=
nullptr);
2269 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2270 ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2271 ocean_assert(source_A_target);
2274 ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
// Number of elements between the starts of two consecutive target rows.
2276 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
// Largest valid source coordinates, used for the in-frame test below.
2278 const Scalar scalarSourceWidth_1 =
Scalar(sourceWidth - 1u);
2279 const Scalar scalarSourceHeight_1 =
Scalar(sourceHeight - 1u);
// Fallback border color (all channels zero) used when no border color is provided.
2283 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2284 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2286 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2288 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2314 for (
unsigned int x = 0u; x < targetWidth; ++x)
// Reference source position computed directly from the matrix elements [0], [3], [6] (x) and [1], [4], [7] (y);
// judging by the names these values serve debug verification only — confirm.
2319 const Scalar debugSourceX = (*source_A_target)[0] *
Scalar(x) + (*source_A_target)[3] *
Scalar(y) + (*source_A_target)[6];
2320 const Scalar debugSourceY = (*source_A_target)[1] *
Scalar(x) + (*source_A_target)[4] *
Scalar(y) + (*source_A_target)[7];
// Source position outside of the frame: write the border color.
2324 if (sourcePosition.
x() <
Scalar(0) || sourcePosition.
x() > scalarSourceWidth_1 || sourcePosition.
y() <
Scalar(0) || sourcePosition.
y() > scalarSourceHeight_1)
2326 *targetRow = *bColor;
// Source position inside of the frame: bilinear interpolation with top-left pixel center.
2330 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
// Generic (non-SIMD) homography warp of the output rows [firstOutputRow, firstOutputRow + numberOutputRows) of an 8-bit frame.
// Output pixels whose input position lies outside [0, inputWidth - 1] x [0, inputHeight - 1] receive the border color
// (all channels zero if `borderColor` is null); all other pixels are interpolated bilinearly.
// NOTE(review): the PixelType alias, the remaining per-row terms, the computation of `inputPosition` / `debugInputPosition`,
// the advancing of `outputRowPixel` and all braces are not visible in this excerpt.
2338template <
unsigned int tChannels>
2339void FrameInterpolatorBilinear::homography8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
2341 static_assert(tChannels >= 1u,
"Invalid channel number!");
2343 ocean_assert(input !=
nullptr && output !=
nullptr);
2344 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2345 ocean_assert(outputWidth > 0u && outputHeight > 0u);
2346 ocean_assert(input_H_output !=
nullptr);
2348 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
// Number of elements between the starts of two consecutive output rows.
2350 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// Largest valid input coordinates, used for the in-frame test below.
2352 const Scalar scalarInputWidth_1 =
Scalar(inputWidth - 1u);
2353 const Scalar scalarInputHeight_1 =
Scalar(inputHeight - 1u);
// Border color copied by value; all channels zero when no border color is provided.
2357 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2358 const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2360 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
// Terms of the third homography row for the current output row: X2 is the x coefficient and
// constValue2 the part independent of x (the corresponding terms of the other rows are not visible in this excerpt).
2386 const Scalar X2 = (*input_H_output)(2, 0);
2387 const Scalar constValue2 = (*input_H_output)(2, 1) *
Scalar(y) + (*input_H_output)(2, 2);
2389 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2391 for (
unsigned int x = 0u; x < outputWidth; ++x)
// Consistency check of the computed position against a reference position, with a tolerance of 0.01.
2398 ocean_assert(inputPosition.
isEqual(debugInputPosition,
Scalar(0.01)));
// Input position outside of the frame: write the border color.
2401 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
2403 *outputRowPixel = bColor;
// Input position inside of the frame: bilinear interpolation with top-left pixel center.
2407 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
// Homography warp of the output rows [firstOutputRow, firstOutputRow + numberOutputRows) for frames with arbitrary element type T.
// Output pixels whose input position lies inside [0, inputWidth - 1] x [0, inputHeight - 1] are interpolated bilinearly;
// all other pixels receive the border color (all channels zero if `borderColor` is null).
// NOTE(review): the PixelType and TIntermediate aliases, the remaining per-row terms, the computation of
// `inputPosition` / `debugInputPosition`, the advancing of `outputRowPixel` and all braces are not visible in this excerpt.
2415template <
typename T,
unsigned int tChannels>
2416void FrameInterpolatorBilinear::homographySubset(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const T* borderColor, T* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
2418 static_assert(tChannels >= 1u,
"Invalid channel number!");
2420 ocean_assert(input !=
nullptr && output !=
nullptr);
2421 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2422 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2423 ocean_assert(input_H_output !=
nullptr);
2425 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
// Number of elements between the starts of two consecutive output rows.
2427 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// Largest valid input coordinates, used for the in-frame test below.
2429 const Scalar scalarInputWidth1 =
Scalar(inputWidth - 1u);
2430 const Scalar scalarInputHeight1 =
Scalar(inputHeight - 1u);
// Fallback border color (all channels zero) used when no border color is provided.
2437 constexpr T zeroColor[tChannels] = {T(0)};
2438 const PixelType*
const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
// No bias is added to the interpolated result.
2440 constexpr TIntermediate bias = TIntermediate(0);
2442 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
// Terms of the third homography row for the current output row: X2 is the x coefficient and
// constValue2 the part independent of x (the corresponding terms of the other rows are not visible in this excerpt).
2468 const Scalar X2 = (*input_H_output)(2, 0);
2469 const Scalar constValue2 = (*input_H_output)(2, 1) *
Scalar(y) + (*input_H_output)(2, 2);
2471 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2473 for (
unsigned int x = 0u; x < outputWidth; ++x)
// Consistency check against a reference position (tolerance 0.01); skipped when Scalar is float.
2480 ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.
isEqual(debugInputPosition,
Scalar(0.01)));
// Input position inside of the frame: bilinear interpolation with top-left pixel center.
2483 if (inputPosition.
x() >=
Scalar(0) && inputPosition.
x() <= scalarInputWidth1 && inputPosition.
y() >=
Scalar(0) && inputPosition.
y() <= scalarInputHeight1)
2485 interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
// Input position outside of the frame: write the border color.
2489 *outputRowPixel = *bColor;
2497#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
// Applies an affine warp to a subset of the target rows of an 8 bit per channel frame, handling four target
// pixels per iteration with SSE 4.1 intrinsics.
// For a target pixel (x, y) the source position is
//   sourceX = A(0, 0) * x + A(0, 1) * y + A(0, 2),   sourceY = A(1, 0) * x + A(1, 1) * y + A(1, 2)
// with A = *source_A_target. Positions inside [0, sourceWidth - 1] x [0, sourceHeight - 1] are bilinearly
// interpolated by interpolate4Pixels8BitPerChannelSSE(), the remaining pixels receive the border color.
// - source, target: frame data with tChannels bytes per pixel, must be valid (asserted)
// - source_A_target: 3x3 matrix holding the affine transformation, must be valid (asserted)
// - borderColor: tChannels bytes, nullptr to use zero for every channel
// - targetWidth: must be at least 4 (asserted), as pixels are always written in blocks of four
// - firstTargetRow, numberTargetRows: range of target rows handled by this call, must fit into targetHeight (asserted)
// - sourcePaddingElements, targetPaddingElements: padding elements at the end of each source/target row
2499template <
unsigned int tChannels>
2500inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements)
2502 static_assert(tChannels >= 1u,
"Invalid channel number!");
2504 ocean_assert(source && target);
2505 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2506 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2507 ocean_assert(source_A_target);
2510 ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
// number of elements between the start of two consecutive rows
2512 const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2513 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
// fallback border color (all channels zero), used when no explicit border color is provided
2517 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2518 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// 16 byte aligned scratch buffers, filled with aligned SSE stores and handed to the scalar gather code:
// one validity flag and four neighbor offsets (in elements) for each of the four pixels
2520 OCEAN_ALIGN_DATA(16)
unsigned int validPixels[4];
2522 OCEAN_ALIGN_DATA(16)
unsigned int topLeftOffsets[4];
2523 OCEAN_ALIGN_DATA(16)
unsigned int topRightOffsets[4];
2524 OCEAN_ALIGN_DATA(16)
unsigned int bottomLeftOffsets[4];
2525 OCEAN_ALIGN_DATA(16)
unsigned int bottomRightOffsets[4];
// first column of the transformation (the factors of x), identical for all rows
2528 const __m128 m128_f_X0 = _mm_set_ps1(
float((*source_A_target)(0, 0)));
2529 const __m128 m128_f_X1 = _mm_set_ps1(
float((*source_A_target)(1, 0)));
2531 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
// first pixel of target row y
2533 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
// the part of the source position which is constant for the entire target row: A(r, 1) * y + A(r, 2)
2557 const __m128 m128_f_C0 = _mm_set_ps1(
float((*source_A_target)(0, 1) *
Scalar(y) + (*source_A_target)(0, 2)));
2558 const __m128 m128_f_C1 = _mm_set_ps1(
float((*source_A_target)(1, 1) *
Scalar(y) + (*source_A_target)(1, 2)));
// constants needed for the range checks and the offset calculations
2561 const __m128 m128_f_zero = _mm_setzero_ps();
2564 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2567 const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2570 const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(
int(sourceWidth) - 1);
2571 const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(
int(sourceHeight) - 1);
2574 const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(
float(sourceWidth - 1u));
2575 const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(
float(sourceHeight - 1u));
2577 for (
unsigned int x = 0u; x < targetWidth; x += 4u)
// the last block of four pixels would exceed the row: the row pointer is moved back so that the block ends
// with the last pixel of the row (some pixels are calculated twice)
// NOTE(review): x is presumably set to newX in this branch as well - the statement is not visible here, confirm
2579 if (x + 4u > targetWidth)
2584 ocean_assert(x >= 4u && targetWidth > 4u);
2585 const unsigned int newX = targetWidth - 4u;
2587 ocean_assert(x > newX);
2588 targetRow -= x - newX;
2593 ocean_assert(!(x + 4u < targetWidth));
// the x coordinates of the four target pixels, pixel x + 0 in the lowest lane
2599 const __m128 m128_f_x_0123 = _mm_set_ps(
float(x + 3u),
float(x + 2u),
float(x + 1u),
float(x + 0u));
// source positions of the four pixels: X * x + C
2602 const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2603 const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
// per-lane mask (all bits set or all bits cleared): 0 <= sourceX <= sourceWidth - 1 and 0 <= sourceY <= sourceHeight - 1
2606 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero));
2607 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero));
2609 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY));
// none of the four pixels is located inside the source frame: the border color is written directly
2612 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2615 OCEAN_ALIGN_DATA(16)
unsigned int debugValidPixels[4];
2616 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2617 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2620 targetRow[0] = *bColor;
2621 targetRow[1] = *bColor;
2622 targetRow[2] = *bColor;
2623 targetRow[3] = *bColor;
// at least one pixel is valid: the validity flags are stored for the per-pixel handling
2631 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2632 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
// integer coordinates of the top left neighbor pixels
2636 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2637 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2640 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2641 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
// right/bottom neighbors, clamped to the last column/row with an unsigned minimum
2644 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2645 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
// element offsets of the four neighbors: row * sourceStrideElements + tChannels * column
2648 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left));
2649 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2650 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left));
2651 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2654 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2655 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2656 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2657 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
// fractional parts of the source positions, scaled by 128 and rounded to integer interpolation factors
2665 __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2666 __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2669 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2670 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2672 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2673 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2675 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2676 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
// gathers the neighbor pixels, interpolates the four pixels and writes them to the target row
2678 interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
// Warps a subset of the output rows of an 8 bit per channel frame with a homography, handling four output
// pixels per iteration with SSE 4.1 intrinsics.
// For an output pixel (x, y) the input position is (xx / zz, yy / zz) with
//   xx = H(0, 0) * x + H(0, 1) * y + H(0, 2),  yy = H(1, 0) * x + H(1, 1) * y + H(1, 2),  zz = H(2, 0) * x + H(2, 1) * y + H(2, 2)
// and H = *input_H_output. Positions inside [0, inputWidth - 1] x [0, inputHeight - 1] are bilinearly
// interpolated by interpolate4Pixels8BitPerChannelSSE(), the remaining pixels receive the border color.
// - input, output: frame data with tChannels bytes per pixel, both must not be nullptr (asserted)
// - input_H_output: 3x3 homography, must not be nullptr (asserted)
// - borderColor: tChannels bytes, nullptr to use zero for every channel
// - outputWidth: must be at least 4 (asserted), as pixels are always written in blocks of four
// - inputPaddingElements, outputPaddingElements: padding elements at the end of each input/output row
// - firstOutputRow, numberOutputRows: range of output rows handled by this call, must fit into outputHeight (asserted)
2684template <
unsigned int tChannels>
2685inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
2687 static_assert(tChannels >= 1u,
"Invalid channel number!");
2689 ocean_assert(input !=
nullptr && output !=
nullptr);
2690 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2691 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2692 ocean_assert(input_H_output !=
nullptr);
2694 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
// number of elements between the start of two consecutive rows
2696 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2697 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// fallback border color (all channels zero), used when no explicit border color is provided
2701 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2702 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// 16 byte aligned scratch buffers, filled with aligned SSE stores and handed to the scalar gather code:
// one validity flag and four neighbor offsets (in elements) for each of the four pixels
2704 OCEAN_ALIGN_DATA(16)
unsigned int validPixels[4];
2706 OCEAN_ALIGN_DATA(16)
unsigned int topLeftOffsets[4];
2707 OCEAN_ALIGN_DATA(16)
unsigned int topRightOffsets[4];
2708 OCEAN_ALIGN_DATA(16)
unsigned int bottomLeftOffsets[4];
2709 OCEAN_ALIGN_DATA(16)
unsigned int bottomRightOffsets[4];
// first column of the homography (the factors of x), identical for all rows
2712 const __m128 m128_f_X0 = _mm_set_ps1(
float((*input_H_output)(0, 0)));
2713 const __m128 m128_f_X1 = _mm_set_ps1(
float((*input_H_output)(1, 0)));
2714 const __m128 m128_f_X2 = _mm_set_ps1(
float((*input_H_output)(2, 0)));
// constants needed for the range checks and the offset calculations
2717 const __m128 m128_f_zero = _mm_setzero_ps();
2720 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2723 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2726 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(
int(inputWidth) - 1);
2727 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(
int(inputHeight) - 1);
2730 const __m128 m128_f_inputWidth_1 = _mm_set_ps1(
float(inputWidth - 1u));
2731 const __m128 m128_f_inputHeight_1 = _mm_set_ps1(
float(inputHeight - 1u));
2733 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
// first pixel of output row y
2735 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
// the part of the (homogeneous) input position which is constant for the entire output row: H(r, 1) * y + H(r, 2)
2759 const __m128 m128_f_C0 = _mm_set_ps1(
float((*input_H_output)(0, 1) *
Scalar(y) + (*input_H_output)(0, 2)));
2760 const __m128 m128_f_C1 = _mm_set_ps1(
float((*input_H_output)(1, 1) *
Scalar(y) + (*input_H_output)(1, 2)));
2761 const __m128 m128_f_C2 = _mm_set_ps1(
float((*input_H_output)(2, 1) *
Scalar(y) + (*input_H_output)(2, 2)));
2763 for (
unsigned int x = 0u; x < outputWidth; x += 4u)
// the last block of four pixels would exceed the row: the row pointer is moved back so that the block ends
// with the last pixel of the row (some pixels are calculated twice)
// NOTE(review): x is presumably set to newX in this branch as well - the statement is not visible here, confirm
2765 if (x + 4u > outputWidth)
2770 ocean_assert(x >= 4u && outputWidth > 4u);
2771 const unsigned int newX = outputWidth - 4u;
2773 ocean_assert(x > newX);
2774 outputPixelData -= x - newX;
2779 ocean_assert(!(x + 4u < outputWidth));
// the x coordinates of the four output pixels, pixel x + 0 in the lowest lane
2785 const __m128 m128_f_x_0123 = _mm_set_ps(
float(x + 3u),
float(x + 2u),
float(x + 1u),
float(x + 0u));
// homogeneous input positions of the four pixels: X * x + C
2788 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2789 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2790 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
// de-homogenization, either with the approximated reciprocal of zz (_mm_rcp_ps) followed by a multiplication,
// or with an explicit division
// NOTE(review): the matching #else / #endif are not visible in this excerpt - confirm
2792#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ
2797 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2800 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2801 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2806 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2807 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
// per-lane mask (all bits set or all bits cleared): 0 <= inputX <= inputWidth - 1 and 0 <= inputY <= inputHeight - 1
2813 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero));
2814 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero));
2816 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY));
// none of the four pixels is located inside the input frame: the border color is written directly
2819 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2822 OCEAN_ALIGN_DATA(16)
unsigned int debugValidPixels[4];
2823 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2824 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2827 outputPixelData[0] = *bColor;
2828 outputPixelData[1] = *bColor;
2829 outputPixelData[2] = *bColor;
2830 outputPixelData[3] = *bColor;
2832 outputPixelData += 4;
// at least one pixel is valid: the validity flags are stored for the per-pixel handling
2838 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2839 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
// integer coordinates of the top left neighbor pixels
2843 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2844 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2847 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2848 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
// right/bottom neighbors, clamped to the last column/row with an unsigned minimum
2851 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2852 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
// element offsets of the four neighbors: row * inputStrideElements + tChannels * column
2855 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left));
2856 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2857 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left));
2858 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2861 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2862 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2863 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2864 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
// fractional parts of the input positions, scaled by 128 and rounded to integer interpolation factors
2872 __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2873 __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2876 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2877 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2879 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2880 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2882 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2883 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
// gathers the neighbor pixels, interpolates the four pixels and writes them to the output row
2885 interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2886 outputPixelData += 4;
2892OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(
const __m128i& sourcesTopLeft,
const __m128i& sourcesTopRight,
const __m128i& sourcesBottomLeft,
const __m128i& sourcesBottomRight,
const __m128i& factorsTopLeft,
const __m128i& factorsTopRight,
const __m128i& factorsBottomLeft,
const __m128i& factorsBottomRight)
2909 const __m128i mask32_Channel0 =
SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull);
2910 const __m128i mask32_Channel1 =
SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull);
2911 const __m128i mask32_Channel2 =
SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull);
2919 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2922 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2925 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2929 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2930 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2931 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2935 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2936 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2937 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2941 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2942 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2943 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2946 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2955 __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14),
SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2961 __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14),
SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2967 __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14),
SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2972 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2976OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(
const __m128i& sourcesTopLeft,
const __m128i& sourcesTopRight,
const __m128i& sourcesBottomLeft,
const __m128i& sourcesBottomRight,
const __m128i& factorsTopLeft,
const __m128i& factorsTopRight,
const __m128i& factorsBottomLeft,
const __m128i& factorsBottomRight)
2993 const __m128i mask32_Channel0 =
SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull);
2994 const __m128i mask32_Channel1 =
SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull);
2995 const __m128i mask32_Channel2 =
SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull);
2996 const __m128i mask32_Channel3 =
SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull);
3004 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
3007 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
3010 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
3013 __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
3017 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
3018 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
3019 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
3020 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
3024 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
3025 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
3026 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
3027 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
3031 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
3032 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
3033 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
3034 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
3037 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3044 __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3049 __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3053 __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3057 __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3062 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3065#ifdef OCEAN_COMPILER_MSC
3071OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(
const uint8_t* source,
const unsigned int offsetsTopLeft[4],
const unsigned int offsetsTopRight[4],
const unsigned int offsetsBottomLeft[4],
const unsigned int offsetsBottomRight[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 1u>::Type& borderColor,
const __m128i& m128_factorsRight,
const __m128i& m128_factorsBottom,
typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3073 ocean_assert(source !=
nullptr);
3074 ocean_assert(targetPositionPixels !=
nullptr);
3080 OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3084 for (
unsigned int i = 0u; i < 4u; ++i)
3088 pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3089 pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3090 pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3091 pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3095 pixels[i * 4u + 0u] = borderColor;
3096 pixels[i * 4u + 1u] = borderColor;
3097 pixels[i * 4u + 2u] = borderColor;
3098 pixels[i * 4u + 3u] = borderColor;
3102 static_assert(
sizeof(__m128i) ==
sizeof(pixels),
"Invalid data type!");
3104 const __m128i m128_pixels = _mm_load_si128((
const __m128i*)pixels);
3110 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3111 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3116 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3117 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3118 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3119 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3135 const __m128i mask32_topLeft =
SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull);
3136 const __m128i mask32_topRight =
SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull);
3137 const __m128i mask32_bottomLeft =
SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull);
3138 const __m128i mask32_bottomRight =
SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull);
3146 __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3147 __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3149 multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3150 multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3152 __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3154 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3159 const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14),
SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3161 *((
unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3165OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(
const uint8_t* source,
const unsigned int offsetsTopLeft[4],
const unsigned int offsetsTopRight[4],
const unsigned int offsetsBottomLeft[4],
const unsigned int offsetsBottomRight[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 3u>::Type& borderColor,
const __m128i& m128_factorsRight,
const __m128i& m128_factorsBottom,
typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3167 ocean_assert(source !=
nullptr);
3168 ocean_assert(targetPositionPixels !=
nullptr);
3174 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3175 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3176 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3177 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3181 for (
unsigned int i = 0u; i < 4u; ++i)
3185 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3186 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3187 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3188 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3192 topLeftPixels[i] = borderColor;
3193 topRightPixels[i] = borderColor;
3194 bottomLeftPixels[i] = borderColor;
3195 bottomRightPixels[i] = borderColor;
3199 static_assert(
sizeof(__m128i) <=
sizeof(topLeftPixels),
"Invalid data type!");
3201 const __m128i m128_topLeftPixels = _mm_load_si128((
const __m128i*)topLeftPixels);
3202 const __m128i m128_topRightPixels = _mm_load_si128((
const __m128i*)topRightPixels);
3203 const __m128i m128_bottomLeftPixels = _mm_load_si128((
const __m128i*)bottomLeftPixels);
3204 const __m128i m128_bottomRightPixels = _mm_load_si128((
const __m128i*)bottomRightPixels);
3210 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3211 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3216 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3217 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3218 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3219 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3222 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3225 memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3231OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(
const uint8_t* source,
const unsigned int offsetsTopLeft[4],
const unsigned int offsetsTopRight[4],
const unsigned int offsetsBottomLeft[4],
const unsigned int offsetsBottomRight[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 4u>::Type& borderColor,
const __m128i& m128_factorsRight,
const __m128i& m128_factorsBottom,
typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3233 ocean_assert(source !=
nullptr);
3234 ocean_assert(targetPositionPixels !=
nullptr);
3240 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3241 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3242 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3243 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3248 for (
unsigned int i = 0u; i < 4u; ++i)
3252 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3253 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3254 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3255 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3259 topLeftPixels[i] = borderColor;
3260 topRightPixels[i] = borderColor;
3261 bottomLeftPixels[i] = borderColor;
3262 bottomRightPixels[i] = borderColor;
3266 static_assert(
sizeof(__m128i) ==
sizeof(topLeftPixels),
"Invalid data type!");
3268 const __m128i m128_topLeftPixels = _mm_load_si128((
const __m128i*)topLeftPixels);
3269 const __m128i m128_topRightPixels = _mm_load_si128((
const __m128i*)topRightPixels);
3270 const __m128i m128_bottomLeftPixels = _mm_load_si128((
const __m128i*)bottomLeftPixels);
3271 const __m128i m128_bottomRightPixels = _mm_load_si128((
const __m128i*)bottomRightPixels);
3277 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3278 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3283 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3284 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3285 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3286 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3289 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3291 _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
// Bilinear interpolation of four consecutive target pixels with tChannels 8-bit channels each (generic fallback).
//
// The four combined weights per pixel are computed with SSE, spilled to aligned arrays, and the
// per-channel blending is then done with scalar code.
//
// source:                base pointer; the per-pixel offsets below are added to it to reach the four neighbors.
// offsetsTopLeft/TopRight/BottomLeft/BottomRight:
//                        four offsets each (one per target pixel), in elements relative to `source`.
// validPixels:           four flags, one per target pixel.
//                        NOTE(review): not referenced in the lines visible here; presumably it selects between
//                        the interpolation branch and the border-color branch - confirm against the full file.
// borderColor:           pixel value that is copied to the target in the border-color branch.
// m128_factorsRight:     four 32-bit horizontal weights (weight of the right neighbors); the left weight is 128 - right.
// m128_factorsBottom:    four 32-bit vertical weights (weight of the bottom neighbors); the top weight is 128 - bottom.
// targetPositionPixels:  receives the four resulting pixels; the pointer is advanced by one pixel per iteration.
//
// NOTE(review): this listing shows no braces and no if/else lines, so the exact branch structure inside
// the loop cannot be read from here.
3294template <
unsigned int tChannels>
3295OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(
const uint8_t* source,
const unsigned int offsetsTopLeft[4],
const unsigned int offsetsTopRight[4],
const unsigned int offsetsBottomLeft[4],
const unsigned int offsetsBottomRight[4],
const unsigned int validPixels[4],
const typename DataType<uint8_t, tChannels>::Type& borderColor,
const __m128i& m128_factorsRight,
const __m128i& m128_factorsBottom,
typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3297 ocean_assert(source !=
nullptr);
3298 ocean_assert(targetPositionPixels !=
nullptr);
// scratch arrays for the four combined weights of each pixel; they are written with _mm_store_si128 below,
// which requires a 16-byte aligned destination (presumably what OCEAN_ALIGN_DATA(16) provides)
3302 OCEAN_ALIGN_DATA(16)
unsigned int factorsTopLeft[4];
3303 OCEAN_ALIGN_DATA(16)
unsigned int factorsTopRight[4];
3304 OCEAN_ALIGN_DATA(16)
unsigned int factorsBottomLeft[4];
3305 OCEAN_ALIGN_DATA(16)
unsigned int factorsBottomRight[4];
// complementary weights: left = 128 - right, top = 128 - bottom
3311 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3312 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
// combined 2D weights; the four products of one pixel always sum to 128 * 128 = 2^14
3317 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3318 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3319 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3320 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
// spill the weights so that they can be indexed per pixel in the scalar loop
3324 _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3325 _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3326 _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3327 _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
// one iteration per target pixel
3329 for (
unsigned int i = 0u; i < 4u; ++i)
// addresses of the four neighbors of pixel i
3333 const uint8_t* topLeft = source + offsetsTopLeft[i];
3334 const uint8_t* topRight = source + offsetsTopRight[i];
3336 const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3337 const uint8_t* bottomRight = source + offsetsBottomRight[i];
3339 const unsigned int& factorTopLeft = factorsTopLeft[i];
3340 const unsigned int& factorTopRight = factorsTopRight[i];
3341 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3342 const unsigned int& factorBottomRight = factorsBottomRight[i];
3344 for (
unsigned int n = 0u; n < tChannels; ++n)
// weighted sum with 14 bits of fixed-point precision; adding 8192 (= 2^13) before the shift rounds to nearest
3346 ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
// border-color branch: the whole pixel is replaced by the border color
3351 *targetPositionPixels = borderColor;
// advance to the next target pixel
3354 targetPositionPixels++;
3360#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// Applies an affine warp with bilinear interpolation to a subset of target rows, using NEON.
//
// For every target pixel (x, y) in the rows [firstTargetRow, firstTargetRow + numberTargetRows) the source
// position is computed from the first two rows of `source_A_target`:
//   sourceX = A(0,0) * x + A(0,1) * y + A(0,2),   sourceY = A(1,0) * x + A(1,1) * y + A(1,2)
// (the third matrix row is not read). Positions inside [0, sourceWidth - 1] x [0, sourceHeight - 1] are
// interpolated; all other pixels receive the border color.
//
// source / target:          8-bit frames with tChannels interleaved channels (must not be null).
// sourceWidth/Height:       source size in pixels, both > 0.
// source_A_target:          transformation from target coordinates to source coordinates (must not be null).
// borderColor:              tChannels bytes used for pixels without a valid source position; nullptr selects zeros.
// targetWidth/Height:       target size in pixels; targetWidth must be >= 4 because four pixels are handled per step.
// firstTargetRow / numberTargetRows: the row range handled by this call.
// sourcePaddingElements / targetPaddingElements: extra elements at the end of each row (added to the stride).
//
// NOTE(review): this listing shows no braces, no else/continue lines, and no declaration of `PixelType`
// (presumably a tChannels-byte pixel type declared on a line not visible here). Pointer advances of
// `targetRow` after each handled pixel group are not visible either - confirm against the full file.
3362template <
unsigned int tChannels>
3363void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(
const uint8_t* source,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const SquareMatrix3* source_A_target,
const uint8_t* borderColor, uint8_t* target,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements)
3365 static_assert(tChannels >= 1u,
"Invalid channel number!");
3367 ocean_assert(source && target);
3368 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3369 ocean_assert_and_suppress_unused(targetWidth >= 4u && targetHeight > 0u, targetHeight);
3370 ocean_assert(source_A_target);
3373 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
// row strides in elements (pixels * channels + padding)
3375 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3376 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
// fall back to an all-zero pixel if the caller did not provide a border color
3380 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3381 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// per-group scratch buffers: validity flags and the element offsets of the four neighbors of each pixel
3383 unsigned int validPixels[4];
3385 unsigned int topLeftOffsetsElements[4];
3386 unsigned int topRightOffsetsElements[4];
3387 unsigned int bottomLeftOffsetsElements[4];
3388 unsigned int bottomRightOffsetsElements[4];
3390 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
// x-dependent coefficients of the transformation (first matrix column), identical for all rows
3393 const float32x4_t m128_f_X0 = vdupq_n_f32(
float((*source_A_target)(0, 0)));
3394 const float32x4_t m128_f_X1 = vdupq_n_f32(
float((*source_A_target)(1, 0)));
3396 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3398 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
// row-constant part of the transformation: A(r,1) * y + A(r,2)
3422 const float32x4_t m128_f_C0 = vdupq_n_f32(
float((*source_A_target)(0, 1) *
Scalar(y) + (*source_A_target)(0, 2)));
3423 const float32x4_t m128_f_C1 = vdupq_n_f32(
float((*source_A_target)(1, 1) *
Scalar(y) + (*source_A_target)(1, 2)));
3426 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3429 const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
// largest valid integer coordinates, used for clamping the right/bottom neighbors
3432 const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3433 const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
// the same limits as floats, used for the validity test
3436 const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(
float(sourceWidth - 1u));
3437 const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(
float(sourceHeight - 1u));
// four target pixels per iteration
3439 for (
unsigned int x = 0u; x < targetWidth; x += 4u)
// last, incomplete group: step back so that the group ends exactly at the row end (some pixels are recomputed)
3441 if (x + 4u > targetWidth)
3446 ocean_assert(x >= 4u && targetWidth > 4u);
3447 const unsigned int newX = targetWidth - 4u;
3449 ocean_assert(x > newX);
3450 targetRow -= x - newX;
// NOTE(review): the matching update of `x` to `newX` is not visible in this listing - confirm
3455 ocean_assert(!(x + 4u < targetWidth));
// x coordinates of the four pixels of this group
3461 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3462 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
// source position: C + X * x
3465 const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3466 const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
// a pixel is valid if 0 <= sourceX <= sourceWidth - 1 and 0 <= sourceY <= sourceHeight - 1 (all-ones lane mask)
3470 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero));
3471 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero));
3473 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY);
// OR all four lanes together to test cheaply whether any pixel of the group is valid
3477 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3478 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
// debug-only cross-check of the lane reduction above
3481 OCEAN_ALIGN_DATA(16)
unsigned int debugValidPixels[4];
3482 vst1q_u32(debugValidPixels, m128_u_validPixel);
3483 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
// no valid pixel in this group: fill all four target pixels with the border color
3486 targetRow[0] = *bColor;
3487 targetRow[1] = *bColor;
3488 targetRow[2] = *bColor;
3489 targetRow[3] = *bColor;
// at least one pixel is valid: keep the flags for the per-pixel decision
3498 vst1q_u32(validPixels, m128_u_validPixel);
3499 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
// integer part of the source position (float -> unsigned conversion rounds toward zero)
3504 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3505 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
// right/bottom neighbors, clamped so that the last column/row is never exceeded
3508 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3509 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
// element offsets of the four neighbors: column * channels + row * stride
3512 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements);
3513 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements);
3514 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3515 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3518 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3519 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3520 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3521 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
// fractional part of the source position
3527 float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3528 float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
// scale the fraction to the fixed-point range [0, 128]
3531 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3532 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
// adding 0.5 before the truncating conversion rounds to the nearest integer weight
3534 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3535 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
// more than four channels: per-pixel scalar blending below; the call to the NEON helper at the end of
// this block is presumably the alternative branch (the `else` is not visible in this listing)
3537 if constexpr (tChannels > 4u)
// complementary weights: 128 - tx, 128 - ty
3547 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3548 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
// combined weights for top-left, top-right, bottom-left and bottom-right
3552 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3553 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3554 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3555 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3557 unsigned int tx_ty_s[4];
3558 unsigned int txty_s[4];
3559 unsigned int tx_tys[4];
3560 unsigned int txtys[4];
3563 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3564 vst1q_u32(txty_s, m128_u_txty_);
3565 vst1q_u32(tx_tys, m128_u_tx_ty);
3566 vst1q_u32(txtys, m128_u_txty);
// one iteration per pixel of the group
3568 for (
unsigned int i = 0u; i < 4u; ++i)
// all four neighbor offsets must lie inside the source buffer
3572 ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3573 ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3574 ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3575 ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3577 const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3578 const uint8_t* topRight = source + topRightOffsetsElements[i];
3580 const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3581 const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3583 const unsigned int tx_ty_ = tx_ty_s[i];
3584 const unsigned int txty_ = txty_s[i];
3585 const unsigned int tx_ty = tx_tys[i];
3586 const unsigned int txty = txtys[i];
// the four weights always sum to 2^14, which is why the result is shifted by 14 bits
3588 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3590 for (
unsigned int n = 0u; n < tChannels; ++n)
// weighted sum; adding 8192 (= 2^13) before the shift rounds to nearest
3592 ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
// border-color branch for a single pixel (presumably taken when validPixels[i] is zero - condition not visible)
3597 *targetRow = *bColor;
// up to four channels: the specialized NEON helper blends all four pixels at once
3605 interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
// Applies a homography warp with bilinear interpolation to a subset of output rows, using NEON.
//
// For every output pixel (x, y) in the rows [firstOutputRow, firstOutputRow + numberOutputRows) the
// homogeneous input position (xx, yy, zz) = input_H_output * (x, y, 1) is computed and de-homogenized:
//   inputX = xx / zz,   inputY = yy / zz
// Positions inside [0, inputWidth - 1] x [0, inputHeight - 1] are interpolated; all other pixels receive
// the border color.
//
// input / output:           8-bit frames with tChannels interleaved channels (must not be null).
// inputWidth/Height:        input size in pixels, both > 0.
// input_H_output:           homography from output coordinates to input coordinates (must not be null).
// borderColor:              tChannels bytes used for pixels without a valid input position; nullptr selects zeros.
// outputWidth/Height:       output size in pixels; outputWidth must be >= 4 because four pixels are handled per step.
// inputPaddingElements / outputPaddingElements: extra elements at the end of each row (added to the stride).
// firstOutputRow / numberOutputRows: the row range handled by this call.
//
// Note that the padding parameters precede the row-range parameters here.
//
// NOTE(review): this listing shows no braces, no else/continue lines, no #else/#endif for the division
// switch, and no declaration of `PixelType` (presumably a tChannels-byte pixel type declared on a line
// not visible here) - confirm against the full file.
3612template <
unsigned int tChannels>
3613void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output,
const uint8_t* borderColor, uint8_t* output,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
3615 static_assert(tChannels >= 1u,
"Invalid channel number!");
3617 ocean_assert(input !=
nullptr && output !=
nullptr);
3618 ocean_assert(inputWidth > 0u && inputHeight > 0u);
3619 ocean_assert_and_suppress_unused(outputWidth >= 4u && outputHeight > 0u, outputHeight);
3620 ocean_assert(input_H_output !=
nullptr);
3622 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
// row strides in elements (pixels * channels + padding)
3624 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3625 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// fall back to an all-zero pixel if the caller did not provide a border color
3629 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3630 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// per-group scratch buffers: validity flags and the element offsets of the four neighbors of each pixel
3632 unsigned int validPixels[4];
3634 unsigned int topLeftOffsetsElements[4];
3635 unsigned int topRightOffsetsElements[4];
3636 unsigned int bottomLeftOffsetsElements[4];
3637 unsigned int bottomRightOffsetsElements[4];
3639 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
// x-dependent coefficients of the homography (first matrix column), identical for all rows
3642 const float32x4_t m128_f_X0 = vdupq_n_f32(
float((*input_H_output)(0, 0)));
3643 const float32x4_t m128_f_X1 = vdupq_n_f32(
float((*input_H_output)(1, 0)));
3644 const float32x4_t m128_f_X2 = vdupq_n_f32(
float((*input_H_output)(2, 0)));
3646 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3648 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
// row-constant part of the homography: H(r,1) * y + H(r,2)
3672 const float32x4_t m128_f_C0 = vdupq_n_f32(
float((*input_H_output)(0, 1) *
Scalar(y) + (*input_H_output)(0, 2)));
3673 const float32x4_t m128_f_C1 = vdupq_n_f32(
float((*input_H_output)(1, 1) *
Scalar(y) + (*input_H_output)(1, 2)));
3674 const float32x4_t m128_f_C2 = vdupq_n_f32(
float((*input_H_output)(2, 1) *
Scalar(y) + (*input_H_output)(2, 2)));
3677 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3680 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
// largest valid integer coordinates, used for clamping the right/bottom neighbors
3683 const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3684 const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
// the same limits as floats, used for the validity test
3687 const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(
float(inputWidth - 1u));
3688 const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(
float(inputHeight - 1u));
// four output pixels per iteration
3690 for (
unsigned int x = 0u; x < outputWidth; x += 4u)
// last, incomplete group: step back so that the group ends exactly at the row end (some pixels are recomputed)
3692 if (x + 4u > outputWidth)
3697 ocean_assert(x >= 4u && outputWidth > 4u);
3698 const unsigned int newX = outputWidth - 4u;
3700 ocean_assert(x > newX);
3701 outputPixelData -= x - newX;
// NOTE(review): the matching update of `x` to `newX` is not visible in this listing - confirm
3706 ocean_assert(!(x + 4u < outputWidth));
// x coordinates of the four pixels of this group
3712 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3713 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
// homogeneous input position: C + X * x
3716 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3717 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3718 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
// de-homogenization, variant 1: exact vector division
3720#ifdef USE_DIVISION_ARM64_ARCHITECTURE
3723 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3724 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
// de-homogenization, variant 2 (presumably the #else branch): reciprocal estimate of zz refined by
// one Newton-Raphson step (vrecpsq_f32 yields 2 - zz * estimate), followed by a multiplication
3730 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3731 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128);
3734 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3735 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
// a pixel is valid if 0 <= inputX <= inputWidth - 1 and 0 <= inputY <= inputHeight - 1 (all-ones lane mask)
3741 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero));
3742 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero));
3744 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY);
// OR all four lanes together to test cheaply whether any pixel of the group is valid
3748 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3749 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
// debug-only cross-check of the lane reduction above
3752 OCEAN_ALIGN_DATA(16)
unsigned int debugValidPixels[4];
3753 vst1q_u32(debugValidPixels, m128_u_validPixel);
3754 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
// no valid pixel in this group: fill all four output pixels with the border color and move on
3757 outputPixelData[0] = *bColor;
3758 outputPixelData[1] = *bColor;
3759 outputPixelData[2] = *bColor;
3760 outputPixelData[3] = *bColor;
3762 outputPixelData += 4;
// at least one pixel is valid: keep the flags for the per-pixel decision
3769 vst1q_u32(validPixels, m128_u_validPixel);
3770 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
// integer part of the input position (float -> unsigned conversion rounds toward zero)
3775 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3776 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
// right/bottom neighbors, clamped so that the last column/row is never exceeded
3779 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3780 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
// element offsets of the four neighbors: column * channels + row * stride
3783 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements);
3784 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements);
3785 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3786 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3789 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3790 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3791 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3792 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
// fractional part of the input position
3798 float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3799 float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
// scale the fraction to the fixed-point range [0, 128]
3802 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3803 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
// adding 0.5 before the truncating conversion rounds to the nearest integer weight
3805 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3806 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
// more than four channels: per-pixel scalar blending below; the call to the NEON helper at the end of
// this block is presumably the alternative branch (the `else` is not visible in this listing)
3808 if constexpr (tChannels > 4u)
// complementary weights: 128 - tx, 128 - ty
3818 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3819 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
// combined weights for top-left, top-right, bottom-left and bottom-right
3823 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3824 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3825 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3826 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3828 unsigned int tx_ty_s[4];
3829 unsigned int txty_s[4];
3830 unsigned int tx_tys[4];
3831 unsigned int txtys[4];
3834 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3835 vst1q_u32(txty_s, m128_u_txty_);
3836 vst1q_u32(tx_tys, m128_u_tx_ty);
3837 vst1q_u32(txtys, m128_u_txty);
// one iteration per pixel of the group
3839 for (
unsigned int i = 0u; i < 4u; ++i)
// all four neighbor offsets must lie inside the input buffer
3843 ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3844 ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3845 ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3846 ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3848 const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3849 const uint8_t* topRight = input + topRightOffsetsElements[i];
3851 const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3852 const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3854 const unsigned int tx_ty_ = tx_ty_s[i];
3855 const unsigned int txty_ = txty_s[i];
3856 const unsigned int tx_ty = tx_tys[i];
3857 const unsigned int txty = txtys[i];
// the four weights always sum to 2^14, which is why the result is shifted by 14 bits
3859 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3861 for (
unsigned int n = 0u; n < tChannels; ++n)
// weighted sum; adding 8192 (= 2^13) before the shift rounds to nearest
3863 ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
// border-color branch for a single pixel (presumably taken when validPixels[i] is zero - condition not visible)
3868 *outputPixelData = *bColor;
// up to four channels: the specialized NEON helper blends all four pixels at once, then skip past them
3876 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3877 outputPixelData += 4;
// Single-channel specialization: bilinear interpolation of four 8-bit pixels with NEON.
//
// The four neighbors of each target pixel are packed into one 32-bit lane of a 128-bit register
// (byte 0 = top-left, byte 1 = top-right, byte 2 = bottom-left, byte 3 = bottom-right), so that all four
// target pixels are blended in parallel.
//
// source:                 base pointer of the source frame.
// offsets...Elements:     four element offsets each (one per target pixel) relative to `source`.
// validPixels:            four flags, one per target pixel.
// borderColor:            value used for all four neighbors in the border-color branch.
// m128_factorsRight:      four horizontal weights (right neighbors); the left weight is 128 - right.
// m128_factorsBottom:     four vertical weights (bottom neighbors); the top weight is 128 - bottom.
// targetPositionPixels:   receives four result bytes.
//
// NOTE(review): the declaration of `pixels`, the lines that fill it from `source` for valid pixels, the
// branch condition, and the definitions of `m64_mask0` / `m64_mask1` are not visible in this listing.
3884OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 1u>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3886 ocean_assert(source !=
nullptr);
3887 ocean_assert(targetPositionPixels !=
nullptr);
// gather the neighbors of each of the four target pixels
3899 for (
unsigned int i = 0u; i < 4u; ++i)
// border-color branch: all four neighbors are the border color, so the blend yields the border color
3910 pixels[i * 4u + 0u] = borderColor;
3911 pixels[i * 4u + 1u] = borderColor;
3912 pixels[i * 4u + 2u] = borderColor;
3913 pixels[i * 4u + 3u] = borderColor;
// the 16 gathered bytes must fill exactly one 128-bit register
3917 static_assert(
sizeof(uint8x16_t) ==
sizeof(pixels),
"Invalid data type!");
3919 const uint8x16_t m128_pixels = vld1q_u8((
const uint8_t*)pixels);
// complementary weights: left = 128 - right, top = 128 - bottom
3925 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3926 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
// combined 2D weights; the four products of one pixel sum to 128 * 128 = 2^14
3931 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3932 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3933 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3934 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
// isolate byte k of every 32-bit lane via shift + mask and multiply it with the matching weight
3949 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3951 const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3952 const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3953 const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3954 const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3956 const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
// normalize: add 8192 (= 2^13) for rounding, shift by 14; each result ends up in the low byte of its 32-bit lane
3960 const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
// 64-bit ARM: collect the four result bytes with table lookups and write them with one 4-byte copy
3970#if defined(__aarch64__)
3975 const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3976 const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3978 const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3980 const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3981 memcpy(targetPositionPixels, &result,
sizeof(uint32_t));
// otherwise (presumably the #else branch): extract the low byte of each 32-bit lane individually
3985 *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3986 *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3987 *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3988 *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
// NOTE(review): the signature of the routine these statements belong to is not visible in this listing.
// `topLeft_u_8x8`, `topRight_u_8x8`, `bottomLeft_u_8x8`, `bottomRight_u_8x8`,
// `factorsRight_factorsBottom_128_u_8x16` and `targetPositionPixels` are presumably its parameters - confirm.
//
// The statements blend eight 8-bit values at once: first horizontally in 16-bit precision, then
// vertically in 32-bit precision, and finally narrow the result back to 8 bits.
//
// complementary weights for all 16 packed factors: 128 - factor
3995 const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16);
// low half of the packed register: horizontal (right) factors; high half: vertical (bottom) factors, widened to 16 bits
3997 const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3998 const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
4000 const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
4001 const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
// horizontal blend of the top and the bottom row: left * factorLeft + right * factorRight (widening to 16 bits)
4003 const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8);
4004 const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8);
// vertical blend: top * factorTop + bottom * factorBottom (widening to 32 bits), lower and upper four values separately
4006 const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8));
4007 const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
// rounding right shift by 14 bits while narrowing to 16 bits, then narrowing to 8 bits
4009 const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14));
4011 const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
// write the eight result bytes
4013 vst1_u8(targetPositionPixels, result_8x8);
// Two-channel specialization: bilinear interpolation of four 8-bit pixels with NEON.
//
// For every target pixel the left and right neighbor of the top row are stored next to each other in
// `topPixels`, and those of the bottom row in `bottomPixels`. After loading, each 32-bit lane therefore
// holds (byte 0, byte 1) = the two channels of the left neighbor and (byte 2, byte 3) = the two channels
// of the right neighbor.
//
// source:                 base pointer of the source frame.
// offsets...Elements:     four element offsets each (one per target pixel) relative to `source`.
// validPixels:            four flags, one per target pixel.
// borderColor:            pixel used for all four neighbors in the border-color branch.
// m128_factorsRight:      four horizontal weights (right neighbors); the left weight is 128 - right.
// m128_factorsBottom:     four vertical weights (bottom neighbors); the top weight is 128 - bottom.
// targetPositionPixels:   receives the four resulting pixels (8 bytes are stored).
//
// NOTE(review): the declaration of `PixelType`, the branch condition inside the loop, and the definitions
// of `m64_mask0` / `m64_mask1` are not visible in this listing.
4017OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 2u>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
4019 ocean_assert(source !=
nullptr);
4020 ocean_assert(targetPositionPixels !=
nullptr);
// two neighbors (left, right) per target pixel and row
4026 PixelType topPixels[8];
4027 PixelType bottomPixels[8];
// gather the neighbors of each of the four target pixels
4036 for (
unsigned int i = 0u; i < 4u; ++i)
// interpolation branch: copy the four neighbors from the source frame
4040 *(topPixels + i * 2u + 0u) = *((
const PixelType*)(source + offsetsTopLeftElements[i]));
4041 *(topPixels + i * 2u + 1u) = *((
const PixelType*)(source + offsetsTopRightElements[i]));
4042 *(bottomPixels + i * 2u + 0u) = *((
const PixelType*)(source + offsetsBottomLeftElements[i]));
4043 *(bottomPixels + i * 2u + 1u) = *((
const PixelType*)(source + offsetsBottomRightElements[i]));
// border-color branch: all four neighbors are the border color, so the blend yields the border color
4047 *(topPixels + i * 2u + 0u) = borderColor;
4048 *(topPixels + i * 2u + 1u) = borderColor;
4049 *(bottomPixels + i * 2u + 0u) = borderColor;
4050 *(bottomPixels + i * 2u + 1u) = borderColor;
// each gathered array must fill exactly one 128-bit register
4054 static_assert(
sizeof(uint32x4_t) ==
sizeof(topPixels),
"Invalid data type!");
4056 const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)topPixels));
4057 const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)bottomPixels));
// complementary weights: left = 128 - right, top = 128 - bottom
4063 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4064 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
// combined 2D weights; the four products of one pixel sum to 128 * 128 = 2^14
4069 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4070 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4071 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4072 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
// mask that isolates the lowest byte of every 32-bit lane
4075 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
// top-left neighbor: channel 0 is byte 0, channel 1 is byte 1
4077 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4078 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
// top-right neighbor: channel 0 is byte 2, channel 1 is byte 3
4080 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4081 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
// bottom-left neighbor
4083 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4084 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
// bottom-right neighbor
4086 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4087 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
// normalize: add 8192 (= 2^13) for rounding, shift by 14
4092 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4093 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
// re-interleave the channels: channel 0 in byte 0 and channel 1 in byte 1 of every 32-bit lane
4099 const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
// compact the result bytes of the lower and the upper register half with table lookups and merge them
4106 const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4107 const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4109 const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
// write four 2-channel pixels (8 bytes) at once
4116 vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
// Three-channel specialization: bilinear interpolation of four 8-bit pixels with NEON.
//
// Every neighbor pixel (3 bytes) is copied into its own 32-bit slot, so that each of the four
// neighbor registers holds one neighbor per target pixel. The channels are then extracted with
// shifts by 0, 8 and 16 bits.
//
// source:                 base pointer of the source frame.
// offsets...Elements:     four element offsets each (one per target pixel) relative to `source`.
// validPixels:            four flags, one per target pixel.
// borderColor:            pixel used for all four neighbors in the border-color branch.
// m128_factorsRight:      four horizontal weights (right neighbors); the left weight is 128 - right.
// m128_factorsBottom:     four vertical weights (bottom neighbors); the top weight is 128 - bottom.
// targetPositionPixels:   receives the four resulting pixels (3 bytes each).
//
// NOTE(review): the branch condition inside the gather loop is not visible in this listing.
// NOTE(review): extracting the channels with shifts after a memcpy into uint32_t assumes a
// little-endian byte order - confirm for all supported targets.
4120OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 3u>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4122 ocean_assert(source !=
nullptr);
4123 ocean_assert(targetPositionPixels !=
nullptr);
// one 32-bit slot per target pixel and neighbor; only three bytes of each slot are written below, and
// the fourth byte is never extracted by the shift/mask operations that follow
4127 uint32_t topLeftPixels[4];
4128 uint32_t topRightPixels[4];
4129 uint32_t bottomLeftPixels[4];
4130 uint32_t bottomRightPixels[4];
// gather the neighbors of each of the four target pixels
4138 for (
unsigned int i = 0u; i < 4u; ++i)
// interpolation branch: copy the three channel bytes of each neighbor from the source frame
4142 memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i],
sizeof(uint8_t) * 3);
4143 memcpy(topRightPixels + i, source + offsetsTopRightElements[i],
sizeof(uint8_t) * 3);
4144 memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i],
sizeof(uint8_t) * 3);
4145 memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i],
sizeof(uint8_t) * 3);
// border-color branch: all four neighbors are the border color, so the blend yields the border color
4149 memcpy(topLeftPixels + i, &borderColor,
sizeof(uint8_t) * 3);
4150 memcpy(topRightPixels + i, &borderColor,
sizeof(uint8_t) * 3);
4151 memcpy(bottomLeftPixels + i, &borderColor,
sizeof(uint8_t) * 3);
4152 memcpy(bottomRightPixels + i, &borderColor,
sizeof(uint8_t) * 3);
// each gathered array must fill exactly one 128-bit register
4156 static_assert(
sizeof(uint32x4_t) ==
sizeof(topLeftPixels),
"Invalid data type!");
4158 const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4159 const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4160 const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4161 const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
// complementary weights: left = 128 - right, top = 128 - bottom
4167 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4168 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
// combined 2D weights; the four products of one pixel sum to 128 * 128 = 2^14
4173 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4174 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4175 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4176 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
// mask that isolates the lowest byte of every 32-bit lane
4179 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
// top-left contribution for channel 0, 1 and 2 (bytes 0, 1 and 2 of every lane)
4181 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4182 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4183 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
// add the top-right contribution
4185 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4186 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4187 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
// add the bottom-left contribution
4189 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4190 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4191 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
// add the bottom-right contribution
4193 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4194 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4195 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
// normalize: add 8192 (= 2^13) for rounding, shift by 14
4200 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4201 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4202 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
// re-interleave the channels into bytes 0, 1 and 2 of every 32-bit lane
4206 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
// the result pixels are 3 bytes wide while the lanes are 4 bytes wide, so spill and copy pixel by pixel
4212 uint32_t intermediateBuffer[4];
4213 vst1q_u32(intermediateBuffer, m128_interpolation);
4215 for (
unsigned int i = 0u; i < 4u; ++i)
4217 memcpy(targetPositionPixels + i, intermediateBuffer + i,
sizeof(uint8_t) * 3);
// Specialization for pixels with four 8-bit channels: bilinearly interpolates four output pixels at once with NEON.
// For each of the four pixels, the four neighboring source pixels are read at the given element offsets relative
// to 'source', weighted with factors derived from m128_factorsRight and m128_factorsBottom, and the four results
// are written to targetPositionPixels with a single 16-byte store.
// The factor math uses 128 as the full weight, so the incoming factors are presumably in [0, 128] -- TODO confirm against the caller.
// NOTE(review): this excerpt does not show braces, the PixelType definition, or the branch heads inside the gather loop.
4222OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const DataType<uint8_t, 4u>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4224 ocean_assert(source !=
nullptr);
4225 ocean_assert(targetPositionPixels !=
nullptr);
// Staging buffers: one entry per output pixel for each of the four neighbors.
4231 PixelType topLeftPixels[4];
4232 PixelType topRightPixels[4];
4233 PixelType bottomLeftPixels[4];
4234 PixelType bottomRightPixels[4];
// Gather the neighbors of each of the four output pixels.
// NOTE(review): the condition selecting between the two groups of assignments below is not visible here;
// presumably the source reads are used when validPixels[i] is set and borderColor otherwise -- confirm.
4242 for (
unsigned int i = 0u; i < 4u; ++i)
// Neighbors read directly from the source frame, each as one whole pixel.
4246 *(topLeftPixels + i) = *((
const PixelType*)(source + offsetsTopLeftElements[i]));
4247 *(topRightPixels + i) = *((
const PixelType*)(source + offsetsTopRightElements[i]));
4248 *(bottomLeftPixels + i) = *((
const PixelType*)(source + offsetsBottomLeftElements[i]));
4249 *(bottomRightPixels + i) = *((
const PixelType*)(source + offsetsBottomRightElements[i]));
// All four neighbors replaced by the border color, so the interpolation result equals the border color.
4253 *(topLeftPixels + i) = borderColor;
4254 *(topRightPixels + i) = borderColor;
4255 *(bottomLeftPixels + i) = borderColor;
4256 *(bottomRightPixels + i) = borderColor;
// Each staging buffer must fill exactly one 128-bit register.
4260 static_assert(
sizeof(uint32x4_t) ==
sizeof(topLeftPixels),
"Invalid data type!");
// Load each staging buffer so that every 32-bit lane holds one complete pixel.
4262 const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)topLeftPixels));
4263 const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)topRightPixels));
4264 const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)bottomLeftPixels));
4265 const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((
const uint8_t*)bottomRightPixels));
// Complementary factors: left = 128 - right, top = 128 - bottom.
4271 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4272 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
// Per-corner weights; for each pixel the four products sum to 128 * 128 = 16384 = 2^14.
4277 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4278 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4279 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4280 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
// Mask isolating the lowest byte of every 32-bit lane.
4283 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
// Channel k of a pixel is extracted by shifting the lane right by 8 * k bits and masking with 0xFF.
// Start the per-channel accumulators with the top-left contribution.
4285 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4286 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4287 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4288 uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
// Add the top-right contribution.
4290 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4291 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4292 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4293 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
// Add the bottom-left contribution.
4295 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4296 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4297 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4298 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
// Add the bottom-right contribution.
4300 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4301 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4302 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4303 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
// Normalize with rounding: add half of 2^14 (8192) and shift right by 14 bits.
4308 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4309 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4310 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4311 const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
// Re-pack the four channel results into one 32-bit pixel per lane (channel k goes back to bits 8 * k).
4315 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
// Write all four interpolated pixels (16 bytes) to the target in one store.
4317 vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
// NOTE(review): the signature of this function is not visible in this excerpt. The body reads
// topLeftPixels_u8x16, topRightPixels_u8x16, bottomLeftPixels_u8x16, bottomRightPixels_u8x16, m128_factorsRight,
// m128_factorsBottom and useOptimizedNEONFactorReplication, and writes 16 bytes to targetPositionPixels.
// It interpolates in two passes on byte lanes: a horizontal pass (8-bit -> 16-bit) followed by a vertical pass
// (16-bit -> 32-bit), then rounds and narrows back to 8 bits.
// The pixel registers presumably hold four pixels with four 8-bit channels each -- TODO confirm against the signature.
4322 ocean_assert(targetPositionPixels !=
nullptr);
// Byte-lane factors; '_lo' covers the first 8 bytes of the pixel registers and '_hi' the last 8 bytes.
4327 uint8x8_t factorsRight_lo;
4328 uint8x8_t factorsRight_hi;
4329 uint8x8_t factorsLeft_lo;
4330 uint8x8_t factorsLeft_hi;
4331 uint8x8_t factorsBottom_lo;
4332 uint8x8_t factorsBottom_hi;
4333 uint8x8_t factorsTop_lo;
4334 uint8x8_t factorsTop_hi;
// NOTE(review): the matching preprocessor branch lines are not visible in this excerpt; presumably the runtime
// flag selects the zip-based path on AArch64, while other targets always use it and only silence the unused flag -- confirm.
4336#if defined(__aarch64__)
4338 if (!useOptimizedNEONFactorReplication)
4340 (void)useOptimizedNEONFactorReplication;
// Zip-based replication: the four 32-bit factors f0..f3 are narrowed (truncated) to 8 bits, giving
// [f0 f1 f2 f3 f0 f1 f2 f3]; interleaving the vector with itself twice yields
// lo = [f0 f0 f0 f0 f1 f1 f1 f1] and hi = [f2 f2 f2 f2 f3 f3 f3 f3].
4343 const uint8x8_t factorsRight_u8x8 = vmovn_u16(vcombine_u16(vmovn_u32(m128_factorsRight), vmovn_u32(m128_factorsRight)));
4344 const uint8x8x2_t factorsRight_zip1 = vzip_u8(factorsRight_u8x8, factorsRight_u8x8);
4345 const uint8x8x2_t factorsRight_zip2 = vzip_u8(factorsRight_zip1.val[0], factorsRight_zip1.val[0]);
4347 factorsRight_lo = factorsRight_zip2.val[0];
4348 factorsRight_hi = factorsRight_zip2.val[1];
// Complementary factor per byte lane: left = 128 - right.
4349 factorsLeft_lo = vsub_u8(vdup_n_u8(128u), factorsRight_lo);
4350 factorsLeft_hi = vsub_u8(vdup_n_u8(128u), factorsRight_hi);
// Same replication for the vertical factors.
4352 const uint8x8_t factorsBottom_u8x8 = vmovn_u16(vcombine_u16(vmovn_u32(m128_factorsBottom), vmovn_u32(m128_factorsBottom)));
4353 const uint8x8x2_t factorsBottom_zip1 = vzip_u8(factorsBottom_u8x8, factorsBottom_u8x8);
4354 const uint8x8x2_t factorsBottom_zip2 = vzip_u8(factorsBottom_zip1.val[0], factorsBottom_zip1.val[0]);
4356 factorsBottom_lo = factorsBottom_zip2.val[0];
4357 factorsBottom_hi = factorsBottom_zip2.val[1];
// Complementary factor per byte lane: top = 128 - bottom.
4358 factorsTop_lo = vsub_u8(vdup_n_u8(128u), factorsBottom_lo);
4359 factorsTop_hi = vsub_u8(vdup_n_u8(128u), factorsBottom_hi);
// Table-lookup replication, compiled only for AArch64: indices 0, 4, 8, 12 select the lowest byte of each
// 32-bit factor, and each selected byte is repeated four times.
4361#if defined(__aarch64__)
4367 static const uint8x16_t replicateU32Idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
4369 const uint8x16_t factorsRight_rep = vqtbl1q_u8(vreinterpretq_u8_u32(m128_factorsRight), replicateU32Idx);
4370 const uint8x16_t factorsLeft_rep = vsubq_u8(vdupq_n_u8(128u), factorsRight_rep);
4372 factorsRight_lo = vget_low_u8(factorsRight_rep);
4373 factorsRight_hi = vget_high_u8(factorsRight_rep);
4374 factorsLeft_lo = vget_low_u8(factorsLeft_rep);
4375 factorsLeft_hi = vget_high_u8(factorsLeft_rep);
4377 const uint8x16_t factorsBottom_rep = vqtbl1q_u8(vreinterpretq_u8_u32(m128_factorsBottom), replicateU32Idx);
4378 const uint8x16_t factorsTop_rep = vsubq_u8(vdupq_n_u8(128u), factorsBottom_rep);
4380 factorsBottom_lo = vget_low_u8(factorsBottom_rep);
4381 factorsBottom_hi = vget_high_u8(factorsBottom_rep);
4382 factorsTop_lo = vget_low_u8(factorsTop_rep);
4383 factorsTop_hi = vget_high_u8(factorsTop_rep);
// Split each 16-byte pixel register into its low and high 8-byte halves.
4388 const uint8x8_t topLeft_lo = vget_low_u8(topLeftPixels_u8x16);
4389 const uint8x8_t topLeft_hi = vget_high_u8(topLeftPixels_u8x16);
4390 const uint8x8_t topRight_lo = vget_low_u8(topRightPixels_u8x16);
4391 const uint8x8_t topRight_hi = vget_high_u8(topRightPixels_u8x16);
4392 const uint8x8_t bottomLeft_lo = vget_low_u8(bottomLeftPixels_u8x16);
4393 const uint8x8_t bottomLeft_hi = vget_high_u8(bottomLeftPixels_u8x16);
4394 const uint8x8_t bottomRight_lo = vget_low_u8(bottomRightPixels_u8x16);
4395 const uint8x8_t bottomRight_hi = vget_high_u8(bottomRightPixels_u8x16);
// Horizontal pass, low half: left * factorLeft + right * factorRight, widened to 16 bits.
4399 const uint16x8_t topInterp_lo = vmlal_u8(vmull_u8(topLeft_lo, factorsLeft_lo), topRight_lo, factorsRight_lo);
4400 const uint16x8_t botInterp_lo = vmlal_u8(vmull_u8(bottomLeft_lo, factorsLeft_lo), bottomRight_lo, factorsRight_lo);
// Widen the vertical factors to 16 bits for the widening 16 x 16 -> 32 bit multiplies.
4403 const uint16x8_t factorsTop_lo_u16 = vmovl_u8(factorsTop_lo);
4404 const uint16x8_t factorsBottom_lo_u16 = vmovl_u8(factorsBottom_lo);
// Vertical pass, low half: top * factorTop + bottom * factorBottom ('A' = first four lanes, 'B' = last four lanes).
4406 const uint32x4_t result_lo_A = vmlal_u16(vmull_u16(vget_low_u16(topInterp_lo), vget_low_u16(factorsTop_lo_u16)), vget_low_u16(botInterp_lo), vget_low_u16(factorsBottom_lo_u16));
4407 const uint32x4_t result_lo_B = vmlal_u16(vmull_u16(vget_high_u16(topInterp_lo), vget_high_u16(factorsTop_lo_u16)), vget_high_u16(botInterp_lo), vget_high_u16(factorsBottom_lo_u16));
// Horizontal pass, high half.
4410 const uint16x8_t topInterp_hi = vmlal_u8(vmull_u8(topLeft_hi, factorsLeft_hi), topRight_hi, factorsRight_hi);
4411 const uint16x8_t botInterp_hi = vmlal_u8(vmull_u8(bottomLeft_hi, factorsLeft_hi), bottomRight_hi, factorsRight_hi);
4413 const uint16x8_t factorsTop_hi_u16 = vmovl_u8(factorsTop_hi);
4414 const uint16x8_t factorsBottom_hi_u16 = vmovl_u8(factorsBottom_hi);
// Vertical pass, high half.
4416 const uint32x4_t result_hi_A = vmlal_u16(vmull_u16(vget_low_u16(topInterp_hi), vget_low_u16(factorsTop_hi_u16)), vget_low_u16(botInterp_hi), vget_low_u16(factorsBottom_hi_u16));
4417 const uint32x4_t result_hi_B = vmlal_u16(vmull_u16(vget_high_u16(topInterp_hi), vget_high_u16(factorsTop_hi_u16)), vget_high_u16(botInterp_hi), vget_high_u16(factorsBottom_hi_u16));
// Normalize by 2^14 (= 128 * 128) with a rounding right shift while narrowing to 16 bits.
4420 const uint16x4_t narrow_lo_A = vrshrn_n_u32(result_lo_A, 14);
4421 const uint16x4_t narrow_lo_B = vrshrn_n_u32(result_lo_B, 14);
4422 const uint16x4_t narrow_hi_A = vrshrn_n_u32(result_hi_A, 14);
4423 const uint16x4_t narrow_hi_B = vrshrn_n_u32(result_hi_B, 14);
// Narrow to 8 bits and restore the original byte order of each half.
4425 const uint8x8_t result_lo = vmovn_u16(vcombine_u16(narrow_lo_A, narrow_lo_B));
4426 const uint8x8_t result_hi = vmovn_u16(vcombine_u16(narrow_hi_A, narrow_hi_B));
// Store all 16 result bytes to the target.
4428 vst1q_u8((uint8_t*)targetPositionPixels, vcombine_u8(result_lo, result_hi));
// Generic version for any channel number: the four per-corner weights of each pixel are computed with NEON,
// stored to plain arrays, and the weighted sum is then evaluated per pixel and per channel in scalar code.
// The factor math uses 128 as the full weight, so the incoming factors are presumably in [0, 128] -- TODO confirm against the caller.
// NOTE(review): this excerpt does not show braces or the branch heads inside the pixel loop.
4431template <
unsigned int tChannels>
4432OCEAN_FORCE_INLINE
void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(
const uint8_t* source,
const unsigned int offsetsTopLeftElements[4],
const unsigned int offsetsTopRightElements[4],
const unsigned int offsetsBottomLeftElements[4],
const unsigned int offsetsBottomRightElements[4],
const unsigned int validPixels[4],
const typename DataType<uint8_t, tChannels>::Type& borderColor,
const uint32x4_t& m128_factorsRight,
const uint32x4_t& m128_factorsBottom,
typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4434 ocean_assert(source !=
nullptr);
4435 ocean_assert(targetPositionPixels !=
nullptr);
// Scalar copies of the per-corner weights, one entry per output pixel; filled by the stores below.
4439 unsigned int factorsTopLeft[4];
4440 unsigned int factorsTopRight[4];
4441 unsigned int factorsBottomLeft[4];
4442 unsigned int factorsBottomRight[4];
// Complementary factors: left = 128 - right, top = 128 - bottom.
4448 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4449 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
// Per-corner weights; for each pixel the four products sum to 128 * 128 = 16384 = 2^14.
4454 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4455 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4456 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4457 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
// Move the weights out of the NEON registers so the scalar loop can index them.
4461 vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4462 vst1q_u32(factorsTopRight, m128_factorsTopRight);
4463 vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4464 vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
// Handle the four output pixels one after another.
// NOTE(review): the condition choosing between the interpolation and the borderColor assignment is not visible
// here; presumably validPixels[i] -- confirm.
4466 for (
unsigned int i = 0u; i < 4u; ++i)
// Locate the four neighbors of output pixel i inside the source frame.
4470 const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4471 const uint8_t* topRight = source + offsetsTopRightElements[i];
4473 const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4474 const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4476 const unsigned int& factorTopLeft = factorsTopLeft[i];
4477 const unsigned int& factorTopRight = factorsTopRight[i];
4478 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4479 const unsigned int& factorBottomRight = factorsBottomRight[i];
// Weighted sum per channel, rounded by adding 8192 (half of 2^14) before the shift by 14 bits.
4481 for (
unsigned int n = 0u; n < tChannels; ++n)
4483 ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
// Alternative result: the whole pixel is set to the border color.
4488 *targetPositionPixels = borderColor;
// Advance to the next output pixel.
4491 targetPositionPixels++;
// Warps a subset of output rows using four homographies: each output position is mapped through all four
// homographies and the four resulting input positions are blended with bilinear weights (_tx, _ty).
// Blended positions outside [0, inputWidth - 1] x [0, inputHeight - 1] receive the border color (zero if
// borderColor is nullptr); all others are bilinearly interpolated from the input frame.
// NOTE(review): the definitions of 'left', 'top', 'invWidth', 'invHeight' and 'outputPosition' are not visible in
// this excerpt; they presumably derive from outputQuadrantCenterX/Y and outputOriginX/Y -- confirm against the full file.
4497template <
unsigned int tChannels>
4498inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* homographies,
const uint8_t* borderColor, uint8_t* output,
const Scalar outputQuadrantCenterX,
const Scalar outputQuadrantCenterY,
const int outputOriginX,
const int outputOriginY,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
4500 static_assert(tChannels >= 1u,
"Invalid channel number!");
4502 ocean_assert(input && output);
4503 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4504 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4506 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX <
Scalar(outputWidth));
4507 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY <
Scalar(outputHeight));
4508 ocean_assert(homographies);
// Number of elements between the starts of two consecutive output rows.
4510 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// Largest valid input coordinates, used for the bounds check below.
4512 const Scalar scalarInputWidth_1 =
Scalar(inputWidth - 1u);
4513 const Scalar scalarInputHeight_1 =
Scalar(inputHeight - 1u);
// Fallback border color with all channels zero, used when no border color is provided.
4515 constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4516 const uint8_t*
const bColor = borderColor ? borderColor : zeroColor;
// Start writing at the first row of this subset.
4518 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4532 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4534 for (
unsigned int x = 0; x < outputWidth; ++x)
// Blend parameters, presumably clamped to [0, 1] by minmax -- TODO confirm the semantics of minmax.
4538 const Scalar _tx = minmax<Scalar>(0, (outputPosition.
x() - left) * invWidth, 1);
4539 const Scalar _ty = minmax<Scalar>(0, (outputPosition.
y() - top) * invHeight, 1);
// Complementary blend parameters.
4543 const Scalar tx = 1 - _tx;
4544 const Scalar ty = 1 - _ty;
// Map the output position through each of the four homographies.
4546 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4547 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4548 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4549 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
// Bilinear weights of the four mapped positions.
4551 const Scalar tTopLeft = tx * ty;
4552 const Scalar tTopRight = _tx * ty;
4553 const Scalar tBottomLeft = tx * _ty;
4554 const Scalar tBottomRight = _tx * _ty;
// Final input position: weighted sum of the four mapped positions.
4556 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4557 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
// Outside the input frame: write the border color channel by channel.
4559 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
4561 for (
unsigned int c = 0u; c < tChannels; ++c)
4563 outputData[c] = bColor[c];
// Inside the input frame: bilinear interpolation of the input pixel.
4568 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
// Advance to the next output pixel.
4571 outputData += tChannels;
// Skip the padding elements at the end of the output row.
4574 outputData += outputPaddingElements;
// Warps a subset of output rows with one homography (input_H_output) and writes a mask next to the output:
// positions that map inside [0, inputWidth - 1] x [0, inputHeight - 1] are interpolated and get maskValue,
// all other positions get 0xFF - maskValue in the mask.
// NOTE(review): the computation of 'inputPosition' and 'debugInputPosition', the PixelType definition and the
// per-pixel pointer increments are not visible in this excerpt.
4578template <
unsigned int tChannels>
4579void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
unsigned int firstOutputRow,
const unsigned int numberOutputRows)
4581 static_assert(tChannels >= 1u,
"Invalid channel number!");
4583 ocean_assert(input !=
nullptr && output !=
nullptr);
4584 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4585 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4586 ocean_assert(input_H_output !=
nullptr);
// The row subset must lie inside the output frame.
4588 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
// Row strides in elements; the mask has one element per pixel.
4590 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4591 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
// Largest valid input coordinates, used for the bounds check below.
4593 const Scalar scalarInputWidth_1 =
Scalar(inputWidth - 1u);
4594 const Scalar scalarInputHeight_1 =
Scalar(inputHeight - 1u);
4598 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
// Row start pointers are recomputed from y for every row.
4600 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4601 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
// Third homography row split into its x coefficient and the part that is constant within this row.
// Presumably the first two rows are prepared the same way on lines not visible here -- confirm.
4625 const Scalar X2 = (*input_H_output)(2, 0);
4626 const Scalar constValue2 = (*input_H_output)(2, 1) *
Scalar(y) + (*input_H_output)(2, 2);
4628 for (
unsigned int x = 0; x < outputWidth; ++x)
// Debug check: the computed position must match debugInputPosition within 0.01.
4634 ocean_assert(inputPosition.
isEqual(debugInputPosition,
Scalar(0.01)));
// Outside the input frame: flag the pixel with the inverted mask value.
4637 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
4639 *outputMaskData = 0xFF - maskValue;
// Inside the input frame: interpolate the pixel and flag it with maskValue.
4643 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4644 *outputMaskData = maskValue;
// Mask-producing variant of the four-homography warp: each output position is mapped through four homographies,
// the mapped positions are blended with bilinear weights, and the blended position is either interpolated
// (mask = maskValue) or, if outside the input frame, only flagged in the mask (mask = 0xFF - maskValue).
// NOTE(review): the definitions of 'left', 'top', 'invWidth', 'invHeight' and 'outputPosition', and the per-pixel
// mask pointer increment, are not visible in this excerpt.
4653template <
unsigned int tChannels>
4654inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const Scalar outputQuadrantCenterX,
const Scalar outputQuadrantCenterY,
const int outputOriginX,
const int outputOriginY,
const unsigned int outputWidth,
const unsigned int outputHeight,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const unsigned int firstOutputRow,
const unsigned int numberOutputRows)
4656 static_assert(tChannels >= 1u,
"Invalid channel number!");
4658 ocean_assert(input && output);
4659 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4660 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4662 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX <
Scalar(outputWidth));
4663 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY <
Scalar(outputHeight));
4664 ocean_assert(homographies);
// Row strides in elements; the mask has one element per pixel.
4666 const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4667 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
// Largest valid input coordinates, used for the bounds check below.
4669 const Scalar scalarInputWidth_1 =
Scalar(inputWidth - 1u);
4670 const Scalar scalarInputHeight_1 =
Scalar(inputHeight - 1u);
// Move both write pointers to the first row of this subset.
4672 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4673 outputMask += firstOutputRow * outputMaskStrideElements;
4687 for (
unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4689 for (
unsigned int x = 0u; x < outputWidth; ++x)
// Blend parameters, presumably clamped to [0, 1] by minmax -- TODO confirm the semantics of minmax.
4693 const Scalar _tx = minmax<Scalar>(0, (outputPosition.
x() - left) * invWidth, 1);
4694 const Scalar _ty = minmax<Scalar>(0, (outputPosition.
y() - top) * invHeight, 1);
// Complementary blend parameters.
4698 const Scalar tx = 1 - _tx;
4699 const Scalar ty = 1 - _ty;
// Map the output position through each of the four homographies.
4701 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4702 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4703 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4704 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
// Bilinear weights of the four mapped positions.
4706 const Scalar tTopLeft = tx * ty;
4707 const Scalar tTopRight = _tx * ty;
4708 const Scalar tBottomLeft = tx * _ty;
4709 const Scalar tBottomRight = _tx * _ty;
// Final input position: weighted sum of the four mapped positions.
4711 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4712 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
// Outside the input frame: only the mask is written, with the inverted mask value.
4714 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
4716 *outputMask = 0xFFu - maskValue;
// Inside the input frame: interpolate the pixel and flag it with maskValue.
4720 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4721 *outputMask = maskValue;
// Advance to the next output pixel.
4724 outputData += tChannels;
// Skip the padding elements at the end of the output row and of the mask row.
4728 outputData += outputPaddingElements;
4729 outputMask += outputMaskPaddingElements;
// Warps a subset of rows of the output camera frame into the input camera frame and interpolates the input.
// Positions outside the input frame receive the border color (zero if borderColor is nullptr).
// NOTE(review): the computation of 'inputPosition' (which presumably uses normalizedHomography, both cameras,
// outputCameraDistortionLookup and useDistortionParameters) and the definitions of 'scalarInputWidth_1',
// 'scalarInputHeight_1' and 'PixelType' are not visible in this excerpt -- confirm against the full file.
4733template <
unsigned int tChannels>
4734void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(
const PinholeCamera* inputCamera,
const PinholeCamera* outputCamera,
const PinholeCamera::DistortionLookup* outputCameraDistortionLookup,
const uint8_t* input,
const SquareMatrix3* normalizedHomography,
const bool useDistortionParameters,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4736 static_assert(tChannels >= 1u,
"Invalid channel number!");
4738 ocean_assert(inputCamera && outputCamera && normalizedHomography);
4739 ocean_assert(input && output);
// The row subset must lie inside the output camera frame.
4741 ocean_assert(firstRow + numberRows <= outputCamera->height());
// The output frame has the dimensions of the output camera.
4743 const unsigned int outputStrideElements = tChannels * outputCamera->
width() + outputPaddingElements;
// Border color viewed as one whole pixel; falls back to an all-zero pixel when none is provided.
4752 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4753 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// Start writing at the first row of this subset.
4755 uint8_t* outputData = output + firstRow * outputStrideElements;
4757 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4759 for (
unsigned int x = 0; x < outputCamera->
width(); ++x)
// Outside the input frame: copy the border color as one pixel assignment.
4763 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
4765 *((PixelType*)outputData) = *bColor;
// Inside the input frame: bilinear interpolation; the input frame has the dimensions of the input camera.
4769 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->
width(), inputCamera->
height(), inputPaddingElements, inputPosition, outputData);
// Advance to the next output pixel.
4772 outputData += tChannels;
// Skip the padding elements at the end of the output row.
4775 outputData += outputPaddingElements;
// Mask-producing variant of the camera-based warp: positions that map inside the input frame are interpolated
// and flagged with maskValue; all other positions are flagged with 0xFF - maskValue in the mask.
// NOTE(review): the computation of 'inputPosition' (which presumably uses normalizedHomography, both cameras and
// outputCameraDistortionLookup), the definitions of 'scalarInputWidth_1' and 'scalarInputHeight_1', and the
// per-pixel mask pointer increment are not visible in this excerpt -- confirm against the full file.
4779template <
unsigned int tChannels>
4780void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(
const PinholeCamera* inputCamera,
const PinholeCamera* outputCamera,
const PinholeCamera::DistortionLookup* outputCameraDistortionLookup,
const uint8_t* input,
const unsigned int inputPaddingElements,
const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const uint8_t maskValue,
const unsigned int firstRow,
const unsigned int numberRows)
4782 static_assert(tChannels >= 1u,
"Invalid channel number!");
4784 ocean_assert(inputCamera !=
nullptr && outputCamera !=
nullptr && normalizedHomography !=
nullptr);
4785 ocean_assert(input !=
nullptr && output !=
nullptr);
// The row subset must lie inside the output camera frame.
4787 ocean_assert(firstRow + numberRows <= outputCamera->height());
// Row strides in elements; the mask has one element per pixel.
4789 const unsigned int outputStrideElements = outputCamera->
width() * tChannels + outputPaddingElements;
4790 const unsigned int outputMaskStrideElements = outputCamera->
width() + outputMaskPaddingElements;
// Move both write pointers to the first row of this subset.
4797 uint8_t* outputData = output + firstRow * outputStrideElements;
4798 outputMask += firstRow * outputMaskStrideElements;
// Fixed local constant here rather than a function parameter; its use is on lines not visible in this excerpt.
4800 constexpr bool useDistortionParameters =
true;
4802 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4804 for (
unsigned int x = 0; x < outputCamera->
width(); ++x)
// Outside the input frame: only the mask is written, with the inverted mask value.
4808 if (inputPosition.
x() <
Scalar(0) || inputPosition.
x() > scalarInputWidth_1 || inputPosition.
y() <
Scalar(0) || inputPosition.
y() > scalarInputHeight_1)
4810 *outputMask = 0xFF - maskValue;
// Inside the input frame: interpolate the pixel and flag it with maskValue.
4814 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->
width(), inputCamera->
height(), inputPaddingElements, inputPosition, outputData);
4815 *outputMask = maskValue;
// Advance to the next output pixel.
4818 outputData += tChannels;
// Skip the padding elements at the end of the output row and of the mask row.
4822 outputData += outputPaddingElements;
4823 outputMask += outputMaskPaddingElements;
// Transforms a subset of output rows of an 8-bit frame using a lookup table (input_LT_output): for every output
// pixel an input position is determined, pixels whose position lies inside
// [0, inputWidth1] x [0, inputHeight1] are bilinearly interpolated, all others receive the border color
// (zero if borderColor is nullptr). The output frame has sizeX() columns of the lookup table.
// NOTE(review): the definitions of 'PixelType', 'inputWidth1', 'inputHeight1', 'rowLookupData' and
// 'inputPosition', the use of 'offset', and the per-pixel pointer increment are not visible in this excerpt;
// presumably 'offset' decides whether the lookup value is added to the output coordinate -- confirm.
4827template <
unsigned int tChannels>
4828void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4830 static_assert(tChannels >= 1u,
"Invalid channel number!");
4832 ocean_assert(input_LT_output !=
nullptr);
4833 ocean_assert(input !=
nullptr && output !=
nullptr);
4835 ocean_assert(inputWidth != 0u && inputHeight != 0u);
// The row subset must lie inside the lookup table.
4836 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
// Border color viewed as one whole pixel; falls back to an all-zero pixel when none is provided.
4840 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4841 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// The output width is taken from the lookup table.
4843 const unsigned int columns = (
unsigned int)(input_LT_output->
sizeX());
4845 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
// The lookup values are handled as Vector2 below, so the table type must match.
4847 static_assert(std::is_same<Vector2, LookupTable::Type>::value,
"Invalid data type!");
// Buffer with room for one Vector2 per output column; presumably holds the lookup values of the current row -- confirm.
4852 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4855 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
// Row start pointer is recomputed from y for every row.
4859 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4861 for (
unsigned int x = 0u; x < columns; ++x)
4863 const Vector2& lookupValue = rowLookupData[x];
// Inside the input frame (bounds inclusive): bilinear interpolation of the input pixel.
4867 if (inputPosition.
x() >=
Scalar(0) && inputPosition.
y() >=
Scalar(0) && inputPosition.
x() <= inputWidth1 && inputPosition.
y() <= inputHeight1)
4869 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
// Outside the input frame: copy the border color as one pixel assignment.
4873 *outputData = *bColor;
// Lookup-table transformation of a subset of output rows for frames with element type T.
// Same structure as the 8-bit lookup function, but pixels are interpolated with interpolatePixel<T, T, ...>.
// Pixels whose input position lies inside [0, inputWidth1] x [0, inputHeight1] are interpolated, all others
// receive the border color (zero if borderColor is nullptr).
// NOTE(review): the definitions of 'PixelType', 'inputWidth1', 'inputHeight1', 'rowLookupData' and
// 'inputPosition', the use of 'offset', and the per-pixel pointer increment are not visible in this excerpt.
4881template <
typename T,
unsigned int tChannels>
4882void FrameInterpolatorBilinear::lookupSubset(
const T* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const T* borderColor, T* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
4884 static_assert(tChannels >= 1u,
"Invalid channel number!");
// Debug-only check that T is not uint8_t; presumably 8-bit frames are expected to use the dedicated 8-bit function -- confirm.
4886 ocean_assert((!std::is_same<uint8_t, T>::value));
4888 ocean_assert(input_LT_output !=
nullptr);
4889 ocean_assert(input !=
nullptr && output !=
nullptr);
4891 ocean_assert(inputWidth != 0u && inputHeight != 0u);
// The row subset must lie inside the lookup table.
4892 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
// Border color viewed as one whole pixel; falls back to an all-zero pixel when none is provided.
4896 const T zeroColor[tChannels] = {T(0)};
4897 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// The output width is taken from the lookup table.
4899 const unsigned int columns = (
unsigned int)(input_LT_output->
sizeX());
// Stride measured in elements of type T.
4901 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
// The lookup values are handled as Vector2 below, so the table type must match.
4903 static_assert(std::is_same<Vector2, LookupTable::Type>::value,
"Invalid data type!");
// Buffer with room for one Vector2 per output column; presumably holds the lookup values of the current row -- confirm.
4908 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4911 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
// Row start pointer is recomputed from y for every row.
4915 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4917 for (
unsigned int x = 0u; x < columns; ++x)
4919 const Vector2& lookupValue = rowLookupData[x];
// Inside the input frame (bounds inclusive): bilinear interpolation with source and target type T.
4923 if (inputPosition.
x() >=
Scalar(0) && inputPosition.
y() >=
Scalar(0) && inputPosition.
x() <= inputWidth1 && inputPosition.
y() <= inputHeight1)
4925 interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
// Outside the input frame: copy the border color as one pixel assignment.
4929 *outputData = *bColor;
4937#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4940inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows,
const bool useOptimizedNEON,
const bool useOptimizedBilinearValuesAndFactorCalculation,
const bool )
4942 ocean_assert(input_LT_output !=
nullptr);
4943 ocean_assert(input !=
nullptr && output !=
nullptr);
4945 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4946 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4948 using PixelType = uint8_t;
4950 const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);
4952 const unsigned int outputWidth = (
unsigned int)(input_LT_output->
sizeX());
4953 ocean_assert(outputWidth >= 8u);
4955 static_assert(std::is_same<Vector2, LookupTable::Type>::value,
"Invalid data type!");
4957 const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
4958 const unsigned int outputStrideElements = outputWidth + outputPaddingElements;
4960 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
4963 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f);
4964 const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f);
4967 const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
4968 const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
4969 const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);
4971 const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
4973 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
4975 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);
4977 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(
float(inputWidth - 1u));
4978 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(
float(inputHeight - 1u));
4980 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
4981 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
4983 unsigned int validPixels[8];
4985 unsigned int topLeftOffsetsElements[8];
4986 unsigned int bottomLeftOffsetsElements[8];
4990 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4992 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
4996 float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
4997 float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;
4999 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(
float(y));
5001 for (
unsigned int x = 0u; x < outputWidth; x += 8u)
5003 if (x + 8u > outputWidth)
5008 ocean_assert(x >= 8u && outputWidth > 8u);
5009 const unsigned int newX = outputWidth - 8u;
5011 ocean_assert(x > newX);
5012 const unsigned int xOffset = x - newX;
5014 outputPixelData -= xOffset;
5018 additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(
float(xOffset)));
5019 additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(
float(xOffset)));
5025 ocean_assert(!(x + 8u < outputWidth));
5028 const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((
const float*)(rowLookupData + x + 0u));
5029 const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((
const float*)(rowLookupData + x + 4u));
5031 float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
5032 float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];
5034 float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
5035 float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];
5039 inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
5040 inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);
5042 inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
5043 inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);
5045 additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
5046 additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
5050 const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4));
5051 const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));
5053 const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4));
5054 const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));
5056 const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4);
5057 const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);
5059 vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
5060 vst1q_u32(validPixels + 4, validPixels4567_u_32x4);
5063 const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
5064 const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);
5066 const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
5067 const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);
5069 const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5070 const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5073 const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4);
5074 vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
5075 const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
5076 vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);
5078 const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4);
5079 vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
5080 const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
5081 vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);
5085 float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
5086 float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);
5088 float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
5089 float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);
5091 const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
5092 const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));
5094 const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
5095 const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));
5097 const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
5098 const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));
5100 const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));
5103 vst1q_u8(pixels + 0, constantBorderColor_u_8x16);
5104 vst1q_u8(pixels + 16, constantBorderColor_u_8x16);
5106 struct LeftRightPixel
5112 static_assert(
sizeof(LeftRightPixel) == 2,
"Invalid data type!");
5116 for (
unsigned int i = 0u; i < 8u; ++i)
5120 ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
5121 ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
5123 ((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
5124 ((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
5128 const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
5129 const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);
5131 interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);
5133 outputPixelData += 8;
// Applies a 2D lookup table to the output rows [firstRow, firstRow + numberRows) of an 8-bit frame with
// tChannels channels, handling four output pixels per inner iteration with NEON intrinsics.
// For every output pixel the table provides a 2D value from which the sampling position in the input frame is
// derived; the four surrounding input pixels are then blended bilinearly with integer factors in [0, 128].
// Output pixels whose sampling position lies outside the input frame receive the border color
// (all zero when borderColor is null).
// The output frame has the width of the lookup table (sizeX()), which must be at least four pixels.
// NOTE(review): this excerpt does not show every line of the function (braces, branch keywords, the
// declarations of PixelType and rowLookupData, and any use of the parameter `offset` are not visible).
5138template <
unsigned int tChannels>
5139void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset,
const uint8_t* borderColor, uint8_t* output,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows,
const bool useOptimizedNEON,
const bool useOptimizedBilinearValuesAndFactorCalculation,
const bool useOptimizedNEONFactorReplication)
5141 ocean_assert(input_LT_output !=
nullptr);
5142 ocean_assert(input !=
nullptr && output !=
nullptr);
5144 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5145 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
// Fallback border color with all channels set to zero, used when no explicit border color is given.
5149 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
5150 const PixelType*
const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
// The output row is as wide as the lookup table; each iteration writes four pixels, hence the minimum of four.
5152 const unsigned int outputWidth = (
unsigned int)(input_LT_output->
sizeX());
5153 ocean_assert(outputWidth >= 4u);
5155 static_assert(std::is_same<Vector2, LookupTable::Type>::value,
"Invalid data type!");
// Row strides in elements (uint8_t values), including the optional padding at the end of each row.
5157 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
5158 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
// Scratch memory for one row of lookup values (one VectorF2 per output pixel);
// rowLookupData presumably points into this buffer - its declaration is not visible here.
5160 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
5163 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f);
5164 const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f);
// Per-lane horizontal offsets 0, 1, 2, 3 of the four pixels handled in one iteration.
5167 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
5168 float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);
5170 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
5172 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
// Largest valid coordinates (width - 1, height - 1), as float for the validity test ...
5174 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(
float(inputWidth - 1u));
5175 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(
float(inputHeight - 1u));
5177#if defined(__aarch64__)
5178 const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
// ... and as unsigned integers for clamping the right/bottom neighbor coordinates.
5181 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
5182 const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
5183 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
// Validity flags and neighbor offsets (in elements) of the four pixels, kept in memory for scalar access.
5185 unsigned int validPixels[4];
5187 unsigned int topLeftOffsetsElements[4];
5188 unsigned int topRightOffsetsElements[4];
5189 unsigned int bottomLeftOffsetsElements[4];
5190 unsigned int bottomRightOffsetsElements[4];
5192 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
// First pixel of output row y.
5194 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
// Determine the lookup values of the entire row y.
5196 input_LT_output->
bilinearValues<
VectorF2>(y, rowLookupData, useOptimizedBilinearValuesAndFactorCalculation);
// Coordinates of the output pixels: lanes hold x + 0..3 horizontally, y is identical for the whole row.
5198 float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
5199 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(
float(y));
5201 for (
unsigned int x = 0u; x < outputWidth; x += 4u)
// Fewer than four pixels remain in this row: shift back by xOffset pixels so that the last four pixels of
// the row are handled; the overlapping pixels are simply computed a second time.
// NOTE(review): the matching adjustment of x (presumably x = newX) is not visible in this excerpt.
5203 if (x + 4u > outputWidth)
5208 ocean_assert(x >= 4u && outputWidth > 4u);
5209 const unsigned int newX = outputWidth - 4u;
5211 ocean_assert(x > newX);
5212 const unsigned int xOffset = x - newX;
5214 outputPixelData -= xOffset;
5218 additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(
float(xOffset)));
5224 ocean_assert(!(x + 4u < outputWidth));
// Load four (x, y) lookup values and de-interleave them into one vector of x and one vector of y values.
5227 const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((
const float*)(rowLookupData + x));
5229 float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
5230 float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];
// Add the coordinate of the output pixel to the lookup value and advance the x offsets by four pixels.
// NOTE(review): the parameter `offset` is not referenced in the visible lines; these additions are
// presumably conditional on it - confirm against the complete file.
5234 inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
5235 inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);
5237 additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
// A pixel is valid if 0 <= x <= inputWidth - 1 and 0 <= y <= inputHeight - 1 (all bits set per valid lane).
5241 const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4));
5242 const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4));
5244 const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4);
5246 vst1q_u32(validPixels, validPixels_u_32x4);
// Left/top neighbor: float position converted to unsigned integer (rounds toward zero).
5248 const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
5249 const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);
// Right/bottom neighbor: one pixel further, clamped to the last column/row of the input frame.
5251 const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
5252 const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
// Offsets of the four neighbors in elements: column * tChannels + row * inputStrideElements.
5254 const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5255 const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5256 const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5257 const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
// Interpolation factors for the right (tx) and bottom (ty) neighbors, scaled to the range [0, 128].
5260 uint32x4_t tx_128_u_32x4;
5261 uint32x4_t ty_128_u_32x4;
5263#if defined(__aarch64__)
// AArch64 variant: fractional part via floor (vrndmq_f32), scaled by 128 and rounded to nearest (vcvtaq_u32_f32).
5265 if (useOptimizedBilinearValuesAndFactorCalculation)
5268 const float32x4_t tx_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX_f_32x4, vrndmq_f32(inputPositionsX_f_32x4)), constant128_f_32x4);
5269 const float32x4_t ty_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY_f_32x4, vrndmq_f32(inputPositionsY_f_32x4)), constant128_f_32x4);
5271 tx_128_u_32x4 = vcvtaq_u32_f32(tx_f_32x4);
5272 ty_128_u_32x4 = vcvtaq_u32_f32(ty_f_32x4);
// Generic variant: fractional part = position - left/top coordinate, scaled by 128,
// rounded by adding 0.5 before the truncating conversion.
5278 float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
5279 float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));
5282 tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
5283 ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));
5285 tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
5286 ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));
// Frames with four channels have a dedicated path in which each pixel is gathered as one PixelType value
// (four PixelType values are loaded as 16 bytes below, so PixelType presumably is a 32-bit type here).
5289 if constexpr (tChannels == 4u)
5291 if (useOptimizedNEON)
5297 PixelType topLeftPixels[4];
5298 PixelType topRightPixels[4];
5299 PixelType bottomLeftPixels[4];
5300 PixelType bottomRightPixels[4];
// Gather the four neighbors of each of the four pixels; invalid pixels use the border color for all neighbors.
5302 topLeftPixels[0] = validPixels[0] ? *((
const PixelType*)(input + vgetq_lane_u32(topLeftOffsetsElements_u_32x4, 0))) : *bColor;
5303 topLeftPixels[1] = validPixels[1] ? *((
const PixelType*)(input + vgetq_lane_u32(topLeftOffsetsElements_u_32x4, 1))) : *bColor;
5304 topLeftPixels[2] = validPixels[2] ? *((
const PixelType*)(input + vgetq_lane_u32(topLeftOffsetsElements_u_32x4, 2))) : *bColor;
5305 topLeftPixels[3] = validPixels[3] ? *((
const PixelType*)(input + vgetq_lane_u32(topLeftOffsetsElements_u_32x4, 3))) : *bColor;
5307 topRightPixels[0] = validPixels[0] ? *((
const PixelType*)(input + vgetq_lane_u32(topRightOffsetsElements_u_32x4, 0))) : *bColor;
5308 topRightPixels[1] = validPixels[1] ? *((
const PixelType*)(input + vgetq_lane_u32(topRightOffsetsElements_u_32x4, 1))) : *bColor;
5309 topRightPixels[2] = validPixels[2] ? *((
const PixelType*)(input + vgetq_lane_u32(topRightOffsetsElements_u_32x4, 2))) : *bColor;
5310 topRightPixels[3] = validPixels[3] ? *((
const PixelType*)(input + vgetq_lane_u32(topRightOffsetsElements_u_32x4, 3))) : *bColor;
5312 bottomLeftPixels[0] = validPixels[0] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomLeftOffsetsElements_u_32x4, 0))) : *bColor;
5313 bottomLeftPixels[1] = validPixels[1] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomLeftOffsetsElements_u_32x4, 1))) : *bColor;
5314 bottomLeftPixels[2] = validPixels[2] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomLeftOffsetsElements_u_32x4, 2))) : *bColor;
5315 bottomLeftPixels[3] = validPixels[3] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomLeftOffsetsElements_u_32x4, 3))) : *bColor;
5317 bottomRightPixels[0] = validPixels[0] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomRightOffsetsElements_u_32x4, 0))) : *bColor;
5318 bottomRightPixels[1] = validPixels[1] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomRightOffsetsElements_u_32x4, 1))) : *bColor;
5319 bottomRightPixels[2] = validPixels[2] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomRightOffsetsElements_u_32x4, 2))) : *bColor;
5320 bottomRightPixels[3] = validPixels[3] ? *((
const PixelType*)(input + vgetq_lane_u32(bottomRightOffsetsElements_u_32x4, 3))) : *bColor;
// Move the gathered pixels into NEON registers (16 bytes each).
5322 const uint8x16_t topLeftPixels_u8x16 = vld1q_u8((
const uint8_t*)topLeftPixels);
5323 const uint8x16_t topRightPixels_u8x16 = vld1q_u8((
const uint8_t*)topRightPixels);
5324 const uint8x16_t bottomLeftPixels_u8x16 = vld1q_u8((
const uint8_t*)bottomLeftPixels);
5325 const uint8x16_t bottomRightPixels_u8x16 = vld1q_u8((
const uint8_t*)bottomRightPixels);
// Blend the four neighbors of all four pixels and write the result to the output row.
5327 interpolate4Pixels4Channel8BitPerChannelNEON(topLeftPixels_u8x16, topRightPixels_u8x16, bottomLeftPixels_u8x16, bottomRightPixels_u8x16, tx_128_u_32x4, ty_128_u_32x4, outputPixelData, useOptimizedNEONFactorReplication);
// Generic path: store the offsets to memory and let the helper gather and blend the pixels.
// NOTE(review): the two blocks below presumably are the else-branches (non-optimized 4-channel path and
// all other channel counts); the branch keywords are not visible in this excerpt.
5331 vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5332 vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5333 vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5334 vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5336 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
5341 vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5342 vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5343 vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5344 vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5346 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
// Advance to the next block of four output pixels.
5349 outputPixelData += 4;
// Applies a 2D lookup table to the output rows [firstRow, firstRow + numberRows) of an 8-bit frame with
// tChannels channels and additionally creates a mask frame with one element per output pixel.
// Output pixels whose sampling position lies inside the input frame are interpolated bilinearly and flagged
// with maskValue in the mask; all other pixels are flagged with 0xFF - maskValue.
// Both output frames have the width of the lookup table (sizeX()).
// NOTE(review): this excerpt does not show every line of the function (braces, the declarations of
// PixelType, rowLookupData, inputPosition, inputWidth1 and inputHeight1, the pointer increments, and any
// use of the parameter `offset` are not visible).
5356template <
unsigned int tChannels>
5357void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(
const uint8_t* input,
const unsigned int inputWidth,
const unsigned int inputHeight,
const LookupTable* input_LT_output,
const bool offset, uint8_t* output, uint8_t* outputMask,
const uint8_t maskValue,
const unsigned int inputPaddingElements,
const unsigned int outputPaddingElements,
const unsigned int outputMaskPaddingElements,
const unsigned int firstRow,
const unsigned int numberRows)
5359 ocean_assert(input_LT_output !=
nullptr);
5360 ocean_assert(input !=
nullptr && output !=
nullptr);
5362 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5363 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
// Number of output pixels per row, identical to the width of the lookup table.
5367 const unsigned int columns = (
unsigned int)(input_LT_output->
sizeX());
// Row strides in elements: the output frame holds tChannels elements per pixel, the mask one element per pixel.
5369 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5370 const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5372 static_assert(std::is_same<Vector2, LookupTable::Type>::value,
"Invalid data type!");
// Scratch memory for one row of lookup values (one Vector2 per output pixel);
// rowLookupData presumably points into this buffer - its declaration is not visible here.
5377 Memory rowLookupMemory = Memory::create<Vector2>(columns);
5380 for (
unsigned int y = firstRow; y < firstRow + numberRows; ++y)
// First pixel of row y in the output frame and in the mask frame.
5384 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5385 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5387 for (
unsigned int x = 0u; x < columns; ++x)
// NOTE(review): inputPosition presumably is derived from lookupValue (and `offset`) on a line not visible here.
5389 const Vector2& lookupValue = rowLookupData[x];
// Sampling position inside the input frame: interpolate the pixel and mark it as valid in the mask.
5393 if (inputPosition.
x() >= 0 && inputPosition.
y() >= 0 && inputPosition.
x() <= inputWidth1 && inputPosition.
y() <= inputHeight1)
5395 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5396 *outputMaskData = maskValue;
// Otherwise the pixel is flagged with the complementary mask value.
5400 *outputMaskData = 0xFFu - maskValue;
// Resizes an 8-bit frame with tChannels channels using bilinear interpolation.
// source / target: first elements of the source and target frames, must not be null.
// sourceWidth, sourceHeight, targetWidth, targetHeight: frame dimensions in pixels, each at least 1.
// sourceX_s_targetX, sourceY_s_targetY: positive horizontal and vertical scale factors, forwarded unchanged
// to the subset functions.
// sourcePaddingElements, targetPaddingElements: optional padding elements at the end of each row.
// worker: optional worker used to execute the resize via executeFunction() over all target rows.
// NOTE(review): this excerpt does not show every line of the function (braces, else/return statements and
// the #else/#endif lines of the preprocessor conditions are not visible), so the exact branch structure
// needs to be confirmed against the complete file.
5409template <
unsigned int tChannels>
5410void FrameInterpolatorBilinear::scale8BitPerChannel(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
Worker* worker)
5412 ocean_assert(source !=
nullptr && target !=
nullptr);
5413 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5414 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5415 ocean_assert(sourceX_s_targetX > 0.0);
5416 ocean_assert(sourceY_s_targetY > 0.0);
// Identical resolutions need no interpolation, the frame content is copied via subFrame().
5418 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5420 FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
// Multi-core execution: used when a worker is given and the frames are wide enough
// (at least 16 source elements per row and at least 8 target pixels per row).
5424 if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5426#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
// The NEON implementation is selected only if no frame dimension exceeds 65535.
5427 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
// The trailing '0u, 0u' arguments presumably are placeholders for the row range which the worker
// replaces for each executed subset, with the overall range being [0, targetHeight).
5429 worker->
executeFunction(
Worker::Function::createStatic(&
scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
// Generic (non-NEON) subset function distributed across the worker.
5433 worker->
executeFunction(
Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
// Single-core execution: the same width requirements and dimension limits decide whether the NEON
// implementation handles all target rows [0, targetHeight) in one call.
5438 if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5440#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5441 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5443 scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
// Generic implementation handling all target rows [0, targetHeight) in one call.
5449 scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
// Resizes the target rows [firstTargetRow, firstTargetRow + numberTargetRows) of an 8-bit frame with
// tChannels channels using bilinear interpolation with integer factors in the range [0, 128]
// (plain C++ implementation without SIMD instructions).
// source / target: first elements of the source and target frames, must not be null.
// sourceX_s_targetX, sourceY_s_targetY: positive horizontal and vertical scale factors.
// sourcePaddingElements, targetPaddingElements: optional padding elements at the end of each row.
// NOTE(review): this excerpt does not show every line of the function (braces and the computations of
// sy, ty, sx and tx are not visible).
5453template <
unsigned int tChannels>
5454void FrameInterpolatorBilinear::scale8BitPerChannelSubset(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
5456 ocean_assert(source !=
nullptr && target !=
nullptr);
5457 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5458 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5459 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
// Scale factors converted to the Scalar type used for the position calculations.
5461 const Scalar sourceX_T_targetX =
Scalar(sourceX_s_targetX);
5462 const Scalar sourceY_T_targetY =
Scalar(sourceY_s_targetY);
// Number of elements between the starts of two consecutive source rows.
5510 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
// Constant offsets (scale * 0.5 - 0.5); presumably added to 'scale * targetCoordinate' when computing
// sx/sy so that pixel centers of both frames are aligned - the use is not visible in this excerpt.
5512 const Scalar sourceX_T_targetXOffset = sourceX_T_targetX *
Scalar(0.5) -
Scalar(0.5);
5513 const Scalar sourceY_T_targetYOffset = sourceY_T_targetY *
Scalar(0.5) -
Scalar(0.5);
5516 const Scalar sourceHeight_1 =
Scalar(sourceHeight - 1u);
// Move the target pointer to the first row of this subset.
5518 target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5520 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5523 ocean_assert(sy >=
Scalar(0) && sy <
Scalar(sourceHeight));
// Index of the upper source row (integer part of the vertical source position).
5525 const unsigned int sTop = (
unsigned int)sy;
5526 ocean_assert(sy >=
Scalar(sTop));
5529 ocean_assert(ty >= 0 && ty <= 1);
// Vertical factors with 7 bit precision, both factors always sum up to 128.
5531 const unsigned int factorBottom = (
unsigned int)(ty *
Scalar(128) +
Scalar(0.5));
5532 const unsigned int factorTop = 128u - factorBottom;
// The lower row is the next source row; in the last source row the upper row is used twice.
5534 const uint8_t*
const sourceTop = source + sourceStrideElements * sTop;
5535 const uint8_t*
const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5537 for (
unsigned int x = 0; x < targetWidth; ++x)
5540 ocean_assert(sx >=
Scalar(0) && sx <
Scalar(sourceWidth));
// Index of the left source pixel (integer part of the horizontal source position).
5542 const unsigned int sLeft = (
unsigned int)sx;
5543 ocean_assert(sx >=
Scalar(sLeft));
5546 ocean_assert(tx >= 0 && tx <= 1);
// Horizontal factors with 7 bit precision, both factors always sum up to 128.
5548 const unsigned int factorRight = (
unsigned int)(tx *
Scalar(128) +
Scalar(0.5));
5549 const unsigned int factorLeft = 128u - factorRight;
// Element offset from the left to the right pixel; zero in the last column so that the left pixel is used twice.
5551 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5553 const uint8_t*
const sourceTopLeft = sourceTop + sLeft * tChannels;
5554 const uint8_t*
const sourceBottomLeft = sourceBottom + sLeft * tChannels;
// Combined factors of the four neighbors, their sum always is 128 * 128 = 16384.
5556 const unsigned int factorTopLeft = factorTop * factorLeft;
5557 const unsigned int factorTopRight = factorTop * factorRight;
5558 const unsigned int factorBottomLeft = factorBottom * factorLeft;
5559 const unsigned int factorBottomRight = factorBottom * factorRight;
5561 for (
unsigned int n = 0u; n < tChannels; ++n)
// Weighted sum of the four neighbors, normalized by 16384 (>> 14) with rounding (+ 8192).
5563 target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5564 + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
// Next target pixel.
5567 target += tChannels;
// Skip the padding elements at the end of the target row.
5570 target += targetPaddingElements;
// Blends two rows element by element into one target row:
// targetRow[n] = sourceRowTop[n] * (1 - factorBottom) + sourceRowBottom[n] * factorBottom.
// factorBottom must be within [0, 1], and at least one element must be processed.
// NOTE(review): the function signature and the declaration of FloatType are on lines not visible in this
// excerpt; this presumably is the vertical row interpolation helper - confirm against the complete file.
5574template <
typename T>
5577 ocean_assert(sourceRowTop !=
nullptr);
5578 ocean_assert(sourceRowBottom !=
nullptr);
5579 ocean_assert(targetRow !=
nullptr);
5580 ocean_assert(elements >= 1u);
5581 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
// Both row weights are converted once to the floating point type used for the blending.
5585 const FloatType internalFactorBottom = FloatType(factorBottom);
5586 const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5588 for (
unsigned int n = 0u; n < elements; ++n)
// The weighted sum is converted back to the element type T.
5590 targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
// Interpolates one row horizontally: each target pixel x is the weighted sum of the source pixel starting at
// element interpolationLocations[x] and the pixel tChannels elements behind it, with
// interpolationFactorsRight[x] as weight of the right pixel and (1 - interpolationFactorsRight[x]) as weight
// of the left pixel.
// The runtime parameter 'channels' must be identical to the template parameter tChannels.
// NOTE(review): the function signature and the declaration of FloatType are on lines not visible in this
// excerpt; this presumably is the horizontal row interpolation helper - confirm against the complete file.
5594template <
typename T,
unsigned int tChannels>
5597 static_assert(tChannels != 0u,
"Invalid channel number!");
5599 ocean_assert(extendedSourceRow !=
nullptr);
5600 ocean_assert(targetRow !=
nullptr);
5601 ocean_assert(targetWidth >= 1u);
5602 ocean_assert(interpolationLocations !=
nullptr);
5603 ocean_assert(interpolationFactorsRight !=
nullptr);
5604 ocean_assert_and_suppress_unused(channels == tChannels, channels);
5608 for (
unsigned int x = 0u; x < targetWidth; ++x)
5610 const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5611 ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5613 const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
// Locations are element offsets into the source row. The right pixel is read unconditionally, so the source
// row must hold one more pixel behind the largest location (presumably why the row is called 'extended').
5615 const unsigned int& leftLocation = interpolationLocations[x];
5616 const unsigned int rightLocation = leftLocation + tChannels;
5618 for (
unsigned int n = 0u; n < tChannels; ++n)
// The weighted sum of the left and right pixel is converted back to the element type T.
5620 targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5625#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5627#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5630inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
5632 ocean_assert(source !=
nullptr && target !=
nullptr);
5633 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5634 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5635 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5636 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5637 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5639 ocean_assert(sourcePaddingElements == 0u);
5640 ocean_assert(targetPaddingElements == 0u);
5644 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5645 const PixelType*
const sourcePixelData = (
const PixelType*)source;
5648 unsigned int leftOffsets[8];
5661 const unsigned int sourceX_T_targetX_fixed16 = (
unsigned int)(
double(0x10000u) * sourceX_s_targetX + 0.5);
5662 const unsigned int sourceY_T_targetY_fixed16 = (
unsigned int)(
double(0x10000u) * sourceY_s_targetY + 0.5);
5664 const int targetOffsetX_fixed16 = (int)(
double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5665 const int targetOffsetY_fixed16 = (int)(
double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5668 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5671 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5674 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5677 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5679 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5680 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5682 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5684 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 +
int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5686 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u;
5687 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5688 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5690 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5692 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5694 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5696 const PixelType*
const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5697 const PixelType*
const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5699 for (
unsigned int x = 0; x < targetWidth; x += 8u)
5701 if (x + 8u > targetWidth)
5706 ocean_assert(x >= 8u && targetWidth > 8u);
5707 const unsigned int newX = targetWidth - 8u;
5709 ocean_assert(x > newX);
5710 targetPixelData -= x - newX;
5715 ocean_assert(!(x + 8u < targetWidth));
5721 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5722 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5725 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5726 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5728 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5729 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5733 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2);
5734 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5737 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5738 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5745 uint8x8x2_t topLeftPixels;
5746 uint8x8x2_t topRightPixels;
5748 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5749 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5751 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5752 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5754 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5755 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5757 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5758 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5760 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5761 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5763 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5764 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5766 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5767 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5769 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5770 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5775 uint8x8x2_t bottomLeftPixels;
5776 uint8x8x2_t bottomRightPixels;
5778 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5779 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5781 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5782 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5784 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5785 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5787 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5788 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5790 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5791 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5793 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5794 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5796 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5797 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5799 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5800 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5808 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5809 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5812 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5813 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5814 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5819 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5820 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5822 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5823 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5825 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5826 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5831 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5832 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5834 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5835 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5837 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5838 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5843 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5844 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5846 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5847 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5852 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5853 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5856 vst2_u8((uint8_t*)targetPixelData, result);
5858 targetPixelData += 8;
5864 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5866 for (
unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5868 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 +
int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5870 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5871 ocean_assert(lastSourcePixelLeft < sourceWidth);
5872 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5874 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5876 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5877 const unsigned int factorLeft = 128u - factorRight;
5879 for (
unsigned int c = 0u; c < 2u; ++c)
5881 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5882 + (((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5890#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5893inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
5895 ocean_assert(source !=
nullptr && target !=
nullptr);
5896 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5897 ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5898 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5899 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5900 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5902 ocean_assert(sourcePaddingElements == 0u);
5903 ocean_assert(targetPaddingElements == 0u);
5905 using PixelType = typename
DataType<uint8_t, 2u>::Type;
5907 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5908 const PixelType* const sourcePixelData = (const PixelType*)source;
5911 unsigned int leftOffsets[8];
5914 unsigned int topPixels[8];
5915 unsigned int bottomPixels[8];
5928 const
unsigned int sourceX_T_targetX_fixed16 = (
unsigned int)(
double(0x10000u) * sourceX_s_targetX + 0.5);
5929 const
unsigned int sourceY_T_targetY_fixed16 = (
unsigned int)(
double(0x10000u) * sourceY_s_targetY + 0.5);
5931 const
int targetOffsetX_fixed16 = (
int)(
double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5932 const
int targetOffsetY_fixed16 = (
int)(
double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5935 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5938 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5941 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5944 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5946 const
unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5947 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5949 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5951 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 +
int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5953 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u;
5954 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5955 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5957 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5959 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5961 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5963 const PixelType*
const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5964 const PixelType*
const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5966 for (
unsigned int x = 0; x < targetWidth; x += 8u)
5968 if (x + 8u > targetWidth)
5973 ocean_assert(x >= 8u && targetWidth > 8u);
5974 const unsigned int newX = targetWidth - 8u;
5976 ocean_assert(x > newX);
5977 targetPixelData -= x - newX;
5982 ocean_assert(!(x + 8u < targetWidth));
5988 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5989 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5992 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5993 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5995 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5996 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6000 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2);
6001 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6004 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6005 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6015 for (
unsigned int n = 0u; n < 8u; ++n)
6017 topPixels[n] = *(
unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
6020 const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
6021 const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
6023 for (
unsigned int n = 0u; n < 8u; ++n)
6025 bottomPixels[n] = *(
unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
6028 const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
6029 const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
6036 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6037 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6040 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6041 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6046 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6053 const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
6059 const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
6060 const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
6062 const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
6063 const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
6065 const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
6066 const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
6070 uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
6071 uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
6073 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
6074 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
6076 const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
6077 const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
6081 const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
6083 const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
6084 const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
6086 const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
6087 const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
6089 const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
6090 const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
6094 m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
6095 m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
6097 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
6098 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
6100 const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
6101 const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
6105 m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
6106 m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
6108 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
6109 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
6113 uint8x8x2_t m2_64_result;
6114 m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
6115 m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
6118 vst2_u8((uint8_t*)targetPixelData, m2_64_result);
6120 targetPixelData += 8;
6126 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6128 for (
unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6130 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 +
int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6132 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6133 ocean_assert(lastSourcePixelLeft < sourceWidth);
6134 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6136 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6138 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6139 const unsigned int factorLeft = 128u - factorRight;
6141 for (
unsigned int c = 0u; c < 2u; ++c)
6143 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6144 + (((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6152#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
6155inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
6157 ocean_assert(source !=
nullptr && target !=
nullptr);
6158 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6159 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6160 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6161 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6162 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6164 ocean_assert(sourcePaddingElements == 0u);
6165 ocean_assert(targetPaddingElements == 0u);
6167 using PixelType =
typename DataType<uint8_t, 3u>::Type;
6169 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6170 const PixelType*
const sourcePixelData = (
const PixelType*)source;
6173 unsigned int leftOffsets[8];
6186 const unsigned int sourceX_T_targetX_fixed16 = (
unsigned int)(
double(0x10000u) * sourceX_s_targetX + 0.5);
6187 const unsigned int sourceY_T_targetY_fixed16 = (
unsigned int)(
double(0x10000u) * sourceY_s_targetY + 0.5);
6189 const int targetOffsetX_fixed16 = (int)(
double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6190 const int targetOffsetY_fixed16 = (int)(
double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6193 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6196 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6199 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6202 const int32x4_t m128_s_zero = vdupq_n_s32(0);
6204 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6205 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6207 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6209 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 +
int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6211 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u;
6212 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6213 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6215 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6217 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6219 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6221 const PixelType*
const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6222 const PixelType*
const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6224 for (
unsigned int x = 0; x < targetWidth; x += 8u)
6226 if (x + 8u > targetWidth)
6231 ocean_assert(x >= 8u && targetWidth > 8u);
6232 const unsigned int newX = targetWidth - 8u;
6234 ocean_assert(x > newX);
6235 targetPixelData -= x - newX;
6240 ocean_assert(!(x + 8u < targetWidth));
6246 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6247 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6250 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6251 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6253 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6254 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6258 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2);
6259 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6262 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6263 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6270 uint8x8x3_t topLeftPixels;
6271 uint8x8x3_t topRightPixels;
6273 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6274 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6276 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6277 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6279 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6280 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6282 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6283 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6285 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6286 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6288 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6289 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6291 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6292 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6294 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6295 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6300 uint8x8x3_t bottomLeftPixels;
6301 uint8x8x3_t bottomRightPixels;
6303 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6304 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6306 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6307 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6309 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6310 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6312 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6313 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6315 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6316 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6318 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6319 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6321 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6322 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6324 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6325 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6333 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6334 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6337 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6338 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6339 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6344 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6345 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6346 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6348 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6349 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6350 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6352 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6353 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6354 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6359 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6360 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6361 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6363 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6364 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6365 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6367 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6368 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6369 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6374 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6375 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6376 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6378 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6379 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6380 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6385 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6386 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6387 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6390 vst3_u8((uint8_t*)targetPixelData, result);
6392 targetPixelData += 8;
6398 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6400 for (
unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6402 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 +
int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6404 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6405 ocean_assert(lastSourcePixelLeft < sourceWidth);
6406 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6408 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6410 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6411 const unsigned int factorLeft = 128u - factorRight;
6413 for (
unsigned int c = 0u; c < 3u; ++c)
6415 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6416 + (((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6424#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
// Specialization <4u, 8u> of the bilinear NEON resize with 7-bit interpolation precision.
// The visible code handles pixels as four interleaved uint8_t channels (DataType<uint8_t, 4u>, vld4/vst4)
// and produces eight target pixels per inner-loop iteration.
// Interpolation weights are fixed-point values in the range [0, 128]; each weighted sum is rounded and
// shifted right by 7 bits (vrshrn_n_u16(..., 7)).
//
// source/target:                   first element of the source/target frame (both asserted non-null)
// sourceWidth/sourceHeight:        source resolution, asserted to be in [2, 65535] x [1, 65535]
// targetWidth/targetHeight:        target resolution, asserted to be in [8, 65535] x [1, 65535]
// firstTargetRow/numberTargetRows: range of target rows written by this call
//
// NOTE(review): this listing follows an '#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_...' line and
// uses names whose declarations are not visible here (sourceX_s_targetX, sourceY_s_targetY,
// sourcePaddingElements, targetPaddingElements, result, factorTop); sourceY_T_targetY_fixed16 also appears
// twice. Confirm against the complete file before enabling the macro.
6429inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(
const uint8_t* source, uint8_t* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
6431 ocean_assert(source !=
nullptr && target !=
nullptr);
6432 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6433 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6434 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6435 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6436 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
// rows are addressed as 'row * width' below, so padding is asserted to be zero
6438 ocean_assert(sourcePaddingElements == 0u);
6439 ocean_assert(targetPaddingElements == 0u);
6441 using PixelType =
typename DataType<uint8_t, 4u>::Type;
// start writing at the first target row handled by this call
6443 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6444 const PixelType*
const sourcePixelData = (
const PixelType*)source;
// left source column for each of the eight target pixels of one iteration
6447 unsigned int leftOffsets[8];
6450 const unsigned int sourceY_T_targetY_fixed16 = (
unsigned int)(
double(0x10000u) * sourceY_s_targetY + 0.5);
// scale factors converted to 16.16 fixed point (rounded by adding 0.5 before truncation)
6463 const unsigned int sourceX_T_targetX_fixed16 = (
unsigned int)(
double(0x10000u) * sourceX_s_targetX + 0.5);
6464 const unsigned int sourceY_T_targetY_fixed16 = (
unsigned int)(
double(0x10000u) * sourceY_s_targetY + 0.5);
// constant offset (scale * 0.5 - 0.5) in 16.16 fixed point; it is added to 'scale * targetCoordinate' below
6466 const int targetOffsetX_fixed16 = (int)(
double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6467 const int targetOffsetY_fixed16 = (int)(
double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6470 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6473 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
// sourceWidth - 2 is the largest left column for which 'left + 1' is still inside the row
6476 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6479 const int32x4_t m128_s_zero = vdupq_n_s32(0);
6481 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6482 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6484 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
// vertical source position in 16.16 fixed point, clamped to [0, sourceHeight - 1]
6486 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 +
int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6488 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u;
6489 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
// upper 7 bits of the 16-bit fraction: weight of the bottom row, range [0, 127]
6490 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6492 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6494 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
// for the last source row the bottom row is the top row itself
6496 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6498 const PixelType*
const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6499 const PixelType*
const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6501 for (
unsigned int x = 0; x < targetWidth; x += 8u)
// fewer than eight pixels remain: move the write position back so that the last block of eight
// ends at the end of the row (some pixels are written a second time)
// NOTE(review): no 'x = newX' assignment is visible in this listing - confirm against the full file
6503 if (x + 8u > targetWidth)
6508 ocean_assert(x >= 8u && targetWidth > 8u);
6509 const unsigned int newX = targetWidth - 8u;
6511 ocean_assert(x > newX);
6512 targetPixelData -= x - newX;
6517 ocean_assert(!(x + 8u < targetWidth));
// target x coordinates of the eight pixels of this iteration
6523 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6524 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
// sourceX = max(0, offset + scale * x) in 16.16 fixed point
6527 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6528 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6530 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6531 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
// integer part of sourceX is the left source column, clamped to sourceWidth - 2
6535 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2);
6536 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
// spill the columns to memory so they can be used as scalar offsets for the lane loads below
6539 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6540 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
// gather the left and right neighbors of the top row, one lane per target pixel;
// vld4_lane_u8 places the four channels of a pixel into val[0..3]
6547 uint8x8x4_t topLeftPixels;
6548 uint8x8x4_t topRightPixels;
6550 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6551 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6553 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6554 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6556 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6557 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6559 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6560 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6562 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6563 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6565 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6566 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6568 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6569 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6571 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6572 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
// same gather for the bottom row
6577 uint8x8x4_t bottomLeftPixels;
6578 uint8x8x4_t bottomRightPixels;
6580 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6581 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6583 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6584 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6586 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6587 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6589 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6590 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6592 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6593 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6595 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6596 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6598 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6599 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6601 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6602 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
// weights of the right neighbors: 16-bit fraction of sourceX, rounded to 7 bits (range [0, 128])
6610 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6611 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
// narrow the eight 16-bit weights to 8 bit; the left weight is 128 - right weight
6614 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6615 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6616 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
// horizontal interpolation of the top row: (left * factorLeft + right * factorRight + 64) >> 7 per channel
6621 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6622 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6623 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6624 uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6626 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6627 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6628 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6629 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6631 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6632 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6633 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6634 uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
// horizontal interpolation of the bottom row, reusing the 16-bit accumulators
6639 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6640 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6641 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6642 m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6644 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6645 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6646 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6647 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6649 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6650 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6651 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6652 uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
// vertical interpolation between the two horizontally interpolated rows
6657 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6658 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6659 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6660 m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6662 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6663 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6664 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6665 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
// NOTE(review): the declaration of 'result' is not visible in this listing
6670 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6671 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6672 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6673 result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
// vst4_u8 re-interleaves the four channel vectors and stores eight complete pixels
6676 vst4_u8((uint8_t*)targetPixelData, result);
6678 targetPixelData += 8;
// first target column whose source position reaches the last source column; from there on the row is
// recomputed with scalar code (presumably because the vector path clamps 'left' to sourceWidth - 2
// while still using the unclamped fraction as weight - confirm)
6684 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6686 for (
unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6688 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 +
int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6690 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6691 ocean_assert(lastSourcePixelLeft < sourceWidth);
6692 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6694 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6696 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6697 const unsigned int factorLeft = 128u - factorRight;
6699 for (
unsigned int c = 0u; c < 4u; ++c)
// targetPixelData points behind the row at this point, so 'targetPixelData - (targetWidth - x)' is pixel x;
// both 7-bit weightings are combined and rounded once: (... + 8192) >> 14
// NOTE(review): 'factorTop' has no visible declaration in this function; the 3-channel code directly
// above this specialization uses '(128u - factorBottom)' at the same place - confirm
6701 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6702 + (((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((
const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
// Specialization for float elements: linearly blends two source rows into one target row,
// targetRow[i] = sourceRowTop[i] * (1 - factorBottom) + sourceRowBottom[i] * factorBottom,
// processing 16 elements (four float32x4_t vectors) per iteration.
//
// sourceRowTop/sourceRowBottom: the two rows to blend (asserted non-null)
// targetRow:                    receives the blended row (asserted non-null)
// elements:                     number of float elements per row, asserted to be >= 16
// factorBottom:                 weight of the bottom row, asserted to be in [0, 1]
6713inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(
const float* sourceRowTop,
const float* sourceRowBottom,
float* targetRow,
const unsigned int elements,
const float factorBottom)
6715 ocean_assert(sourceRowTop !=
nullptr);
6716 ocean_assert(sourceRowBottom !=
nullptr);
6717 ocean_assert(targetRow !=
nullptr);
6718 ocean_assert(elements >= 16u);
6719 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6722 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
// broadcast both weights; the top weight is 1 - factorBottom
6724 const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6725 const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4);
6727 for (
unsigned int n = 0u; n < elements; n += 16u)
// fewer than 16 elements remain: move all three pointers back so that the final block of 16
// ends at the last element (the overlapping elements are computed a second time)
6729 if (n + 16u > elements)
6734 ocean_assert(n >= 16u && elements > 16u);
6735 const unsigned int offset = n - (elements - 16u);
6736 ocean_assert(offset < 16u);
6738 sourceRowTop -= offset;
6739 sourceRowBottom -= offset;
6740 targetRow -= offset;
6743 ocean_assert(!(n + 16u < elements));
// load 16 consecutive floats from each row
6747 const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6748 const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6749 const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6750 const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6752 const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6753 const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6754 const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6755 const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
// top * factorTop ...
6758 float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6759 float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6760 float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6761 float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
// ... plus bottom * factorBottom (multiply-accumulate)
6763 interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6764 interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6765 interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6766 interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6769 vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6770 vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6771 vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6772 vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
// NOTE(review): only the advance of sourceRowBottom is visible in this listing; presumably
// sourceRowTop and targetRow are advanced by 16 as well - confirm against the full file
6775 sourceRowBottom += 16;
// Specialization for single-channel float rows: horizontal linear interpolation driven by lookup tables.
// For target pixel x the two adjacent floats at extendedSourceRow[interpolationLocations[x]] and
// extendedSourceRow[interpolationLocations[x] + 1] are blended with the weights
// (1 - interpolationFactorsRight[x]) and interpolationFactorsRight[x]; eight target pixels per iteration.
//
// extendedSourceRow:         source row; two floats are loaded per location, so location + 1 must be readable
// targetRow:                 receives targetWidth interpolated floats
// targetWidth:               number of target pixels, asserted to be >= 8
// channels:                  asserted to be 1
// interpolationLocations:    left source index for each target pixel
// interpolationFactorsRight: weight of the right neighbor for each target pixel
6781inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(
const float* extendedSourceRow,
float* targetRow,
const unsigned int targetWidth,
const unsigned int channels,
const unsigned int* interpolationLocations,
const float* interpolationFactorsRight)
6783 ocean_assert(extendedSourceRow !=
nullptr);
6784 ocean_assert(targetRow !=
nullptr);
6785 ocean_assert(targetWidth >= 8u);
6786 ocean_assert(interpolationLocations !=
nullptr);
6787 ocean_assert(interpolationFactorsRight !=
nullptr);
6789 ocean_assert_and_suppress_unused(channels == 1u, channels);
6792 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6794 for (
unsigned int x = 0; x < targetWidth; x += 8u)
// fewer than eight pixels remain: move the output pointer and both lookup tables back so that the
// last block of eight ends at the last target pixel
// NOTE(review): no 'x = newX' assignment is visible in this listing - confirm against the full file
6796 if (x + 8u > targetWidth)
6801 ocean_assert(x >= 8u && targetWidth > 8u);
6802 const unsigned int newX = targetWidth - 8u;
6804 ocean_assert(x > newX);
6805 const unsigned int offset = x - newX;
6807 targetRow -= offset;
6808 interpolationLocations -= offset;
6809 interpolationFactorsRight -= offset;
6814 ocean_assert(!(x + 8u < targetWidth));
// each 64-bit load fetches the [left, right] source pair of one target pixel; two pairs form one vector
6819 const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6820 const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6821 const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6823 const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6824 const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6825 const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6827 const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6828 const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6829 const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6831 const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6832 const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6833 const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
// vzipq_f32 interleaves left and right weights: val[0] = [L0, R0, L1, R1], val[1] = [L2, R2, L3, R3],
// matching the [left, right] layout of the pixel vectors above
6835 const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6836 const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6837 const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6839 const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6840 const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6841 const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
// element-wise products left * factorLeft and right * factorRight
6843 const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6844 const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6846 const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6847 const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
// pairwise add sums the two products of each pixel, yielding one interpolated value per pixel
6849 const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6850 const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6852 const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6853 const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6855 const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6856 const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6858 vst1q_f32(targetRow + 0, result_0123_f_32x4);
6859 vst1q_f32(targetRow + 4, result_4567_f_32x4);
// NOTE(review): the matching advance of targetRow is not visible in this listing - confirm
6862 interpolationLocations += 8;
6863 interpolationFactorsRight += 8;
// Specialization of scaleSubset() for single-channel float frames.
// Each target row is produced in up to two passes: a vertical blend of two source rows, followed by a
// horizontal interpolation using lookup tables (left index and right weight per target column) that are
// computed once before the row loop.
//
// source/target:                   first element of the source/target frame (asserted non-null)
// sourceWidth/sourceHeight:        source resolution in pixels, each >= 1
// targetWidth/targetHeight:        target resolution in pixels, each >= 1; must differ from the source resolution
// sourceX_s_targetX/_Y_s_targetY:  factors mapping target to source coordinates, asserted to be > 0
// source/targetPaddingElements:    extra elements at the end of each source/target row
// firstTargetRow/numberTargetRows: range of target rows written by this call
//
// NOTE(review): lines consisting only of braces or 'else' are not visible in this listing, so the exact
// branch structure has to be confirmed against the complete file.
6868inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(
const float* source,
float* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
6870 ocean_assert(source !=
nullptr && target !=
nullptr);
6871 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6872 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6873 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6875 ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6877 const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6878 const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6880 using InterpolateRowVerticalFunction = void (*)(
const float*,
const float*,
float*,
const unsigned int,
const float);
6881 using InterpolateRowHorizontalFunction = void (*)(
const float*,
float*,
const unsigned int,
const unsigned int,
const unsigned int*,
const float*);
// start with the generic row functions and switch to the NEON versions when the row is long enough
// for them (they assert >= 16 elements and >= 8 target pixels respectively)
6883 InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6884 InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6886 if (sourceWidth * 1u >= 16u)
6888 interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6891 if (targetWidth >= 8u)
6893 interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6896 target += targetStrideElements * firstTargetRow;
6898 const float sourceX_T_targetX = float(sourceX_s_targetX);
6899 const float sourceY_T_targetY = float(sourceY_s_targetY);
6903 Memory memoryIntermediateExtendedRow;
6904 Memory memoryHorizontalInterpolationLocations;
6905 Memory memoryHorizontalInterpolationFactorsRight;
// the intermediate row and the horizontal lookup tables are only allocated when the width changes;
// the intermediate row holds one extra element (sourceWidth + 1)
6907 if (sourceWidth != targetWidth)
6911 memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u);
6913 memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth);
6915 memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth);
6918 if (memoryHorizontalInterpolationLocations)
6920 ocean_assert(memoryHorizontalInterpolationFactorsRight);
// vectorized table setup, four target columns per iteration
6922 if (targetWidth >= 4u)
6924 const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6925 const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6928 const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6931 const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6934 const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6937 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6938 float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6942 for (
unsigned int x = 0u; x < targetWidth; x += 4u)
// fewer than four columns remain: shift the column vector back by 'offset'
// NOTE(review): no 'x = newX' assignment is visible in this listing although the stores below index
// the tables with x - confirm against the full file
6944 if (x + 4u > targetWidth)
6949 ocean_assert(x >= 4u && targetWidth > 4u);
6950 const unsigned int newX = targetWidth - 4u;
6952 ocean_assert(x > newX);
6953 const unsigned int offset = x - newX;
6957 x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(
float(offset)));
6960 ocean_assert(!(x + 4u < targetWidth));
// sourceX = max(0, offset + scale * x)
6964 const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
// left index: sourceX converted to unsigned integer, limited to sourceWidth - 1
6968 uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4);
6971 vst1q_u32(memoryHorizontalInterpolationLocations.
data<
unsigned int>() + x, left_0123_u_32x4);
// right weight: sourceX minus the left index
6974 const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6976 vst1q_f32(memoryHorizontalInterpolationFactorsRight.
data<
float>() + x, factorsRight_f_32x4);
6979 x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
// scalar table setup with the same formulas (presumably the alternative for targetWidth < 4 - confirm)
6984 const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6988 for (
unsigned int x = 0u; x < targetWidth; ++x)
6990 const float sourceX = max(0.0f, targetOffsetX +
float(x) * sourceX_T_targetX);
6992 const unsigned int left = min((
unsigned int)sourceX, sourceWidth - 1u);
6994 memoryHorizontalInterpolationLocations.
data<
unsigned int>()[x] = left;
6996 const float factorRight = sourceX - float(left);
6997 ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6999 memoryHorizontalInterpolationFactorsRight.
data<
float>()[x] = factorRight;
7004 const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
7006 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
// vertical source position, clamped to [0, sourceHeight - 1]
7008 const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY *
float(y),
float(sourceHeight) - 1.0f);
7010 const unsigned int sourceRowTop = (
unsigned int)sourceY;
7011 const float factorBottom = sourceY - float(sourceRowTop);
7012 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
// for the last source row the bottom row is the top row itself
7014 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
7016 const float*
const sourceTopRow = source + sourceStrideElements * sourceRowTop;
7017 const float*
const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
7019 float* targetRow =
nullptr;
// identical heights: the top source row is copied into the intermediate row
7021 if (sourceHeight == targetHeight)
7023 ocean_assert(sourceWidth != targetWidth);
7024 ocean_assert(memoryIntermediateExtendedRow);
7027 memcpy(memoryIntermediateExtendedRow.
data<
float>(), sourceTopRow, sourceWidth *
sizeof(
float));
// vertical blend (presumably the alternative branch for differing heights - confirm): the result goes
// to the intermediate row if one exists, otherwise directly into the target frame
7032 targetRow = memoryIntermediateExtendedRow.
isNull() ? target : memoryIntermediateExtendedRow.
data<
float>();
7034 ocean_assert(targetRow !=
nullptr);
7035 ocean_assert(interpolateRowVerticalFunction !=
nullptr);
7036 interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
7039 if (memoryIntermediateExtendedRow)
// duplicate the last element into the extra slot so that reading 'location + 1' stays inside the buffer
7043 memoryIntermediateExtendedRow.
data<
float>()[sourceWidth] = memoryIntermediateExtendedRow.
data<
float>()[sourceWidth - 1u];
7045 interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.
data<
float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.
data<
unsigned int>(), memoryHorizontalInterpolationFactorsRight.
data<
float>());
7048 target += targetStrideElements;
7054template <
typename T,
typename TScale,
unsigned int tChannels>
7055void FrameInterpolatorBilinear::scaleSubset(
const T* source, T* target,
const unsigned int sourceWidth,
const unsigned int sourceHeight,
const unsigned int targetWidth,
const unsigned int targetHeight,
const double sourceX_s_targetX,
const double sourceY_s_targetY,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
7057 static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value),
"Invalid TScale type");
7059 ocean_assert(source !=
nullptr && target !=
nullptr);
7060 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
7061 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
7062 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
7064 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
7065 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
7067 const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
7068 const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
7116 const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
7117 const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
7119 const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
7120 const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
7122 target += targetStrideElements * firstTargetRow;
7124 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
7126 const TScale sy =
minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
7127 ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
7129 const unsigned int sTop = (
unsigned int)sy;
7130 ocean_assert(sy >= TScale(sTop));
7132 const TScale factorBottom = sy - TScale(sTop);
7133 ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
7135 const TScale factorTop = TScale(1) - factorBottom;
7136 ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
7138 const T*
const sourceTop = source + sTop * sourceStrideElements;
7139 const T*
const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
7141 for (
unsigned int x = 0; x < targetWidth; ++x)
7143 const TScale sx =
minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
7144 ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
7146 const unsigned int sLeft = (
unsigned int)sx;
7147 ocean_assert(sx >= TScale(sLeft));
7149 const TScale factorRight = sx - TScale(sLeft);
7150 ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
7152 const TScale factorLeft = TScale(1) - factorRight;
7153 ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
7155 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
7157 const T*
const sourceTopLeft = sourceTop + sLeft * tChannels;
7158 const T*
const sourceBottomLeft = sourceBottom + sLeft * tChannels;
7160 const TScale factorTopLeft = factorTop * factorLeft;
7161 const TScale factorTopRight = factorTop * factorRight;
7162 const TScale factorBottomLeft = factorBottom * factorLeft;
7163 const TScale factorBottomRight = factorBottom * factorRight;
7165 for (
unsigned int n = 0u; n < tChannels; ++n)
7167 target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
7168 + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
7171 target += tChannels;
7174 target += targetPaddingElements;
// Rotates a range of rows of an 8-bit-per-channel frame with tChannels channels.
// For every target pixel the matching source location is computed as
// anchor + rotationMatrix2 * (targetPosition - anchor); locations inside [0, width_1] x [0, height_1] are
// sampled with bilinear interpolation, all other target pixels receive the border color.
//
// source/target:                   source and target frame, both with resolution width x height
// horizontal/verticalAnchorPosition: position the rotation is applied around
// angle:                           rotation angle (NOTE(review): unit and direction are not visible here)
// borderColor:                     color for target pixels without source location; zero is used if nullptr
// source/targetPaddingElements:    extra elements at the end of each source/target row
// firstTargetRow/numberTargetRows: range of target rows written by this call
//
// NOTE(review): the declarations of PixelType, rotationMatrix3, width_1, height_1 and floatY are not
// visible in this listing - confirm against the complete file.
7178template <
unsigned int tChannels>
7179void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(
const uint8_t* source, uint8_t* target,
const unsigned int width,
const unsigned int height,
const Scalar horizontalAnchorPosition,
const Scalar verticalAnchorPosition,
const Scalar angle,
const uint8_t* borderColor,
const unsigned int sourcePaddingElements,
const unsigned int targetPaddingElements,
const unsigned int firstTargetRow,
const unsigned int numberTargetRows)
7181 static_assert(tChannels != 0u,
"Invalid channel number!");
7183 ocean_assert(firstTargetRow + numberTargetRows <= height);
7187 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
// fallback border color with all channels zero, used when no border color is provided
7189 uint8_t zeroColor[tChannels] = {uint8_t(0)};
7190 const PixelType bColor = borderColor ? *(
const PixelType*)borderColor : *(
const PixelType*)zeroColor;
// 2x2 matrix built from the elements (0, 0), (1, 0), (0, 1), (1, 1) of rotationMatrix3
7193 const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
7197 const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
7199 for (
unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
// first pixel of target row y; rows are targetStrideElements apart
7201 PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
7205 for (
unsigned int x = 0; x < width; ++x)
// rotate the target position around the anchor to obtain the source location
7207 const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (
Vector2(
Scalar(x), floatY) - anchorPosition));
// interpolate only if the source location lies inside the frame, otherwise use the border color
7209 if (sourceLocation.
x() >= 0 && sourceLocation.
y() >= 0 && sourceLocation.
x() <= width_1 && sourceLocation.
y() <= height_1)
7211 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
7215 *targetPixel = bColor;
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:131
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual bool isValid() const =0
Returns whether this camera is valid.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition FrameBlender.h:1171
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorBilinear.h:65
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:1551
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1458
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorBilinear.h:346
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition FrameInterpolatorBilinear.h:47
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:4432
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition FrameInterpolatorBilinear.h:1769
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition FrameInterpolatorBilinear.h:1929
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1835
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1996
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition FrameInterpolatorBilinear.h:2500
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1852
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4654
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1822
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:3613
static OCEAN_FORCE_INLINE void interpolate4Pixels4Channel8BitPerChannelNEON(const uint8x16_t &topLeftPixels_u8x16, const uint8x16_t &topRightPixels_u8x16, const uint8x16_t &bottomLeftPixels_u8x16, const uint8x16_t &bottomRightPixels_u8x16, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, 4u >::Type *targetPositionPixels, const bool useOptimizedNEONFactorReplication=false)
Interpolates 4 independent 4-channel pixels using widening byte multiply.
Definition FrameInterpolatorBilinear.h:4320
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition FrameInterpolatorBilinear.h:2416
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1806
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition FrameInterpolatorBilinear.h:3993
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition FrameInterpolatorBilinear.h:4498
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4734
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Applies an affine transform to an N-channel, 8-bit frame. The target frame must have the same pixel form...
Definition FrameInterpolatorBilinear.h:1693
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr, const bool useOptimizedNEON=false, const bool useOptimizedBilinearValuesAndFactorCalculation=false, const bool useOptimizedNEONFactorReplication=false)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1869
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4780
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition FrameInterpolatorBilinear.h:3363
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5630
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition FrameInterpolatorBilinear.h:5595
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:7179
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1916
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:3295
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1793
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition FrameInterpolatorBilinear.h:4828
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition FrameInterpolatorBilinear.h:1644
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition FrameInterpolatorBilinear.h:1657
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition FrameInterpolatorBilinear.h:4882
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5454
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:1978
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition FrameInterpolatorBilinear.h:5575
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1731
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition FrameInterpolatorBilinear.h:2175
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:5357
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition FrameInterpolatorBilinear.h:2264
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2685
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-define...
Definition FrameInterpolatorBilinear.h:5410
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows, const bool useOptimizedNEON=false, const bool useOptimizedBilinearValuesAndFactorCalculation=false, const bool useOptimizedNEONFactorReplication=false)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition FrameInterpolatorBilinear.h:5139
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4579
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:7055
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2339
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:2089
static constexpr uint8x8_t create_uint8x8(const uint8_t v0, const uint8_t v1, const uint8_t v2, const uint8_t v3, const uint8_t v4, const uint8_t v5, const uint8_t v6, const uint8_t v7)
Creates a uint8x8_t vector from 8 individual uint8_t values.
Definition NEON.h:591
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:63
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:468
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:456
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
Template class allowing to define an array of data types.
Definition DataType.h:27
This class implements Ocean's image class.
Definition Frame.h:1879
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4320
bool isValid() const
Returns whether this frame is valid.
Definition Frame.h:4615
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4315
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4305
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4310
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:960
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:954
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition Lookup2.h:972
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition Lookup2.h:966
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of t...
Definition Lookup2.h:1799
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition Lookup2.h:2215
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition Lookup2.h:1877
This class implements an object able to allocate memory.
Definition base/Memory.h:22
bool isNull() const
Returns whether this object holds any memory.
Definition base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition Numeric.h:2035
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2096
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition Numeric.h:2246
unsigned int width() const
Returns the width of the camera image.
Definition PinholeCamera.h:1452
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition PinholeCamera.h:1333
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition PinholeCamera.h:1327
unsigned int height() const
Returns the height of the camera image.
Definition PinholeCamera.h:1458
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition PinholeCamera.h:1792
This class implements a 2x2 square matrix.
Definition SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1334
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1047
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition SquareMatrix3.h:1366
This class implements tests for the NEON-accelerated bilinear interpolation functions.
Definition TestFrameInterpolatorBilinearNEON.h:27
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
const T & z() const noexcept
Returns the z value.
Definition Vector3.h:836
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition base/Utilities.h:973
PixelCenter
Definition of individual centers of pixels.
Definition CV.h:117
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition CV.h:133
@ PC_CENTER
The center of a pixel is located in the center of each pixel's square (with an offset of 0....
Definition CV.h:150
float Scalar
Definition of a scalar type.
Definition Math.h:129
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix3.h:43
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition Rotation.h:32
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition DataType.h:373