// Body fragment that presumably belongs to buffer8BitPerChannel (the signature is not part of this
// listing): it accumulates absolute differences between the uint8_t buffers buffer0 and buffer1,
// each holding tSize elements.
// NOTE(review): this listing has lost lines (braces, pointer advances, the final reduction and the
// return statement), so only the statements that are visible are described below.
72 static_assert(tSize >= 1u,
"Invalid buffer size!");
// Four 32-bit lanes holding the running partial sums.
74 uint32x4_t sum_u_32x4 = vdupq_n_u32(0u);
// Number of complete 16-element blocks in the buffer.
78 constexpr unsigned int blocks16 = tSize / 16u;
80 for (
unsigned int n = 0u; n < blocks16; ++n)
// Per-element absolute difference of 16 bytes from each buffer.
83 const uint8x16_t absDifference_u_8x16 = vabdq_u8(vld1q_u8(buffer0), vld1q_u8(buffer1));
// Widening add of the low and high halves: eight 16-bit values, each at most 2 * 255.
85 const uint16x8_t absDifference_u_16x8 = vaddl_u8(vget_low_u8(absDifference_u_8x16), vget_high_u8(absDifference_u_8x16));
// Pairwise add of the 16-bit lanes, accumulated into the 32-bit partial sums.
87 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
// After the 16-element blocks there is at most one 8-element block.
95 constexpr unsigned int blocks8 = (tSize % 16u) / 8u;
96 static_assert(blocks8 <= 1u,
"Invalid number of blocks!");
// Widening absolute difference of 8 bytes, accumulated like above.
// NOTE(review): presumably guarded by a check on blocks8 that is not visible in this listing - confirm.
101 const uint16x8_t absDifference_u_16x8 = vabdl_u8(vld1_u8(buffer0), vld1_u8(buffer1));
103 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
// Elements left over after the 16- and 8-element blocks (fewer than 8).
109 constexpr unsigned int remainingElements = tSize - blocks16 * 16u - blocks8 * 8u;
110 static_assert(remainingElements < 8u,
"Invalid number of remaining elements!");
// The leftover elements are handled one by one and added to 'result'.
// NOTE(review): the declaration of 'result' (and how sum_u_32x4 is reduced into it) is not visible here.
116 for (
unsigned int n = 0u; n < remainingElements; ++n)
118 result += uint32_t(abs(int32_t(buffer0[n]) - int32_t(buffer1[n])));
// Body fragment that presumably belongs to patch8BitPerChannel (the signature is not part of this
// listing): it accumulates absolute differences between two image patches of tPatchSize rows, each
// row holding tChannels * tPatchSize uint8_t elements, with individual row strides.
// NOTE(review): this listing has lost lines (braces, 'else' branches, pointer advances inside a row,
// the final reduction and the return statement), so only the visible statements are described.
127 static_assert(tChannels >= 1u,
"Invalid channel number!");
128 static_assert(tPatchSize >= 5u,
"Invalid patch size!");
130 ocean_assert(patch0 !=
nullptr && patch1 !=
nullptr);
// Each stride must cover at least one full patch row.
132 ocean_assert(patch0StrideElements >= tChannels * tPatchSize);
133 ocean_assert(patch1StrideElements >= tChannels * tPatchSize);
// Number of elements in one patch row.
135 constexpr unsigned int patchWidthElements = tChannels * tPatchSize;
// A row is split into 16-element blocks, 8-element blocks and up to 7 single elements.
137 constexpr unsigned int blocks16 = patchWidthElements / 16u;
138 constexpr unsigned int blocks8 = (patchWidthElements - blocks16 * 16u) / 8u;
139 constexpr unsigned int blocks1 = patchWidthElements - blocks16 * 16u - blocks8 * 8u;
141 static_assert(blocks1 <= 7u,
"Invalid block size!");
// Byte masks for the partial 8-element load: vcreate_u8 maps the lowest 8 bits to lane 0, so
// maskRight keeps the first blocks1 lanes and maskLeft keeps the last blocks1 lanes.
// NOTE(review): for blocks1 == 0 the shift count (8u - blocks1) * 8u is 64, which is undefined for
// a 64-bit operand. The masks are only used under 'blocks1 != 0u' below, but the initializers are
// still evaluated - consider guarding the shift count.
143 const uint8x8_t maskRight_u_8x8 = vcreate_u8(uint64_t(-1) >> (8u - blocks1) * 8u);
144 const uint8x8_t maskLeft_u_8x8 = vcreate_u8(uint64_t(-1) << (8u - blocks1) * 8u);
// Four 32-bit lanes holding the running partial sums of the NEON paths.
146 uint32x4_t sum_u_32x4 = vdupq_n_u32(0u);
// Separate accumulator for elements handled one by one.
148 uint32_t sumIndividual = 0u;
// Iterate over the patch rows.
150 for (
unsigned int y = 0u; y < tPatchSize; ++y)
152 for (
unsigned int n = 0u; n < blocks16; ++n)
// Per-element absolute difference of 16 bytes from each patch.
155 const uint8x16_t absDifference_u_8x16 = vabdq_u8(vld1q_u8(patch0), vld1q_u8(patch1));
// Widening add of the low and high halves: eight 16-bit values, each at most 2 * 255.
157 const uint16x8_t absDifference_u_16x8 = vaddl_u8(vget_low_u8(absDifference_u_8x16), vget_high_u8(absDifference_u_8x16));
// Pairwise add of the 16-bit lanes, accumulated into the 32-bit partial sums.
159 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
165 for (
unsigned int n = 0u; n < blocks8; ++n)
// Widening absolute difference of 8 bytes, accumulated like above.
168 const uint16x8_t absDifference_u_16x8 = vabdl_u8(vld1_u8(patch0), vld1_u8(patch1));
170 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
// Handling of the 1..7 elements left in the row; compiled out entirely when blocks1 == 0.
// NOTE(review): the exact nesting of the branches below is not recoverable from this listing
// (braces and 'else' keywords are missing) - confirm against the original header.
176 if constexpr (blocks1 != 0u)
// Not the last row: an 8-byte load starting at the current position is used; the bytes beyond
// the blocks1 valid ones are zeroed in both inputs, so they contribute nothing to the sum.
182 if (y < tPatchSize - 1u)
184 const uint8x8_t remaining0_u_8x8 = vand_u8(vld1_u8(patch0), maskRight_u_8x8);
185 const uint8x8_t remaining1_u_8x8 = vand_u8(vld1_u8(patch1), maskRight_u_8x8);
187 const uint16x8_t absDifference_u_16x8 = vabdl_u8(remaining0_u_8x8, remaining1_u_8x8);
189 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
// Alternative load (presumably the last-row case): the 8-byte load starts overlapElements
// before the current position, and the leading overlapElements lanes are zeroed in both inputs.
193 constexpr unsigned int overlapElements = 8u - blocks1;
194 static_assert(overlapElements >= 1u && overlapElements < 8u,
"Invalid number!");
196 const uint8x8_t remaining0_u_8x8 = vand_u8(vld1_u8(patch0 - overlapElements), maskLeft_u_8x8);
197 const uint8x8_t remaining1_u_8x8 = vand_u8(vld1_u8(patch1 - overlapElements), maskLeft_u_8x8);
199 const uint16x8_t absDifference_u_16x8 = vabdl_u8(remaining0_u_8x8, remaining1_u_8x8);
201 sum_u_32x4 = vpadalq_u16(sum_u_32x4, absDifference_u_16x8);
// Scalar path: the remaining elements are summed one by one into sumIndividual.
206 for (
unsigned int n = 0u; n < blocks1; ++n)
208 sumIndividual += uint32_t(abs(int32_t(patch0[n]) - int32_t(patch1[n])));
// Skip the padding between the end of this patch row and the start of the next one.
// NOTE(review): this assumes both pointers were already advanced by patchWidthElements within the
// row; those advances are not visible in this listing.
216 patch0 += patch0StrideElements - patchWidthElements;
217 patch1 += patch1StrideElements - patchWidthElements;
static uint32_t patchBuffer8BitPerChannel(const uint8_t *patch0, const uint8_t *buffer1, const unsigned int patch0StrideElements)
Returns the sum of absolute differences between an image patch and a buffer.
Definition SumAbsoluteDifferencesNEON.h:224
static uint32_t patch8BitPerChannel(const uint8_t *patch0, const uint8_t *patch1, const unsigned int patch0StrideElements, const unsigned int patch1StrideElements)
Returns the sum of absolute differences between two patches within an image.
Definition SumAbsoluteDifferencesNEON.h:125
static uint32_t buffer8BitPerChannel(const uint8_t *buffer0, const uint8_t *buffer1)
Returns the sum of absolute differences between two memory buffers.
Definition SumAbsoluteDifferencesNEON.h:70