Ocean
Loading...
Searching...
No Matches
AdvancedFrameInterpolatorBilinearNEON.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
9#define META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
10
12
13#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
14
15#include "ocean/cv/NEON.h"
16
17#include "ocean/math/Vector2.h"
18
19namespace Ocean
20{
21
22namespace CV
23{
24
25namespace Advanced
26{
27
28/**
29 * This class implements advanced bilinear frame interpolation functions using NEON extensions.
30 * @ingroup cvadvanced
31 */
32class OCEAN_CV_ADVANCED_EXPORT AdvancedFrameInterpolatorBilinearNEON
33{
34 protected:
35
36 /**
37 * This class allows to specialize functions for individual channels.
38 * @tparam tChannels Specifies the number of channels for the given frames, with range [1, infinity)
39 */
40 template <unsigned int tChannels>
42 {
43 public:
44
45 /**
46 * Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
47 * The center of a pixel is expected to be located at the top-left corner of a pixel.
48 * @param imageTopLeft The pointer to the top-left position of the image, must be valid
49 * @param imageStrideElements The number of elements between two consecutive image rows (including padding), in elements, with range [tChannels * tPatchSize, infinity)
50 * @param buffer The target buffer with `tChannels * tSize * tSize` elements, must be valid
51 * @param factorRight The interpolation factor for the right pixels, with range [0, 128]
52 * @param factorBottom The interpolation factor for the bottom pixels, with range [0, 128]
53 * @tparam tPatchSize The size of the square patch (the edge length) in pixel, with range [1, infinity), must be odd
54 */
55 template <unsigned int tPatchSize>
56 static inline void interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom);
57 };
58
59 public:
60
61 /**
62 * Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
63 * The center of a pixel is expected to be located at the top-left corner of a pixel.
64 * @param image The image in which the interpolated patch is located, must be valid
65 * @param width The width of the image, in pixel, with range [tPatchSize + 1, infinity)
66 * @param imagePaddingElements The number of padding elements at the end of each image row, in elements, with range [0, infinity)
67 * @param buffer The target buffer with `tChannels * tSize * tSize` elements, must be valid
68 * @param position The center position of the square region in the image, with range [tPatchSize/2, width - tPatchSize/2 - 1)x[tPatchSize/2, height - tPatchSize/2 - 1)
69 * @tparam tChannels The number of frame channels, with range [1, infinity)
70 * @tparam tPatchSize The size of the square patch (the edge length) in pixel, with range [5, infinity), must be odd
71 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
72 * @tparam TScalar The scalar data type of the sub-pixel position
73 */
74 template <unsigned int tChannels, unsigned int tPatchSize, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
75 static inline void interpolateSquarePatch8BitPerChannel(const uint8_t* const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t* buffer, const VectorT2<TScalar>& position);
76};
77
78template <>
79template <unsigned int tPatchSize>
80inline void AdvancedFrameInterpolatorBilinearNEON::SpecializedForChannels<1u>::interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom)
81{
82 ocean_assert(imageTopLeft != nullptr && buffer != nullptr);
83 ocean_assert(imageStrideElements >= 1u * tPatchSize);
84
85 ocean_assert(factorRight <= 128u && factorBottom <= 128u);
86
87 const unsigned int factorLeft = 128u - factorRight;
88 const unsigned int factorTop = 128u - factorBottom;
89
90 constexpr unsigned int blocks15 = tPatchSize / 15u;
91 constexpr unsigned int remainingAfterBlocks15 = tPatchSize % 15u;
92
93 constexpr bool partialBlock15 = remainingAfterBlocks15 > 10u;
94 constexpr unsigned int remainingAfterPartialBlock15 = partialBlock15 ? 0u : remainingAfterBlocks15;
95
96 constexpr bool block7 = remainingAfterPartialBlock15 >= 7u;
97 constexpr unsigned int remainingAfterBlock7 = remainingAfterPartialBlock15 % 7u;
98
99 constexpr bool partialBlock7 = remainingAfterBlock7 >= 3u;
100 constexpr unsigned int remainingAfterPartialBlock7 = partialBlock7 ? 0u : remainingAfterBlock7;
101
102 constexpr unsigned int blocks1 = remainingAfterPartialBlock7;
103
104 // L R L R L R L R
105 const uint8x8_t factorsLeftRight_u_8x8 = vreinterpret_u8_u16(vdup_n_u16(uint16_t(factorLeft | (factorRight << 8u))));
106
107 const uint32x4_t factorsTop_u_32x4 = vdupq_n_u32(factorTop);
108 const uint32x4_t factorsBottom_u_32x4 = vdupq_n_u32(factorBottom);
109
110 for (unsigned int y = 0u; y < tPatchSize; ++y)
111 {
112 for (unsigned int x = 0u; x < blocks15; ++x)
113 {
114 const uint8x16_t top_u_8x16 = vld1q_u8(imageTopLeft);
115 const uint8x16_t bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements);
116
117 // top[1], top[2], top[3], ..., top[14], top[15], X
118 const uint8x16_t topB_u_8x16 = vextq_u8(top_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), 1);
119 const uint8x16_t bottomB_u_8x16 = vextq_u8(bottom_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), 1);
120
121
122 // top[0] * L, top[1] * R, top[2] * L, ...
123 const uint32x4_t topLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(top_u_8x16), factorsLeftRight_u_8x8));
124 const uint32x4_t bottomLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
125
126 // top[1] * L, top[2] * R, top[3] * L, ...
127 const uint32x4_t topLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(topB_u_8x16), factorsLeftRight_u_8x8));
128 const uint32x4_t bottomLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
129
130 // top[8] * L, top[9] * R, top[10] * L, ...
131 const uint32x4_t topHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(top_u_8x16), factorsLeftRight_u_8x8));
132 const uint32x4_t bottomHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
133
134 // top[9] * L, top[10] * R, top[11] * L, ...
135 const uint32x4_t topHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(topB_u_8x16), factorsLeftRight_u_8x8));
136 const uint32x4_t bottomHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
137
138
139 // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
140 const uint16x4_t resultLowA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowA_u_32x4, factorsTop_u_32x4), bottomLowA_u_32x4, factorsBottom_u_32x4), 14);
141 const uint16x4_t resultHighA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighA_u_32x4, factorsTop_u_32x4), bottomHighA_u_32x4, factorsBottom_u_32x4), 14);
142
143 const uint16x4_t resultLowB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowB_u_32x4, factorsTop_u_32x4), bottomLowB_u_32x4, factorsBottom_u_32x4), 14);
144 const uint16x4_t resultHighB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighB_u_32x4, factorsTop_u_32x4), bottomHighB_u_32x4, factorsBottom_u_32x4), 14);
145
146 const uint16x8_t resultA_u_16x8 = vcombine_u16(resultLowA_u_16x4, resultHighA_u_16x4);
147 const uint16x8_t resultB_u_16x8 = vcombine_u16(resultLowB_u_16x4, resultHighB_u_16x4);
148
149 // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
150 const uint8x16_t result_u_8x16 = vreinterpretq_u8_u16(vsliq_n_u16(resultA_u_16x8, resultB_u_16x8, 8));
151
152
153 const bool isLastBlock = (y + 1u == tPatchSize) && (x + 1u == blocks15) && (!block7 && !partialBlock7 && blocks1 == 0u);
154
155 if (isLastBlock)
156 {
157 uint8_t tempBuffer[16];
158 vst1q_u8(tempBuffer, result_u_8x16);
159
160 memcpy(buffer, &tempBuffer, 15);
161 }
162 else
163 {
164 vst1q_u8(buffer, result_u_8x16);
165 }
166
167 imageTopLeft += 15;
168 buffer += 15;
169 }
170
171 if constexpr (partialBlock15)
172 {
173 ocean_assert(!block7 && !partialBlock7 && blocks1 == 0u);
174
175 uint8x16_t top_u_8x16;
176 uint8x16_t bottom_u_8x16;
177
178 if (y < tPatchSize - 1u)
179 {
180 top_u_8x16 = vld1q_u8(imageTopLeft);
181 bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements);
182 }
183 else
184 {
185 constexpr unsigned int overlapping = 16u - (remainingAfterBlocks15 + 1u);
186
187 top_u_8x16 = vld1q_u8(imageTopLeft - overlapping);
188 bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements - overlapping);
189
190 top_u_8x16 = vextq_u8(top_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), overlapping);
191 bottom_u_8x16 = vextq_u8(bottom_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), overlapping);
192 }
193
194 // top[1], top[2], top[3], ..., top[14], top[15], top[0]
195 const uint8x16_t topB_u_8x16 = vextq_u8(top_u_8x16, top_u_8x16, 1);
196 const uint8x16_t bottomB_u_8x16 = vextq_u8(bottom_u_8x16, bottom_u_8x16, 1);
197
198
199 // top[0] * L, top[1] * R, top[2] * L, ...
200 const uint32x4_t topLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(top_u_8x16), factorsLeftRight_u_8x8));
201 const uint32x4_t bottomLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
202
203 // top[1] * L, top[2] * R, top[3] * L, ...
204 const uint32x4_t topLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(topB_u_8x16), factorsLeftRight_u_8x8));
205 const uint32x4_t bottomLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
206
207 // top[8] * L, top[9] * R, top[10] * L, ...
208 const uint32x4_t topHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(top_u_8x16), factorsLeftRight_u_8x8));
209 const uint32x4_t bottomHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
210
211 // top[9] * L, top[10] * R, top[11] * L, ...
212 const uint32x4_t topHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(topB_u_8x16), factorsLeftRight_u_8x8));
213 const uint32x4_t bottomHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
214
215
216 // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
217 const uint16x4_t resultLowA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowA_u_32x4, factorsTop_u_32x4), bottomLowA_u_32x4, factorsBottom_u_32x4), 14);
218 const uint16x4_t resultHighA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighA_u_32x4, factorsTop_u_32x4), bottomHighA_u_32x4, factorsBottom_u_32x4), 14);
219
220 const uint16x4_t resultLowB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowB_u_32x4, factorsTop_u_32x4), bottomLowB_u_32x4, factorsBottom_u_32x4), 14);
221 const uint16x4_t resultHighB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighB_u_32x4, factorsTop_u_32x4), bottomHighB_u_32x4, factorsBottom_u_32x4), 14);
222
223 const uint16x8_t resultA_u_16x8 = vcombine_u16(resultLowA_u_16x4, resultHighA_u_16x4);
224 const uint16x8_t resultB_u_16x8 = vcombine_u16(resultLowB_u_16x4, resultHighB_u_16x4);
225
226 // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
227 const uint8x16_t result_u_8x16 = vreinterpretq_u8_u16(vsliq_n_u16(resultA_u_16x8, resultB_u_16x8, 8));
228
229 ocean_assert(!block7 && !partialBlock7 && blocks1 == 0u);
230 const bool isLastBlock = y + 1u == tPatchSize;
231
232 if (isLastBlock)
233 {
234 uint8_t tempBuffer[16];
235 vst1q_u8(tempBuffer, result_u_8x16);
236
237 memcpy(buffer, &tempBuffer, remainingAfterBlocks15);
238 }
239 else
240 {
241 vst1q_u8(buffer, result_u_8x16);
242 }
243
244 imageTopLeft += remainingAfterBlocks15;
245 buffer += remainingAfterBlocks15;
246 }
247
248 if constexpr (block7)
249 {
250 const uint8x8_t top_u_8x8 = vld1_u8(imageTopLeft);
251 const uint8x8_t bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements);
252
253 // top[1], top[2], top[3], ..., top[6], top[7], X
254 const uint8x8_t topB_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, 1);
255 const uint8x8_t bottomB_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, 1);
256
257
258 // top[0] * L, top[1] * R, top[2] * L, ...
259 const uint32x4_t topA_u_32x4 = vpaddlq_u16(vmull_u8(top_u_8x8, factorsLeftRight_u_8x8));
260 const uint32x4_t bottomA_u_32x4 = vpaddlq_u16(vmull_u8(bottom_u_8x8, factorsLeftRight_u_8x8));
261
262 // top[1] * L, top[2] * R, top[3] * L, ...
263 const uint32x4_t topB_u_32x4 = vpaddlq_u16(vmull_u8(topB_u_8x8, factorsLeftRight_u_8x8));
264 const uint32x4_t bottomB_u_32x4 = vpaddlq_u16(vmull_u8(bottomB_u_8x8, factorsLeftRight_u_8x8));
265
266
267 // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
268 const uint16x4_t resultA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topA_u_32x4, factorsTop_u_32x4), bottomA_u_32x4, factorsBottom_u_32x4), 14);
269 const uint16x4_t resultB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topB_u_32x4, factorsTop_u_32x4), bottomB_u_32x4, factorsBottom_u_32x4), 14);
270
271
272 // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
273 const uint8x8_t result_u_8x8 = vreinterpret_u8_u16(vsli_n_u16(resultA_u_16x4, resultB_u_16x4, 8));
274
275 const bool isLastBlock = (y + 1u == tPatchSize) && (!partialBlock7 && blocks1 == 0u);
276
277 if (isLastBlock)
278 {
279 uint8_t tempBuffer[8];
280 vst1_u8(tempBuffer, result_u_8x8);
281
282 memcpy(buffer, &tempBuffer, 7);
283 }
284 else
285 {
286 vst1_u8(buffer, result_u_8x8);
287 }
288
289 imageTopLeft += 7;
290 buffer += 7;
291 }
292
293 if constexpr (partialBlock7)
294 {
295 ocean_assert(blocks1 == 0u);
296
297 uint8x8_t top_u_8x8;
298 uint8x8_t bottom_u_8x8;
299
300 if (y < tPatchSize - 1u)
301 {
302 top_u_8x8 = vld1_u8(imageTopLeft);
303 bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements);
304 }
305 else
306 {
307 constexpr unsigned int overlapping = 8u - (remainingAfterBlock7 + 1u);
308
309 top_u_8x8 = vld1_u8(imageTopLeft - overlapping);
310 bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements - overlapping);
311
312 top_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, overlapping);
313 bottom_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, overlapping);
314 }
315
316
317 // top[1], top[2], top[3], ..., top[6], top[7], X
318 const uint8x8_t topB_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, 1);
319 const uint8x8_t bottomB_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, 1);
320
321
322 // top[0] * L, top[1] * R, top[2] * L, ...
323 const uint32x4_t topA_u_32x4 = vpaddlq_u16(vmull_u8(top_u_8x8, factorsLeftRight_u_8x8));
324 const uint32x4_t bottomA_u_32x4 = vpaddlq_u16(vmull_u8(bottom_u_8x8, factorsLeftRight_u_8x8));
325
326 // top[1] * L, top[2] * R, top[3] * L, ...
327 const uint32x4_t topB_u_32x4 = vpaddlq_u16(vmull_u8(topB_u_8x8, factorsLeftRight_u_8x8));
328 const uint32x4_t bottomB_u_32x4 = vpaddlq_u16(vmull_u8(bottomB_u_8x8, factorsLeftRight_u_8x8));
329
330
331 // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
332 const uint16x4_t resultA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topA_u_32x4, factorsTop_u_32x4), bottomA_u_32x4, factorsBottom_u_32x4), 14);
333 const uint16x4_t resultB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topB_u_32x4, factorsTop_u_32x4), bottomB_u_32x4, factorsBottom_u_32x4), 14);
334
335
336 // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
337 const uint8x8_t result_u_8x8 = vreinterpret_u8_u16(vsli_n_u16(resultA_u_16x4, resultB_u_16x4, 8));
338
339 ocean_assert(blocks1 == 0u);
340 const bool isLastBlock = y + 1u == tPatchSize;
341
342 if (isLastBlock)
343 {
344 uint8_t tempBuffer[8];
345 vst1_u8(tempBuffer, result_u_8x8);
346
347 memcpy(buffer, &tempBuffer, remainingAfterBlock7);
348 }
349 else
350 {
351 vst1_u8(buffer, result_u_8x8);
352 }
353
354 imageTopLeft += remainingAfterBlock7;
355 buffer += remainingAfterBlock7;
356 }
357
358 if constexpr (blocks1 != 0u)
359 {
360 const unsigned int factorTopLeft = factorTop * factorLeft;
361 const unsigned int factorTopRight = factorTop * factorRight;
362
363 const unsigned int factorBottomLeft = factorBottom * factorLeft;
364 const unsigned int factorBottomRight = factorBottom * factorRight;
365
366 const uint8_t* const imageBottomLeft = imageTopLeft + imageStrideElements;
367
368 for (unsigned int n = 0u; n < blocks1; ++n)
369 {
370 buffer[n] = uint8_t((imageTopLeft[n] * factorTopLeft + imageTopLeft[1u + n] * factorTopRight + imageBottomLeft[n] * factorBottomLeft + imageBottomLeft[1u + n] * factorBottomRight + 8192u) / 16384u);
371 }
372
373 imageTopLeft += blocks1;
374 buffer += blocks1;
375 }
376
377 imageTopLeft += imageStrideElements - tPatchSize;
378 }
379}
380
381template <unsigned int tChannels>
382template <unsigned int tPatchSize>
383inline void AdvancedFrameInterpolatorBilinearNEON::SpecializedForChannels<tChannels>::interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom)
384{
385 ocean_assert(imageTopLeft != nullptr && buffer != nullptr);
386 ocean_assert(imageStrideElements >= 1u * tPatchSize);
387
388 ocean_assert(factorRight <= 128u && factorBottom <= 128u);
389
390 const unsigned int factorLeft = 128u - factorRight;
391 const unsigned int factorTop = 128u - factorBottom;
392
393 const unsigned int factorTopLeft = factorTop * factorLeft;
394 const unsigned int factorTopRight = factorTop * factorRight;
395
396 const unsigned int factorBottomLeft = factorBottom * factorLeft;
397 const unsigned int factorBottomRight = factorBottom * factorRight;
398
399 const uint8_t* imageBottomLeft = imageTopLeft + imageStrideElements;
400
401 for (unsigned int y = 0u; y < tPatchSize; ++y)
402 {
403 for (unsigned int x = 0u; x < tPatchSize; ++x)
404 {
405 for (unsigned int n = 0u; n < tChannels; ++n)
406 {
407 buffer[n] = uint8_t((imageTopLeft[n] * factorTopLeft + imageTopLeft[tChannels + n] * factorTopRight + imageBottomLeft[n] * factorBottomLeft + imageBottomLeft[tChannels + n] * factorBottomRight + 8192u) / 16384u);
408 }
409
410 imageTopLeft += tChannels;
411 imageBottomLeft += tChannels;
412
413 buffer += tChannels;
414 }
415
416 imageTopLeft += imageStrideElements - tChannels * tPatchSize;
417 imageBottomLeft += imageStrideElements - tChannels * tPatchSize;
418 }
419}
420
421template <unsigned int tChannels, unsigned int tPatchSize, PixelCenter tPixelCenter, typename TScalar>
422inline void AdvancedFrameInterpolatorBilinearNEON::interpolateSquarePatch8BitPerChannel(const uint8_t* const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t* buffer, const VectorT2<TScalar>& position)
423{
424 static_assert(tChannels >= 1u, "Invalid channel number!");
425 static_assert(tPatchSize % 2u == 1u, "Invalid patch size!");
426
427 ocean_assert(image != nullptr && buffer != nullptr);
428 ocean_assert(tPatchSize + 1u <= width);
429
430 ocean_assert(tPatchSize >= 5u);
431
432 constexpr unsigned int tPatchSize_2 = tPatchSize / 2u;
433
434 const unsigned int imageStrideElements = width * tChannels + imagePaddingElements;
435
436 const VectorT2<TScalar> shiftedPosition = tPixelCenter == PC_TOP_LEFT ? position : position - VectorT2<TScalar>(TScalar(0.5), TScalar(0.5));
437
438 ocean_assert(shiftedPosition.x() >= TScalar(tPatchSize_2) && shiftedPosition.y() >= TScalar(tPatchSize_2));
439 ocean_assert(shiftedPosition.x() < TScalar(width - tPatchSize_2 - 1u));
440
441 const unsigned int left = (unsigned int)(shiftedPosition.x()) - tPatchSize_2;
442 const unsigned int top = (unsigned int)(shiftedPosition.y()) - tPatchSize_2;
443
444 ocean_assert(left + tPatchSize < width);
445
446 const TScalar tx = shiftedPosition.x() - TScalar(int(shiftedPosition.x()));
447 ocean_assert(tx >= TScalar(0) && tx <= TScalar(1));
448 const unsigned int factorRight = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
449
450 const TScalar ty = shiftedPosition.y() - TScalar(int(shiftedPosition.y()));
451 ocean_assert(ty >= 0 && ty <= 1);
452 const unsigned int factorBottom = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
453
454 const uint8_t* const imageTopLeft = image + top * imageStrideElements + left * tChannels;
455
456 SpecializedForChannels<tChannels>::template interpolateSquarePatch8BitPerChannel<tPatchSize>(imageTopLeft, imageStrideElements, buffer, factorRight, factorBottom);
457}
458
459}
460
461}
462
463}
464
465#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
466
467#endif // META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
This class allows to specialize functions for individual channels.
Definition AdvancedFrameInterpolatorBilinearNEON.h:42
static void interpolateSquarePatch8BitPerChannel(const uint8_t *imageTopLeft, const unsigned int imageStrideElements, uint8_t *buffer, const unsigned int factorRight, const unsigned int factorBottom)
Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
Definition AdvancedFrameInterpolatorBilinearNEON.h:383
This class implements advanced bilinear frame interpolation functions using NEON extensions.
Definition AdvancedFrameInterpolatorBilinearNEON.h:33
static void interpolateSquarePatch8BitPerChannel(const uint8_t *const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t *buffer, const VectorT2< TScalar > &position)
Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
Definition AdvancedFrameInterpolatorBilinearNEON.h:422
This class implements a vector with two elements.
Definition Vector2.h:96
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition CV.h:133
The namespace covering the entire Ocean framework.
Definition Accessor.h:15