Ocean
AdvancedFrameInterpolatorBilinearNEON.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
9 #define META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
10 
12 
13 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
14 
15 #include "ocean/cv/NEON.h"
16 
17 #include "ocean/math/Vector2.h"
18 
19 namespace Ocean
20 {
21 
22 namespace CV
23 {
24 
25 namespace Advanced
26 {
27 
28 /**
29  * This class implements advanced bilinear frame interpolation functions using NEON extensions.
30  * @ingroup cvadvanced
31  */
32 class OCEAN_CV_ADVANCED_EXPORT AdvancedFrameInterpolatorBilinearNEON
33 {
34  protected:
35 
36  /**
37  * This class allows to specialize functions for individual channels.
38  * @tparam tChannels Specifies the number of channels for the given frames, with range [1, infinity)
39  */
40  template <unsigned int tChannels>
42  {
43  public:
44 
45  /**
46  * Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
47  * The center of a pixel is expected to be located at the top-left corner of a pixel.
48  * @param imageTopLeft The pointer to the top-left position of the image, must be valid
49  * @param imageStrideElements The number of elements between two consecutive image rows (including padding), in elements, with range [tChannels * tPatchSize, infinity)
50  * @param buffer The target buffer with `tChannels * tSize * tSize` elements, must be valid
51  * @param factorRight The interpolation factor for the right pixels, with range [0, 128]
52  * @param factorBottom The interpolation factor for the bottom pixels, with range [0, 128]
53  * @tparam tPatchSize The size of the square patch (the edge length) in pixel, with range [1, infinity), must be odd
54  */
55  template <unsigned int tPatchSize>
56  static inline void interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom);
57  };
58 
59  public:
60 
61  /**
62  * Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and stores the interpolated data into a buffer.
63  * The center of a pixel is expected to be located at the top-left corner of a pixel.
64  * @param image The image in which the interpolated patch is located, must be valid
65  * @param width The width of the image, in pixel, with range [tPatchSize + 1, infinity)
66  * @param imagePaddingElements The number of padding elements at the end of each image row, in elements, with range [0, infinity)
67  * @param buffer The target buffer with `tChannels * tSize * tSize` elements, must be valid
68  * @param position The center position of the square region in the image, with range [tPatchSize/2, width - tPatchSize/2 - 1)x[tPatchSize/2, height - tPatchSize/2 - 1)
69  * @tparam tChannels The number of frame channels, with range [1, infinity)
70  * @tparam tPatchSize The size of the square patch (the edge length) in pixel, with range [5, infinity), must be odd
71  * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
72  * @tparam TScalar The scalar data type of the sub-pixel position
73  */
74  template <unsigned int tChannels, unsigned int tPatchSize, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
75  static inline void interpolateSquarePatch8BitPerChannel(const uint8_t* const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t* buffer, const VectorT2<TScalar>& position);
76 };
77 
78 template <>
79 template <unsigned int tPatchSize>
80 inline void AdvancedFrameInterpolatorBilinearNEON::SpecializedForChannels<1u>::interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom)
81 {
82  ocean_assert(imageTopLeft != nullptr && buffer != nullptr);
83  ocean_assert(imageStrideElements >= 1u * tPatchSize);
84 
85  ocean_assert(factorRight <= 128u && factorBottom <= 128u);
86 
87  const unsigned int factorLeft = 128u - factorRight;
88  const unsigned int factorTop = 128u - factorBottom;
89 
90  constexpr unsigned int blocks15 = tPatchSize / 15u;
91  constexpr unsigned int remainingAfterBlocks15 = tPatchSize % 15u;
92 
93  constexpr bool partialBlock15 = remainingAfterBlocks15 > 10u;
94  constexpr unsigned int remainingAfterPartialBlock15 = partialBlock15 ? 0u : remainingAfterBlocks15;
95 
96  constexpr bool block7 = remainingAfterPartialBlock15 >= 7u;
97  constexpr unsigned int remainingAfterBlock7 = remainingAfterPartialBlock15 % 7u;
98 
99  constexpr bool partialBlock7 = remainingAfterBlock7 >= 3u;
100  constexpr unsigned int remainingAfterPartialBlock7 = partialBlock7 ? 0u : remainingAfterBlock7;
101 
102  constexpr unsigned int blocks1 = remainingAfterPartialBlock7;
103 
104  // L R L R L R L R
105  const uint8x8_t factorsLeftRight_u_8x8 = vreinterpret_u8_u16(vdup_n_u16(uint16_t(factorLeft | (factorRight << 8u))));
106 
107  const uint32x4_t factorsTop_u_32x4 = vdupq_n_u32(factorTop);
108  const uint32x4_t factorsBottom_u_32x4 = vdupq_n_u32(factorBottom);
109 
110  for (unsigned int y = 0u; y < tPatchSize; ++y)
111  {
112  for (unsigned int x = 0u; x < blocks15; ++x)
113  {
114  const uint8x16_t top_u_8x16 = vld1q_u8(imageTopLeft);
115  const uint8x16_t bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements);
116 
117  // top[1], top[2], top[3], ..., top[14], top[15], X
118  const uint8x16_t topB_u_8x16 = vextq_u8(top_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), 1);
119  const uint8x16_t bottomB_u_8x16 = vextq_u8(bottom_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), 1);
120 
121 
122  // top[0] * L, top[1] * R, top[2] * L, ...
123  const uint32x4_t topLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(top_u_8x16), factorsLeftRight_u_8x8));
124  const uint32x4_t bottomLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
125 
126  // top[1] * L, top[2] * R, top[3] * L, ...
127  const uint32x4_t topLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(topB_u_8x16), factorsLeftRight_u_8x8));
128  const uint32x4_t bottomLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
129 
130  // top[8] * L, top[9] * R, top[10] * L, ...
131  const uint32x4_t topHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(top_u_8x16), factorsLeftRight_u_8x8));
132  const uint32x4_t bottomHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
133 
134  // top[9] * L, top[10] * R, top[11] * L, ...
135  const uint32x4_t topHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(topB_u_8x16), factorsLeftRight_u_8x8));
136  const uint32x4_t bottomHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
137 
138 
139  // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
140  const uint16x4_t resultLowA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowA_u_32x4, factorsTop_u_32x4), bottomLowA_u_32x4, factorsBottom_u_32x4), 14);
141  const uint16x4_t resultHighA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighA_u_32x4, factorsTop_u_32x4), bottomHighA_u_32x4, factorsBottom_u_32x4), 14);
142 
143  const uint16x4_t resultLowB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowB_u_32x4, factorsTop_u_32x4), bottomLowB_u_32x4, factorsBottom_u_32x4), 14);
144  const uint16x4_t resultHighB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighB_u_32x4, factorsTop_u_32x4), bottomHighB_u_32x4, factorsBottom_u_32x4), 14);
145 
146  const uint16x8_t resultA_u_16x8 = vcombine_u16(resultLowA_u_16x4, resultHighA_u_16x4);
147  const uint16x8_t resultB_u_16x8 = vcombine_u16(resultLowB_u_16x4, resultHighB_u_16x4);
148 
149  // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
150  const uint8x16_t result_u_8x16 = vreinterpretq_u8_u16(vsliq_n_u16(resultA_u_16x8, resultB_u_16x8, 8));
151 
152 
153  const bool isLastBlock = (y + 1u == tPatchSize) && (x + 1u == blocks15) && (!block7 && !partialBlock7 && blocks1 == 0u);
154 
155  if (isLastBlock)
156  {
157  uint8_t tempBuffer[16];
158  vst1q_u8(tempBuffer, result_u_8x16);
159 
160  memcpy(buffer, &tempBuffer, 15);
161  }
162  else
163  {
164  vst1q_u8(buffer, result_u_8x16);
165  }
166 
167  imageTopLeft += 15;
168  buffer += 15;
169  }
170 
171  if constexpr (partialBlock15)
172  {
173  ocean_assert(!block7 && !partialBlock7 && blocks1 == 0u);
174 
175  uint8x16_t top_u_8x16;
176  uint8x16_t bottom_u_8x16;
177 
178  if (y < tPatchSize - 1u)
179  {
180  top_u_8x16 = vld1q_u8(imageTopLeft);
181  bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements);
182  }
183  else
184  {
185  constexpr unsigned int overlapping = 16u - (remainingAfterBlocks15 + 1u);
186 
187  top_u_8x16 = vld1q_u8(imageTopLeft - overlapping);
188  bottom_u_8x16 = vld1q_u8(imageTopLeft + imageStrideElements - overlapping);
189 
190  top_u_8x16 = vextq_u8(top_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), overlapping);
191  bottom_u_8x16 = vextq_u8(bottom_u_8x16, vreinterpretq_u8_u32(factorsTop_u_32x4), overlapping);
192  }
193 
194  // top[1], top[2], top[3], ..., top[14], top[15], top[0]
195  const uint8x16_t topB_u_8x16 = vextq_u8(top_u_8x16, top_u_8x16, 1);
196  const uint8x16_t bottomB_u_8x16 = vextq_u8(bottom_u_8x16, bottom_u_8x16, 1);
197 
198 
199  // top[0] * L, top[1] * R, top[2] * L, ...
200  const uint32x4_t topLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(top_u_8x16), factorsLeftRight_u_8x8));
201  const uint32x4_t bottomLowA_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
202 
203  // top[1] * L, top[2] * R, top[3] * L, ...
204  const uint32x4_t topLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(topB_u_8x16), factorsLeftRight_u_8x8));
205  const uint32x4_t bottomLowB_u_32x4 = vpaddlq_u16(vmull_u8(vget_low_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
206 
207  // top[8] * L, top[9] * R, top[10] * L, ...
208  const uint32x4_t topHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(top_u_8x16), factorsLeftRight_u_8x8));
209  const uint32x4_t bottomHighA_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottom_u_8x16), factorsLeftRight_u_8x8));
210 
211  // top[9] * L, top[10] * R, top[11] * L, ...
212  const uint32x4_t topHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(topB_u_8x16), factorsLeftRight_u_8x8));
213  const uint32x4_t bottomHighB_u_32x4 = vpaddlq_u16(vmull_u8(vget_high_u8(bottomB_u_8x16), factorsLeftRight_u_8x8));
214 
215 
216  // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
217  const uint16x4_t resultLowA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowA_u_32x4, factorsTop_u_32x4), bottomLowA_u_32x4, factorsBottom_u_32x4), 14);
218  const uint16x4_t resultHighA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighA_u_32x4, factorsTop_u_32x4), bottomHighA_u_32x4, factorsBottom_u_32x4), 14);
219 
220  const uint16x4_t resultLowB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topLowB_u_32x4, factorsTop_u_32x4), bottomLowB_u_32x4, factorsBottom_u_32x4), 14);
221  const uint16x4_t resultHighB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topHighB_u_32x4, factorsTop_u_32x4), bottomHighB_u_32x4, factorsBottom_u_32x4), 14);
222 
223  const uint16x8_t resultA_u_16x8 = vcombine_u16(resultLowA_u_16x4, resultHighA_u_16x4);
224  const uint16x8_t resultB_u_16x8 = vcombine_u16(resultLowB_u_16x4, resultHighB_u_16x4);
225 
226  // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
227  const uint8x16_t result_u_8x16 = vreinterpretq_u8_u16(vsliq_n_u16(resultA_u_16x8, resultB_u_16x8, 8));
228 
229  ocean_assert(!block7 && !partialBlock7 && blocks1 == 0u);
230  const bool isLastBlock = y + 1u == tPatchSize;
231 
232  if (isLastBlock)
233  {
234  uint8_t tempBuffer[16];
235  vst1q_u8(tempBuffer, result_u_8x16);
236 
237  memcpy(buffer, &tempBuffer, remainingAfterBlocks15);
238  }
239  else
240  {
241  vst1q_u8(buffer, result_u_8x16);
242  }
243 
244  imageTopLeft += remainingAfterBlocks15;
245  buffer += remainingAfterBlocks15;
246  }
247 
248  if constexpr (block7)
249  {
250  const uint8x8_t top_u_8x8 = vld1_u8(imageTopLeft);
251  const uint8x8_t bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements);
252 
253  // top[1], top[2], top[3], ..., top[6], top[7], X
254  const uint8x8_t topB_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, 1);
255  const uint8x8_t bottomB_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, 1);
256 
257 
258  // top[0] * L, top[1] * R, top[2] * L, ...
259  const uint32x4_t topA_u_32x4 = vpaddlq_u16(vmull_u8(top_u_8x8, factorsLeftRight_u_8x8));
260  const uint32x4_t bottomA_u_32x4 = vpaddlq_u16(vmull_u8(bottom_u_8x8, factorsLeftRight_u_8x8));
261 
262  // top[1] * L, top[2] * R, top[3] * L, ...
263  const uint32x4_t topB_u_32x4 = vpaddlq_u16(vmull_u8(topB_u_8x8, factorsLeftRight_u_8x8));
264  const uint32x4_t bottomB_u_32x4 = vpaddlq_u16(vmull_u8(bottomB_u_8x8, factorsLeftRight_u_8x8));
265 
266 
267  // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
268  const uint16x4_t resultA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topA_u_32x4, factorsTop_u_32x4), bottomA_u_32x4, factorsBottom_u_32x4), 14);
269  const uint16x4_t resultB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topB_u_32x4, factorsTop_u_32x4), bottomB_u_32x4, factorsBottom_u_32x4), 14);
270 
271 
272  // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
273  const uint8x8_t result_u_8x8 = vreinterpret_u8_u16(vsli_n_u16(resultA_u_16x4, resultB_u_16x4, 8));
274 
275  const bool isLastBlock = (y + 1u == tPatchSize) && (!partialBlock7 && blocks1 == 0u);
276 
277  if (isLastBlock)
278  {
279  uint8_t tempBuffer[8];
280  vst1_u8(tempBuffer, result_u_8x8);
281 
282  memcpy(buffer, &tempBuffer, 7);
283  }
284  else
285  {
286  vst1_u8(buffer, result_u_8x8);
287  }
288 
289  imageTopLeft += 7;
290  buffer += 7;
291  }
292 
293  if constexpr (partialBlock7)
294  {
295  ocean_assert(blocks1 == 0u);
296 
297  uint8x8_t top_u_8x8;
298  uint8x8_t bottom_u_8x8;
299 
300  if (y < tPatchSize - 1u)
301  {
302  top_u_8x8 = vld1_u8(imageTopLeft);
303  bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements);
304  }
305  else
306  {
307  constexpr unsigned int overlapping = 8u - (remainingAfterBlock7 + 1u);
308 
309  top_u_8x8 = vld1_u8(imageTopLeft - overlapping);
310  bottom_u_8x8 = vld1_u8(imageTopLeft + imageStrideElements - overlapping);
311 
312  top_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, overlapping);
313  bottom_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, overlapping);
314  }
315 
316 
317  // top[1], top[2], top[3], ..., top[6], top[7], X
318  const uint8x8_t topB_u_8x8 = vext_u8(top_u_8x8, factorsLeftRight_u_8x8, 1);
319  const uint8x8_t bottomB_u_8x8 = vext_u8(bottom_u_8x8, factorsLeftRight_u_8x8, 1);
320 
321 
322  // top[0] * L, top[1] * R, top[2] * L, ...
323  const uint32x4_t topA_u_32x4 = vpaddlq_u16(vmull_u8(top_u_8x8, factorsLeftRight_u_8x8));
324  const uint32x4_t bottomA_u_32x4 = vpaddlq_u16(vmull_u8(bottom_u_8x8, factorsLeftRight_u_8x8));
325 
326  // top[1] * L, top[2] * R, top[3] * L, ...
327  const uint32x4_t topB_u_32x4 = vpaddlq_u16(vmull_u8(topB_u_8x8, factorsLeftRight_u_8x8));
328  const uint32x4_t bottomB_u_32x4 = vpaddlq_u16(vmull_u8(bottomB_u_8x8, factorsLeftRight_u_8x8));
329 
330 
331  // (top[0] * T + bottom[0] * B + 8192) / 16384, (top[1] * T + bottom[1] * B + 8192) / 16384, ...
332  const uint16x4_t resultA_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topA_u_32x4, factorsTop_u_32x4), bottomA_u_32x4, factorsBottom_u_32x4), 14);
333  const uint16x4_t resultB_u_16x4 = vrshrn_n_u32(vmlaq_u32(vmulq_u32(topB_u_32x4, factorsTop_u_32x4), bottomB_u_32x4, factorsBottom_u_32x4), 14);
334 
335 
336  // resultA[0], resultB[0], resultA[1], resultB[1], resultA[2], ...
337  const uint8x8_t result_u_8x8 = vreinterpret_u8_u16(vsli_n_u16(resultA_u_16x4, resultB_u_16x4, 8));
338 
339  ocean_assert(blocks1 == 0u);
340  const bool isLastBlock = y + 1u == tPatchSize;
341 
342  if (isLastBlock)
343  {
344  uint8_t tempBuffer[8];
345  vst1_u8(tempBuffer, result_u_8x8);
346 
347  memcpy(buffer, &tempBuffer, remainingAfterBlock7);
348  }
349  else
350  {
351  vst1_u8(buffer, result_u_8x8);
352  }
353 
354  imageTopLeft += remainingAfterBlock7;
355  buffer += remainingAfterBlock7;
356  }
357 
358  if constexpr (blocks1 != 0u)
359  {
360  const unsigned int factorTopLeft = factorTop * factorLeft;
361  const unsigned int factorTopRight = factorTop * factorRight;
362 
363  const unsigned int factorBottomLeft = factorBottom * factorLeft;
364  const unsigned int factorBottomRight = factorBottom * factorRight;
365 
366  const uint8_t* const imageBottomLeft = imageTopLeft + imageStrideElements;
367 
368  for (unsigned int n = 0u; n < blocks1; ++n)
369  {
370  buffer[n] = uint8_t((imageTopLeft[n] * factorTopLeft + imageTopLeft[1u + n] * factorTopRight + imageBottomLeft[n] * factorBottomLeft + imageBottomLeft[1u + n] * factorBottomRight + 8192u) / 16384u);
371  }
372 
373  imageTopLeft += blocks1;
374  buffer += blocks1;
375  }
376 
377  imageTopLeft += imageStrideElements - tPatchSize;
378  }
379 }
380 
381 template <unsigned int tChannels>
382 template <unsigned int tPatchSize>
383 inline void AdvancedFrameInterpolatorBilinearNEON::SpecializedForChannels<tChannels>::interpolateSquarePatch8BitPerChannel(const uint8_t* imageTopLeft, const unsigned int imageStrideElements, uint8_t* buffer, const unsigned int factorRight, const unsigned int factorBottom)
384 {
385  ocean_assert(imageTopLeft != nullptr && buffer != nullptr);
386  ocean_assert(imageStrideElements >= 1u * tPatchSize);
387 
388  ocean_assert(factorRight <= 128u && factorBottom <= 128u);
389 
390  const unsigned int factorLeft = 128u - factorRight;
391  const unsigned int factorTop = 128u - factorBottom;
392 
393  const unsigned int factorTopLeft = factorTop * factorLeft;
394  const unsigned int factorTopRight = factorTop * factorRight;
395 
396  const unsigned int factorBottomLeft = factorBottom * factorLeft;
397  const unsigned int factorBottomRight = factorBottom * factorRight;
398 
399  const uint8_t* imageBottomLeft = imageTopLeft + imageStrideElements;
400 
401  for (unsigned int y = 0u; y < tPatchSize; ++y)
402  {
403  for (unsigned int x = 0u; x < tPatchSize; ++x)
404  {
405  for (unsigned int n = 0u; n < tChannels; ++n)
406  {
407  buffer[n] = uint8_t((imageTopLeft[n] * factorTopLeft + imageTopLeft[tChannels + n] * factorTopRight + imageBottomLeft[n] * factorBottomLeft + imageBottomLeft[tChannels + n] * factorBottomRight + 8192u) / 16384u);
408  }
409 
410  imageTopLeft += tChannels;
411  imageBottomLeft += tChannels;
412 
413  buffer += tChannels;
414  }
415 
416  imageTopLeft += imageStrideElements - tChannels * tPatchSize;
417  imageBottomLeft += imageStrideElements - tChannels * tPatchSize;
418  }
419 }
420 
421 template <unsigned int tChannels, unsigned int tPatchSize, PixelCenter tPixelCenter, typename TScalar>
422 inline void AdvancedFrameInterpolatorBilinearNEON::interpolateSquarePatch8BitPerChannel(const uint8_t* const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t* buffer, const VectorT2<TScalar>& position)
423 {
424  static_assert(tChannels >= 1u, "Invalid channel number!");
425  static_assert(tPatchSize % 2u == 1u, "Invalid patch size!");
426 
427  ocean_assert(image != nullptr && buffer != nullptr);
428  ocean_assert(tPatchSize + 1u <= width);
429 
430  ocean_assert(tPatchSize >= 5u);
431 
432  constexpr unsigned int tPatchSize_2 = tPatchSize / 2u;
433 
434  const unsigned int imageStrideElements = width * tChannels + imagePaddingElements;
435 
436  const VectorT2<TScalar> shiftedPosition = tPixelCenter == PC_TOP_LEFT ? position : position - VectorT2<TScalar>(TScalar(0.5), TScalar(0.5));
437 
438  ocean_assert(shiftedPosition.x() >= TScalar(tPatchSize_2) && shiftedPosition.y() >= TScalar(tPatchSize_2));
439  ocean_assert(shiftedPosition.x() < TScalar(width - tPatchSize_2 - 1u));
440 
441  const unsigned int left = (unsigned int)(shiftedPosition.x()) - tPatchSize_2;
442  const unsigned int top = (unsigned int)(shiftedPosition.y()) - tPatchSize_2;
443 
444  ocean_assert(left + tPatchSize < width);
445 
446  const TScalar tx = shiftedPosition.x() - TScalar(int(shiftedPosition.x()));
447  ocean_assert(tx >= TScalar(0) && tx <= TScalar(1));
448  const unsigned int factorRight = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
449 
450  const TScalar ty = shiftedPosition.y() - TScalar(int(shiftedPosition.y()));
451  ocean_assert(ty >= 0 && ty <= 1);
452  const unsigned int factorBottom = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
453 
454  const uint8_t* const imageTopLeft = image + top * imageStrideElements + left * tChannels;
455 
456  SpecializedForChannels<tChannels>::template interpolateSquarePatch8BitPerChannel<tPatchSize>(imageTopLeft, imageStrideElements, buffer, factorRight, factorBottom);
457 }
458 
459 }
460 
461 }
462 
463 }
464 
465 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
466 
467 #endif // META_OCEAN_CV_ADVANCED_ADVANCED_FRAME_INTERPOLATOR_BILINEAR_NEON_H
This class allows to specialize functions for individual channels.
Definition: AdvancedFrameInterpolatorBilinearNEON.h:42
static void interpolateSquarePatch8BitPerChannel(const uint8_t *imageTopLeft, const unsigned int imageStrideElements, uint8_t *buffer, const unsigned int factorRight, const unsigned int factorBottom)
Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and sto...
Definition: AdvancedFrameInterpolatorBilinearNEON.h:383
This class implements advanced bilinear frame interpolation functions using NEON extensions.
Definition: AdvancedFrameInterpolatorBilinearNEON.h:33
static void interpolateSquarePatch8BitPerChannel(const uint8_t *const image, const unsigned int width, const unsigned int imagePaddingElements, uint8_t *buffer, const VectorT2< TScalar > &position)
Interpolates the content of a square image patch with sub-pixel accuracy inside a given image and sto...
Definition: AdvancedFrameInterpolatorBilinearNEON.h:422
This class implements a vector with two elements.
Definition: Vector2.h:96
const T & x() const noexcept
Returns the x value.
Definition: Vector2.h:698
const T & y() const noexcept
Returns the y value.
Definition: Vector2.h:710
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition: CV.h:133
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15