Ocean
Loading...
Searching...
No Matches
FrameTransposer.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_TRANSPOSER_H
9#define META_OCEAN_CV_FRAME_TRANSPOSER_H
10
11#include "ocean/cv/CV.h"
12#include "ocean/cv/NEON.h"
14
15#include "ocean/base/DataType.h"
16#include "ocean/base/Frame.h"
17#include "ocean/base/Worker.h"
18
19namespace Ocean
20{
21
22namespace CV
23{
24
25/**
26 * This class implements a frame transposer.
27 * @ingroup cv
28 */
29class OCEAN_CV_EXPORT FrameTransposer
30{
31 public:
32
33 /**
34 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
35 * Best practice is to avoid using these functions if binary size matters,<br>
36 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
37 */
38 class OCEAN_CV_EXPORT Comfort
39 {
40 public:
41
42 /**
43 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
44 * @param input The input frame which will be rotated, must be valid
45 * @param output The resulting rotated output frame, the frame type will be set automatically
46 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
47 * @param worker Optional worker object to distribute the computation
48 * @return True, if succeeded
49 */
50 static bool rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker = nullptr);
51
52 /**
53 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
54 * @param frame The frame to rotate, must be valid
55 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
56 * @param worker Optional worker object to distribute the computation
57 * @return True, if succeeded
58 */
59 static inline bool rotate90(Frame& frame, const bool clockwise, Worker* worker = nullptr);
60
61 /**
62 * Rotates a given frame by 180 degrees.
63 * @param input The input frame which will be rotated, must be valid
64 * @param output The resulting rotated output frame, the frame type will be set automatically
65 * @param worker Optional worker object to distribute the computation
66 * @return True, if succeeded
67 */
68 static bool rotate180(const Frame& input, Frame& output, Worker* worker = nullptr);
69
70 /**
71 * Rotates a given frame by 180 degrees.
72 * @param frame The frame to rotate, must be valid
73 * @param worker Optional worker object to distribute the computation
74 * @return True, if succeeded
75 */
76 static inline bool rotate180(Frame& frame, Worker* worker = nullptr);
77
78 /**
79 * Rotates a given frame with 90 degree steps.
80 * @param input The input frame which will be rotated, must be valid
81 * @param output The resulting rotated output frame, the frame type will be set automatically
82 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
83 * @param worker Optional worker object to distribute the computation
84 * @return True, if succeeded
85 */
86 static bool rotate(const Frame& input, Frame& output, const int angle, Worker* worker = nullptr);
87
88 /**
89 * Rotates a given frame with 90 degree steps.
90 * @param frame The frame to rotate, must be valid
91 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
92 * @param worker Optional worker object to distribute the computation
93 * @return True, if succeeded
94 */
95 static inline bool rotate(Frame& frame, const int angle, Worker* worker = nullptr);
96 };
97
98 protected:
99
100 /**
101 * Definition of individual flip directions which can be applied to a transposed frame.
102 * Flipping the transposed result allows to rotate the image by 90 degree (clockwise and counter clockwise).
103 */
105 {
106 /// Applying no flip.
108 /// Applying a left-right flip like a mirror, combined with a transpose operation an image can be rotated clockwise.
110 /// Applying a top-bottom flip, combined with a transpose operation an image can be rotated counter clockwise.
111 FD_TOP_BOTTOM
112 };
113
114 /**
115 * Helper class for functions transposing blocks.
116 * The class is necessary to allow a partially specialization of template parameters.
117 * @tparam T The data type of each elements, e.g., 'uint8_t', 'int8_t', 'float'
118 * @tparam tChannels The number of channels the given data has, with range [1, infinity)
119 */
120 template <typename T, unsigned int tChannels>
122 {
123 public:
124
125 /**
126 * Transposes a block of 8x8 pixels.
127 * @param sourceBlock The pointer to the start location of the source block, must be valid
128 * @param targetBlock The pointer to the start location of the target block, must be valid
129 * @param sourceStrideElements The number of elements between two successive rows, in elements, with range [8 * tChannels, infinity)
130 * @param targetStrideElements The number of elements between two successive rows, in elements, with range [8 * tChannels, infinity)
131 * @tparam tFlipDirection The flip direction to be applied after transposing the block
132 * @see transposeBlock().
133 */
134 template <FlipDirection tFlipDirection>
135 static OCEAN_FORCE_INLINE void transposeBlock8x8(const T* sourceBlock, T* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
136
137 /**
138 * Transposes a block of n x m pixels.
139 * This function should be used for blocks smaller than 8x8.
140 * @param sourceBlock The pointer to the start location of the source block, must be valid
141 * @param targetBlock The pointer to the start location of the target block, must be valid
142 * @param blockWidth The width of the block to transpose, with range [1, 7]
143 * @param blockHeight The height of the block to transpose, with range [1, 7]
144 * @param sourceStrideElements The number of elements between two successive rows, in elements, with range [8 * tChannels, infinity)
145 * @param targetStrideElements The number of elements between two successive rows, in elements, with range [8 * tChannels, infinity)
146 * @tparam tFlipDirection The flip direction to be applied after transposing the block
147 * @see transposeBlock8x8().
148 */
149 template <FlipDirection tFlipDirection>
150 static OCEAN_FORCE_INLINE void transposeBlock(const T* sourceBlock, T* targetBlock, const unsigned int blockWidth, const unsigned int blockHeight, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
151
152#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
153
154 /**
155 * Transposes a block of 4x4 pixels.
156 * @param sourceBlock The pointer to the start location of the source block, must be valid
157 * @param targetBlock The pointer to the start location of the target block, must be valid
158 * @param sourceStrideElements The number of elements between two successive rows, in elements, with range [4 * tChannels, infinity)
159 * @param targetStrideElements The number of elements between two successive rows, in elements, with range [4 * tChannels, infinity)
160 * @tparam tFlipDirection The flip direction to be applied after transposing the block
161 * @see transposeBlock().
162 */
163 template <FlipDirection tFlipDirection>
164 static OCEAN_FORCE_INLINE void transposeBlock4x4NEON(const T* sourceBlock, T* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
165
166#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
167 };
168
169 public:
170
171 /**
172 * Transposes a given frame.
173 * Beware: This function has a significantly bigger binary size impact than the corresponding template-based function.
174 * @param source The source frame to transpose, must be valid
175 * @param target The target frame receiving the transposed image, if the frame type of the target frame does not match the transposed source frame, the target frame will be adjusted accordingly, must not be 'source'
176 * @param worker Optional worker to distribute the computation
177 * @return True, if succeeded
178 */
179 static bool transpose(const Frame& source, Frame& target, Worker* worker = nullptr);
180
181 /**
182 * Transposes a given frame.
183 * Beware: This function has a significantly bigger binary size impact than the corresponding template-based function.
184 * @param frame The frame to transpose, must be valid
185 * @param worker Optional worker to distribute the computation
186 * @return True, if succeeded
187 */
188 static inline bool transpose(Frame& frame, Worker* worker = nullptr);
189
190 /**
191 * Transposes a given image buffer.
192 * @param source The source buffer to transpose, must be valid
193 * @param target The target buffer receiving the transposed image, must not be 'source', must be valid
194 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
195 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
196 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
197 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
198 * @param worker Optional worker to distribute the computation
199 * @tparam T The data type of each channel
200 * @tparam tChannels The number of frame channels, with range [1, infinity)
201 */
202 template <typename T, unsigned int tChannels>
203 static void transpose(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
204
205 /**
206 * Rotates a given image buffer 90 degrees clockwise or counter clockwise.
207 * @param source The source buffer to transpose, must be valid
208 * @param target The target buffer receiving the rotated image, must not be 'source', must be valid
209 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
210 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
211 * @param clockwise True, to rotate the source image clockwise; False, to rotate the image counter clockwise
212 * @param worker Optional worker to distribute the computation
213 * @param sourcePaddingElements The optional number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
214 * @param targetPaddingElements The optional number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
215 * @tparam T The data type of each channel
216 * @tparam tChannels The number of frame channels, with range [1, infinity)
217 */
218 template <typename T, unsigned int tChannels>
219 static void rotate90(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
220
221 /**
222 * Rotates a given image buffer 180 degrees.
223 * @param source The source buffer to transpose, must be valid
224 * @param target The target buffer receiving the rotated image, must not be 'source', must be valid
225 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
226 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
227 * @param worker Optional worker to distribute the computation
228 * @param sourcePaddingElements The optional number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
229 * @param targetPaddingElements The optional number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
230 * @tparam T The data type of each channel
231 * @tparam tChannels The number of frame channels, with range [1, infinity)
232 */
233 template <typename T, unsigned int tChannels>
234 static void rotate180(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
235
236 /**
237 * Rotates a given image with 90 degree steps.
238 * @param source The source buffer to rotated, must be valid
239 * @param target The target buffer receiving the rotated image, must not be 'source', must be valid
240 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
241 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
242 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
243 * @param worker Optional worker to distribute the computation
244 * @param sourcePaddingElements The optional number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
245 * @param targetPaddingElements The optional number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
246 * @return True, if succeeded
247 * @tparam T The data type of each channel
248 * @tparam tChannels The number of frame channels, with range [1, infinity)
249 */
250 template <typename T, unsigned int tChannels>
251 static bool rotate(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const int angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
252
253 protected:
254
255 /**
256 * Transposes the subset of a given image buffer.
257 * @param source The source buffer to transpose
258 * @param target The target buffer receiving the transposed image
259 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
260 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
261 * @param sourcePaddingElements The optional number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
262 * @param targetPaddingElements The optional number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
263 * @param firstSourceRow First source row to be handled
264 * @param numberSourceRows The number of source rows to be handled
265 * @tparam T The data type of each channel
266 * @tparam tChannels The number of frame channels, with range [1, infinity)
267 * @tparam tFlipDirection The flip direction to be applied after transposing
268 * @see transposeBlock8x8(), transposeBlock().
269 */
270 template <typename T, unsigned int tChannels, FlipDirection tFlipDirection>
271 static void transposeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstSourceRow, const unsigned int numberSourceRows);
272
273 /**
274 * Rotates a subset of a given frame either clockwise or counter-clockwise by 90 degree.
275 * @param source The source frame which will be rotated, must be valid
276 * @param target The resulting rotated target frame, must be valid and must have the same buffer size as the source frame
277 * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
278 * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
279 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
280 * @param sourcePaddingElements Number of padding elements in the source frame, range: [0, infinity)
281 * @param targetPaddingElements Number of padding elements in the target frame, range: [0, infinity)
282 * @param firstTargetRow The first target row to be handled, with range [0, sourceWidth)
283 * @param numberTargetRows The number of target rows to be handled, with range [1, sourceWidth - firstTargetRow]
284 * @tparam TElementType Data type of the elements of the image pixels
285 * @tparam tChannels Number of data channels, with range [1, infinity)
286 */
287 template <typename TElementType, unsigned int tChannels>
288 static void rotate90Subset(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
289};
290
291inline bool FrameTransposer::Comfort::rotate90(Frame& frame, const bool clockwise, Worker* worker)
292{
293 ocean_assert(frame.isValid());
294
295 Frame tmpFrame;
296 if (!rotate90(frame, tmpFrame, clockwise, worker))
297 {
298 return false;
299 }
300
301 tmpFrame.setTimestamp(frame.timestamp());
302 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
303
304 frame = std::move(tmpFrame);
305
306 return true;
307}
308
310{
311 ocean_assert(frame.isValid());
312
313 Frame tmpFrame;
314 if (!rotate180(frame, tmpFrame, worker))
315 {
316 return false;
317 }
318
319 tmpFrame.setTimestamp(frame.timestamp());
320 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
321
322 frame = std::move(tmpFrame);
323
324 return true;
325}
326
327inline bool FrameTransposer::Comfort::rotate(Frame& frame, const int angle, Worker* worker)
328{
329 ocean_assert(frame.isValid());
330
331 if (angle == 0)
332 {
333 return frame.isValid();
334 }
335
336 Frame tmpFrame;
337 if (!rotate(frame, tmpFrame, angle, worker))
338 {
339 return false;
340 }
341
342 tmpFrame.setTimestamp(frame.timestamp());
343 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
344
345 frame = std::move(tmpFrame);
346
347 return true;
348}
349
350inline bool FrameTransposer::transpose(Frame& frame, Worker* worker)
351{
352 ocean_assert(frame);
353
354 Frame tmpFrame;
355
356 if (!transpose(frame, tmpFrame, worker))
357 {
358 return false;
359 }
360
361 tmpFrame.setTimestamp(frame.timestamp());
362 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
363
364 frame = std::move(tmpFrame);
365 return true;
366}
367
368template <typename T, unsigned int tChannels>
369void FrameTransposer::transpose(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
370{
371 static_assert(tChannels != 0u, "Invalid channel number!");
372
373 ocean_assert(source && target);
374 ocean_assert(source != target);
375 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
376
377 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
378 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
379
380 const unsigned int blocks8 = xBlocks8 * yBlocks8;
381
382 typedef typename TypeMapper<T>::Type MappedType;
383
384 if (worker && blocks8 >= 800u)
385 {
386 worker->executeFunction(Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_NONE>, (const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
387 }
388 else
389 {
390 transposeSubset<MappedType, tChannels, FD_NONE>((const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
391 }
392}
393
394template <typename T, unsigned int tChannels>
395void FrameTransposer::rotate90(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
396{
397 static_assert(tChannels != 0u, "Invalid channel number!");
398
399 ocean_assert(source && target);
400 ocean_assert(source != target);
401 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
402
403 typedef typename TypeMapper<T>::Type MappedType;
404
405#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION > 0
406
407 // on x86 CPUs, the SIMD implementation is slower than the non-SIMD implementation
408 // therefore, using a function without explicit SIMD instructions
409
410 if (worker)
411 {
412 worker->executeFunction(Worker::Function::createStatic(rotate90Subset<MappedType, tChannels>, (const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, sourceWidth, 7u, 8u, 20u);
413 }
414 else
415 {
416 rotate90Subset<MappedType, tChannels>((const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, 0u, sourceWidth);
417 }
418
419#else
420
421 // on non-x86 CPUs (e.g., ARM), the SIMD implementation is significantly faster
422
423 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
424 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
425
426 const unsigned int blocks8 = xBlocks8 * yBlocks8;
427
428 if (worker && blocks8 >= 800u)
429 {
430 if (clockwise)
431 {
432 worker->executeFunction(Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_LEFT_RIGHT>, (const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
433 }
434 else
435 {
436 worker->executeFunction(Worker::Function::createStatic(&transposeSubset<MappedType, tChannels, FD_TOP_BOTTOM>, (const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, blocks8);
437 }
438 }
439 else
440 {
441 if (clockwise)
442 {
443 transposeSubset<MappedType, tChannels, FD_LEFT_RIGHT>((const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
444 }
445 else
446 {
447 transposeSubset<MappedType, tChannels, FD_TOP_BOTTOM>((const MappedType*)(source), (MappedType*)(target), sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, 0u, blocks8);
448 }
449 }
450
451#endif
452}
453
454template <typename T, unsigned int tChannels>
455void FrameTransposer::rotate180(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
456{
457 static_assert(tChannels != 0u, "Invalid channel number!");
458
459 ocean_assert(source != nullptr);
460 ocean_assert(target != nullptr);
461
462 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
463
464 FrameChannels::transformGeneric<T, tChannels>(source, target, sourceWidth, sourceHeight, CV::FrameConverter::CONVERT_FLIPPED_AND_MIRRORED, sourcePaddingElements, targetPaddingElements, worker);
465}
466
467template <typename T, unsigned int tChannels>
468bool FrameTransposer::rotate(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const int angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
469{
470 static_assert(tChannels != 0u, "Invalid channel number!");
471
472 ocean_assert(source != nullptr);
473 ocean_assert(target != nullptr);
474
475 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
476
477 if (angle % 90 != 0)
478 {
479 ocean_assert(false && "Angle must be multiple of +/- 90");
480 return false;
481 }
482
483 int adjustedAngle = angle % 360;
484
485 if (adjustedAngle < 0)
486 {
487 adjustedAngle = 360 + adjustedAngle;
488 }
489
490 ocean_assert(adjustedAngle == 0 || adjustedAngle == 90 || adjustedAngle == 180 || adjustedAngle == 270);
491
492 switch (adjustedAngle)
493 {
494 case 0:
495 CV::FrameChannels::subFrame<T>(source, target, sourceWidth, sourceHeight, sourceWidth, sourceHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
496 return true;
497
498 case 90:
499 rotate90<T, tChannels>(source, target, sourceWidth, sourceHeight, true /*clockwise*/, sourcePaddingElements, targetPaddingElements, worker);
500 return true;
501
502 case 180:
503 rotate180<T, tChannels>(source, target, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements, worker);
504 return true;
505
506 case 270:
507 rotate90<T, tChannels>(source, target, sourceWidth, sourceHeight, false /*clockwise*/, sourcePaddingElements, targetPaddingElements, worker);
508 return true;
509
510 default:
511 break;
512 }
513
514 ocean_assert(false && "This should never happen!");
515 return false;
516}
517
518template <typename T, unsigned int tChannels, FrameTransposer::FlipDirection tFlipDirection>
519void FrameTransposer::transposeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstBlock8, const unsigned int numberBlocks8)
520{
521 static_assert(sizeof(T) != 0, "Invalid data type!");
522 static_assert(tChannels != 0u, "Invalid channel number!");
523
524 ocean_assert(source && target);
525 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
526
527 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
528 const unsigned int targetStrideElements = sourceHeight * tChannels + targetPaddingElements;
529
530 const unsigned int xBlocks8 = (sourceWidth + 7u) / 8u;
531 const unsigned int yBlocks8 = (sourceHeight + 7u) / 8u;
532 ocean_assert(firstBlock8 + numberBlocks8 <= xBlocks8 * yBlocks8);
533
534 const unsigned int xSmallBlockIndex = xBlocks8 * 8u == sourceWidth ? (unsigned int)(-1) : (xBlocks8 - 1u);
535 const unsigned int ySmallBlockIndex = yBlocks8 * 8u == sourceHeight ? (unsigned int)(-1) : (yBlocks8 - 1u);
536
537 for (unsigned int block8 = firstBlock8; block8 < firstBlock8 + numberBlocks8; ++block8)
538 {
539 const unsigned int yBlock8 = block8 / xBlocks8;
540 const unsigned int xBlock8 = block8 % xBlocks8;
541
542 const T* sourceBlockTopLeft = nullptr;
543 T* targetBlockTopLeft = nullptr;
544
545 switch (tFlipDirection)
546 {
547 case FD_NONE:
548 {
549 // simply transposing the block
550
551 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
552 targetBlockTopLeft = target + targetStrideElements * xBlock8 * 8u + yBlock8 * 8u * tChannels;
553
554 break;
555 }
556
557 case FD_LEFT_RIGHT:
558 {
559 // transposing the block and applying a left-right flip like a mirror, actually a 90 degree clockwise rotation
560
561 const unsigned int xTarget = (unsigned int)(std::max(0, int(sourceHeight) - int((yBlock8 + 1u) * 8u)));
562
563 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
564 targetBlockTopLeft = target + targetStrideElements * xBlock8 * 8u + xTarget * tChannels;
565
566 break;
567 }
568
569 case FD_TOP_BOTTOM:
570 {
571 // transposing the block and applying a top-bottom flip, actually a 90 degree counter clockwise rotation
572
573 const unsigned int yTarget = (unsigned int)(std::max(0, int(sourceWidth) - int((xBlock8 + 1u) * 8u)));
574
575 sourceBlockTopLeft = source + sourceStrideElements * yBlock8 * 8u + xBlock8 * 8u * tChannels;
576 targetBlockTopLeft = target + targetStrideElements * yTarget + yBlock8 * 8u * tChannels;
577
578 break;
579 }
580
581 default:
582 ocean_assert(false && "Invalid flip direction!");
583 }
584
585 ocean_assert(sourceBlockTopLeft != nullptr);
586 ocean_assert(targetBlockTopLeft != nullptr);
587
588 if (xBlock8 != xSmallBlockIndex && yBlock8 != ySmallBlockIndex)
589 {
590 BlockTransposer<T, tChannels>::template transposeBlock8x8<tFlipDirection>(sourceBlockTopLeft, targetBlockTopLeft, sourceStrideElements, targetStrideElements);
591 }
592 else
593 {
594 const unsigned int blockWidth = min(sourceWidth - xBlock8 * 8u, 8u);
595 const unsigned int blockHeight = min(sourceHeight - yBlock8 * 8u, 8u);
596
597 BlockTransposer<T, tChannels>::template transposeBlock<tFlipDirection>(sourceBlockTopLeft, targetBlockTopLeft, blockWidth, blockHeight, sourceStrideElements, targetStrideElements);
598 }
599 }
600}
601
602#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
603
604template <>
605template <FrameTransposer::FlipDirection tFlipDirection>
606OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 1u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
607{
608 ocean_assert(sourceBlock && targetBlock);
609 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
610
611 // A B C D E F G H
612 // a b c d e f g h
613 // 0 1 2 3 4 5 6 7
614 // ! @ # $ % ^ & *
615 // ...
616
617 __m128 line02_f_32x4 = _mm_setzero_ps(); // A B C D E F G H 0 1 2 3 4 5 6 7
618 __m128 line13_f_32x4 = _mm_setzero_ps(); // a b c d e f g h ! @ # $ % ^ & *
619
620 line02_f_32x4 = _mm_loadl_pi(line02_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 0u));
621 line13_f_32x4 = _mm_loadl_pi(line13_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 1u));
622 line02_f_32x4 = _mm_loadh_pi(line02_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 2u));
623 line13_f_32x4 = _mm_loadh_pi(line13_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 3u));
624
625 const __m128i line01_u_8x16 = _mm_unpacklo_epi8(_mm_castps_si128(line02_f_32x4), _mm_castps_si128(line13_f_32x4)); // A a B b C c D d E e F f G g H h
626 const __m128i line23_u_8x16 = _mm_unpackhi_epi8(_mm_castps_si128(line02_f_32x4), _mm_castps_si128(line13_f_32x4)); // 0 ! 1 @ 2 # 3 $ 4 % 5 ^ 6 & 7 *
627
628 const __m128i intermediateA_03_u_8x16 = _mm_unpacklo_epi16(line01_u_8x16, line23_u_8x16); // A a 0 ! B b 1 @ C c 2 # D d 3 $
629 const __m128i intermediateB_03_u_8x16 = _mm_unpackhi_epi16(line01_u_8x16, line23_u_8x16); // E e 4 % F f 5 ^ G g 6 & H h 7 *
630
631 __m128 line46_f_32x4 = _mm_setzero_ps();
632 __m128 line57_f_32x4 = _mm_setzero_ps();
633 line46_f_32x4 = _mm_loadl_pi(line46_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 4u));
634 line57_f_32x4 = _mm_loadl_pi(line57_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 5u));
635 line46_f_32x4 = _mm_loadh_pi(line46_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 6u));
636 line57_f_32x4 = _mm_loadh_pi(line57_f_32x4, (const __m64*)(sourceBlock + sourceStrideElements * 7u));
637
638 const __m128i line45_u_8x16 = _mm_unpacklo_epi8(_mm_castps_si128(line46_f_32x4), _mm_castps_si128(line57_f_32x4));
639 const __m128i line67_u_8x16 = _mm_unpackhi_epi8(_mm_castps_si128(line46_f_32x4), _mm_castps_si128(line57_f_32x4));
640
641 const __m128i intermediateA_47_u_8x16 = _mm_unpacklo_epi16(line45_u_8x16, line67_u_8x16);
642 const __m128i intermediateB_47_u_8x16 = _mm_unpackhi_epi16(line45_u_8x16, line67_u_8x16);
643
644 __m128i transposed01 = _mm_unpacklo_epi32(intermediateA_03_u_8x16, intermediateA_47_u_8x16);
645 __m128i transposed23 = _mm_unpackhi_epi32(intermediateA_03_u_8x16, intermediateA_47_u_8x16);
646 __m128i transposed45 = _mm_unpacklo_epi32(intermediateB_03_u_8x16, intermediateB_47_u_8x16);
647 __m128i transposed67 = _mm_unpackhi_epi32(intermediateB_03_u_8x16, intermediateB_47_u_8x16);
648
649 switch (tFlipDirection)
650 {
651 case FD_LEFT_RIGHT:
652 {
653 const __m128i reverseSuffleMask_u_16x8 = _mm_set_epi64x(0x08090A0B0C0D0E0Fll, 0x0001020304050607ll);
654
655 transposed01 = _mm_shuffle_epi8(transposed01, reverseSuffleMask_u_16x8);
656 transposed23 = _mm_shuffle_epi8(transposed23, reverseSuffleMask_u_16x8);
657 transposed45 = _mm_shuffle_epi8(transposed45, reverseSuffleMask_u_16x8);
658 transposed67 = _mm_shuffle_epi8(transposed67, reverseSuffleMask_u_16x8);
659
660 // no break, as we use the store function from FD_NONE
661 [[fallthrough]];
662 }
663
664 case FD_NONE:
665 {
666 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 0u), _mm_castsi128_ps(transposed01));
667 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 1u), _mm_castsi128_ps(transposed01));
668 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 2u), _mm_castsi128_ps(transposed23));
669 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 3u), _mm_castsi128_ps(transposed23));
670 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 4u), _mm_castsi128_ps(transposed45));
671 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 5u), _mm_castsi128_ps(transposed45));
672 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 6u), _mm_castsi128_ps(transposed67));
673 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 7u), _mm_castsi128_ps(transposed67));
674
675 break;
676 }
677
678 case FD_TOP_BOTTOM:
679 {
680 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 0u), _mm_castsi128_ps(transposed67));
681 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 1u), _mm_castsi128_ps(transposed67));
682 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 2u), _mm_castsi128_ps(transposed45));
683 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 3u), _mm_castsi128_ps(transposed45));
684 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 4u), _mm_castsi128_ps(transposed23));
685 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 5u), _mm_castsi128_ps(transposed23));
686 _mm_storeh_pi((__m64*)(targetBlock + targetStrideElements * 6u), _mm_castsi128_ps(transposed01));
687 _mm_storel_pi((__m64*)(targetBlock + targetStrideElements * 7u), _mm_castsi128_ps(transposed01));
688
689 break;
690 }
691
692 default:
693 ocean_assert(false && "Invalid flip direction!");
694 }
695}
696
697template <>
698template <FrameTransposer::FlipDirection tFlipDirection>
699OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 2u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
700{
701 ocean_assert(sourceBlock && targetBlock);
702 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
703
704 // AA BB CC DD EE FF GG HH
705 // aa bb cc dd ee ff gg hh
706 // 00 11 22 33 44 55 66 77
707 // !! @@ ## $$ %% ^^ && **
708 // ...
709
710 const __m128i line0_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 0u)); // AA BB CC DD EE FF GG HH
711 const __m128i line1_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 1u)); // aa bb cc dd ee ff gg hh
712 const __m128i line2_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 2u));
713 const __m128i line3_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 3u));
714 const __m128i line4_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 4u));
715 const __m128i line5_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 5u));
716 const __m128i line6_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 6u));
717 const __m128i line7_u_8x16 = _mm_loadu_si128((const __m128i*)(sourceBlock + sourceStrideElements * 7u));
718
719 const __m128i line01_A_u_8x16 = _mm_unpacklo_epi16(line0_u_8x16, line1_u_8x16); // AA aa BB bb CC cc DD dd
720 const __m128i line01_B_u_8x16 = _mm_unpackhi_epi16(line0_u_8x16, line1_u_8x16); // EE ee FF ff GG gg HH hh
721 const __m128i line23_A_u_8x16 = _mm_unpacklo_epi16(line2_u_8x16, line3_u_8x16); // 00 !! 11 @@ ...
722 const __m128i line23_B_u_8x16 = _mm_unpackhi_epi16(line2_u_8x16, line3_u_8x16); // 44 %% 55 ^^ ...
723 const __m128i line45_A_u_8x16 = _mm_unpacklo_epi16(line4_u_8x16, line5_u_8x16);
724 const __m128i line45_B_u_8x16 = _mm_unpackhi_epi16(line4_u_8x16, line5_u_8x16);
725 const __m128i line67_A_u_8x16 = _mm_unpacklo_epi16(line6_u_8x16, line7_u_8x16);
726 const __m128i line67_B_u_8x16 = _mm_unpackhi_epi16(line6_u_8x16, line7_u_8x16);
727
728 const __m128i intermediateAA_03_u_8x16 = _mm_unpacklo_epi32(line01_A_u_8x16, line23_A_u_8x16); // AA aa 00 !! BB bb 11 @@
729 const __m128i intermediateAB_03_u_8x16 = _mm_unpackhi_epi32(line01_A_u_8x16, line23_A_u_8x16); // CC cc 22 ## DD dd 33 $$
730 const __m128i intermediateBA_03_u_8x16 = _mm_unpacklo_epi32(line01_B_u_8x16, line23_B_u_8x16);
731 const __m128i intermediateBB_03_u_8x16 = _mm_unpackhi_epi32(line01_B_u_8x16, line23_B_u_8x16);
732 const __m128i intermediateAA_47_u_8x16 = _mm_unpacklo_epi32(line45_A_u_8x16, line67_A_u_8x16);
733 const __m128i intermediateAB_47_u_8x16 = _mm_unpackhi_epi32(line45_A_u_8x16, line67_A_u_8x16);
734 const __m128i intermediateBA_47_u_8x16 = _mm_unpacklo_epi32(line45_B_u_8x16, line67_B_u_8x16);
735 const __m128i intermediateBB_47_u_8x16 = _mm_unpackhi_epi32(line45_B_u_8x16, line67_B_u_8x16);
736
737 __m128i transposed0 = _mm_unpacklo_epi64(intermediateAA_03_u_8x16, intermediateAA_47_u_8x16);
738 __m128i transposed1 = _mm_unpackhi_epi64(intermediateAA_03_u_8x16, intermediateAA_47_u_8x16);
739 __m128i transposed2 = _mm_unpacklo_epi64(intermediateAB_03_u_8x16, intermediateAB_47_u_8x16);
740 __m128i transposed3 = _mm_unpackhi_epi64(intermediateAB_03_u_8x16, intermediateAB_47_u_8x16);
741 __m128i transposed4 = _mm_unpacklo_epi64(intermediateBA_03_u_8x16, intermediateBA_47_u_8x16);
742 __m128i transposed5 = _mm_unpackhi_epi64(intermediateBA_03_u_8x16, intermediateBA_47_u_8x16);
743 __m128i transposed6 = _mm_unpacklo_epi64(intermediateBB_03_u_8x16, intermediateBB_47_u_8x16);
744 __m128i transposed7 = _mm_unpackhi_epi64(intermediateBB_03_u_8x16, intermediateBB_47_u_8x16);
745
746 switch (tFlipDirection)
747 {
748 case FD_LEFT_RIGHT:
749 {
750 const __m128i reverseSuffleMask_u_16x8 = _mm_set_epi64x(0x0100030205040706ll, 0x09080B0A0D0C0F0Ell);
751
752 transposed0 = _mm_shuffle_epi8(transposed0, reverseSuffleMask_u_16x8);
753 transposed1 = _mm_shuffle_epi8(transposed1, reverseSuffleMask_u_16x8);
754 transposed2 = _mm_shuffle_epi8(transposed2, reverseSuffleMask_u_16x8);
755 transposed3 = _mm_shuffle_epi8(transposed3, reverseSuffleMask_u_16x8);
756 transposed4 = _mm_shuffle_epi8(transposed4, reverseSuffleMask_u_16x8);
757 transposed5 = _mm_shuffle_epi8(transposed5, reverseSuffleMask_u_16x8);
758 transposed6 = _mm_shuffle_epi8(transposed6, reverseSuffleMask_u_16x8);
759 transposed7 = _mm_shuffle_epi8(transposed7, reverseSuffleMask_u_16x8);
760
761 // no break, as we use the store function from FD_NONE
762 [[fallthrough]];
763 }
764
765 case FD_NONE:
766 {
767 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 0u), transposed0);
768 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 1u), transposed1);
769 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 2u), transposed2);
770 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 3u), transposed3);
771 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 4u), transposed4);
772 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 5u), transposed5);
773 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 6u), transposed6);
774 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 7u), transposed7);
775
776 break;
777 }
778
779 case FD_TOP_BOTTOM:
780 {
781 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 0u), transposed7);
782 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 1u), transposed6);
783 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 2u), transposed5);
784 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 3u), transposed4);
785 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 4u), transposed3);
786 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 5u), transposed2);
787 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 6u), transposed1);
788 _mm_storeu_si128((__m128i*)(targetBlock + targetStrideElements * 7u), transposed0);
789
790 break;
791 }
792
793 default:
794 ocean_assert(false && "Invalid flip direction!");
795 }
796}
797
798#endif // defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
799
800#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
801
802template <>
803template <FrameTransposer::FlipDirection tFlipDirection>
804OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 4u>::transposeBlock4x4NEON(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
805{
806 ocean_assert(sourceBlock && targetBlock);
807 ocean_assert(sourceStrideElements >= 4u * 3u && targetStrideElements >= 4u * 3u);
808
809 // the NEON code is straight forward simply using the VTRN (transpose) instruction
810
811 const uint32x4_t line0_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 0u));
812 const uint32x4_t line1_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 1u));
813
814 // A B C D A a C c
815 // a b c d -> B b D d
816 const uint32x4x2_t line01_u_32x4x2 = vtrnq_u32(line0_u_32x4, line1_u_32x4);
817
818 const uint32x4_t line2_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 2u));
819 const uint32x4_t line3_u_32x4 = vreinterpretq_u32_u8(vld1q_u8(sourceBlock + sourceStrideElements * 3u));
820
821 // 0 1 2 3 0 ! 2 #
822 // ! @ # $ -> 1 @ 3 $
823 const uint32x4x2_t line23_u_32x4x2 = vtrnq_u32(line2_u_32x4, line3_u_32x4);
824
825 // Aa Cc Aa 0!
826 // Bb Dd -> Bb 1@
827 // 0! 2# Cc 2#
828 // 1@ 3$ Dd 3$
829 const uint32x4_t result0_u_32x4 = vcombine_u32(vget_low_u32(line01_u_32x4x2.val[0]), vget_low_u32(line23_u_32x4x2.val[0]));
830 const uint32x4_t result1_u_32x4 = vcombine_u32(vget_low_u32(line01_u_32x4x2.val[1]), vget_low_u32(line23_u_32x4x2.val[1]));
831 const uint32x4_t result2_u_32x4 = vcombine_u32(vget_high_u32(line01_u_32x4x2.val[0]), vget_high_u32(line23_u_32x4x2.val[0]));
832 const uint32x4_t result3_u_32x4 = vcombine_u32(vget_high_u32(line01_u_32x4x2.val[1]), vget_high_u32(line23_u_32x4x2.val[1]));
833
834 switch (tFlipDirection)
835 {
836 case FD_NONE:
837 {
838 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result0_u_32x4));
839 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result1_u_32x4));
840 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result2_u_32x4));
841 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result3_u_32x4));
842
843 break;
844 }
845
846 case FD_LEFT_RIGHT:
847 {
848 const uint32x4_t halfReverseResult0_u_32x4 = vrev64q_u32(result0_u_32x4);
849 const uint8x16_t reverseResult0_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult0_u_32x4), vget_low_u32(halfReverseResult0_u_32x4)));
850 vst1q_u8(targetBlock + targetStrideElements * 0u, reverseResult0_u_32x4);
851
852 const uint32x4_t halfReverseResult1_u_32x4 = vrev64q_u32(result1_u_32x4);
853 const uint8x16_t reverseResult1_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult1_u_32x4), vget_low_u32(halfReverseResult1_u_32x4)));
854 vst1q_u8(targetBlock + targetStrideElements * 1u, reverseResult1_u_32x4);
855
856 const uint32x4_t halfReverseResult2_u_32x4 = vrev64q_u32(result2_u_32x4);
857 const uint8x16_t reverseResult2_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult2_u_32x4), vget_low_u32(halfReverseResult2_u_32x4)));
858 vst1q_u8(targetBlock + targetStrideElements * 2u, reverseResult2_u_32x4);
859
860 const uint32x4_t halfReverseResult3_u_32x4 = vrev64q_u32(result3_u_32x4);
861 const uint8x16_t reverseResult3_u_32x4 = vreinterpretq_u8_u32(vcombine_u32(vget_high_u32(halfReverseResult3_u_32x4), vget_low_u32(halfReverseResult3_u_32x4)));
862 vst1q_u8(targetBlock + targetStrideElements * 3u, reverseResult3_u_32x4);
863
864 break;
865 }
866
867 case FD_TOP_BOTTOM:
868 {
869 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result3_u_32x4));
870 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result2_u_32x4));
871 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result1_u_32x4));
872 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result0_u_32x4));
873
874 break;
875 }
876
877 default:
878 ocean_assert(false && "Invalid flip direction!");
879 }
880}
881
882template <>
883template <FrameTransposer::FlipDirection tFlipDirection>
884OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 1u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
885{
886 ocean_assert(sourceBlock && targetBlock);
887 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
888
889 // the NEON code is straight forward simply using the VTRN (transpose) instruction
890
891 const uint8x8_t line0_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 0u);
892 const uint8x8_t line1_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 1u);
893
894 // A B C D E F G H A a C c E e G g
895 // a b c d e f g h -> B b D d F f H h
896 const uint8x8x2_t line01_u_8x8x2 = vtrn_u8(line0_u_8x8, line1_u_8x8);
897
898 const uint8x8_t line2_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 2u);
899 const uint8x8_t line3_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 3u);
900
901 // 0 1 2 3 4 5 6 7 0 ! 2 # 4 % 6 &
902 // ! @ # $ % ^ & * -> 1 @ 3 $ 5 ^ 7 *
903 const uint8x8x2_t line23_u_8x8x2 = vtrn_u8(line2_u_8x8, line3_u_8x8);
904
905 // Aa Cc Ee Gg Aa 0! Ee 4%
906 // 0! 2# 4% 6& -> Cc 2# Gg 6&
907 const uint16x4x2_t line02_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_u_8x8x2.val[0]));
908
909 // Bb Dd Ff Hh Bb 1@ Ef 5^
910 // 1@ 3$ 5^ 7* -> Dd 3$ Hh 7*
911 const uint16x4x2_t line13_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_u_8x8x2.val[1]));
912
913 const uint8x8_t line4_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 4u);
914 const uint8x8_t line5_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 5u);
915
916 const uint8x8x2_t line45_u_8x8x2 = vtrn_u8(line4_u_8x8, line5_u_8x8);
917
918 const uint8x8_t line6_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 6u);
919 const uint8x8_t line7_u_8x8 = vld1_u8(sourceBlock + sourceStrideElements * 7u);
920
921 const uint8x8x2_t line67_u_8x8x2 = vtrn_u8(line6_u_8x8, line7_u_8x8);
922
923 const uint16x4x2_t line46_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_u_8x8x2.val[0]));
924 const uint16x4x2_t line57_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_u_8x8x2.val[1]));
925
926 const uint32x2x2_t line04_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_u_16x4x2.val[0]));
927 const uint32x2x2_t line26_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_u_16x4x2.val[1]));
928
929 const uint32x2x2_t line15_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_u_16x4x2.val[0]));
930 const uint32x2x2_t line37_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_u_16x4x2.val[1]));
931
932 switch (tFlipDirection)
933 {
934 case FD_NONE:
935 {
936 vst1_u8(targetBlock + targetStrideElements * 0u, vreinterpret_u8_u32(line04_u_32x2x2.val[0]));
937 vst1_u8(targetBlock + targetStrideElements * 1u, vreinterpret_u8_u32(line15_u_32x2x2.val[0]));
938 vst1_u8(targetBlock + targetStrideElements * 2u, vreinterpret_u8_u32(line26_u_32x2x2.val[0]));
939 vst1_u8(targetBlock + targetStrideElements * 3u, vreinterpret_u8_u32(line37_u_32x2x2.val[0]));
940 vst1_u8(targetBlock + targetStrideElements * 4u, vreinterpret_u8_u32(line04_u_32x2x2.val[1]));
941 vst1_u8(targetBlock + targetStrideElements * 5u, vreinterpret_u8_u32(line15_u_32x2x2.val[1]));
942 vst1_u8(targetBlock + targetStrideElements * 6u, vreinterpret_u8_u32(line26_u_32x2x2.val[1]));
943 vst1_u8(targetBlock + targetStrideElements * 7u, vreinterpret_u8_u32(line37_u_32x2x2.val[1]));
944
945 break;
946 }
947
948 case FD_LEFT_RIGHT:
949 {
950 vst1_u8(targetBlock + targetStrideElements * 0u, vrev64_u8(vreinterpret_u8_u32(line04_u_32x2x2.val[0])));
951 vst1_u8(targetBlock + targetStrideElements * 1u, vrev64_u8(vreinterpret_u8_u32(line15_u_32x2x2.val[0])));
952 vst1_u8(targetBlock + targetStrideElements * 2u, vrev64_u8(vreinterpret_u8_u32(line26_u_32x2x2.val[0])));
953 vst1_u8(targetBlock + targetStrideElements * 3u, vrev64_u8(vreinterpret_u8_u32(line37_u_32x2x2.val[0])));
954 vst1_u8(targetBlock + targetStrideElements * 4u, vrev64_u8(vreinterpret_u8_u32(line04_u_32x2x2.val[1])));
955 vst1_u8(targetBlock + targetStrideElements * 5u, vrev64_u8(vreinterpret_u8_u32(line15_u_32x2x2.val[1])));
956 vst1_u8(targetBlock + targetStrideElements * 6u, vrev64_u8(vreinterpret_u8_u32(line26_u_32x2x2.val[1])));
957 vst1_u8(targetBlock + targetStrideElements * 7u, vrev64_u8(vreinterpret_u8_u32(line37_u_32x2x2.val[1])));
958
959 break;
960 }
961
962 case FD_TOP_BOTTOM:
963 {
964 vst1_u8(targetBlock + targetStrideElements * 0u, vreinterpret_u8_u32(line37_u_32x2x2.val[1]));
965 vst1_u8(targetBlock + targetStrideElements * 1u, vreinterpret_u8_u32(line26_u_32x2x2.val[1]));
966 vst1_u8(targetBlock + targetStrideElements * 2u, vreinterpret_u8_u32(line15_u_32x2x2.val[1]));
967 vst1_u8(targetBlock + targetStrideElements * 3u, vreinterpret_u8_u32(line04_u_32x2x2.val[1]));
968 vst1_u8(targetBlock + targetStrideElements * 4u, vreinterpret_u8_u32(line37_u_32x2x2.val[0]));
969 vst1_u8(targetBlock + targetStrideElements * 5u, vreinterpret_u8_u32(line26_u_32x2x2.val[0]));
970 vst1_u8(targetBlock + targetStrideElements * 6u, vreinterpret_u8_u32(line15_u_32x2x2.val[0]));
971 vst1_u8(targetBlock + targetStrideElements * 7u, vreinterpret_u8_u32(line04_u_32x2x2.val[0]));
972
973 break;
974 }
975
976 default:
977 ocean_assert(false && "Invalid flip direction!");
978 }
979}
980
981template <>
982template <FrameTransposer::FlipDirection tFlipDirection>
983OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 2u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
984{
985 ocean_assert(sourceBlock && targetBlock);
986 ocean_assert(sourceStrideElements >= 8u * 2u && targetStrideElements >= 8u * 2u);
987
988 // the NEON code is straight forward simply using the VTRN (transpose) instruction
989 // the 2-channel code is similar to the 1-channel code but simply transposes 16 bit values instead of 8 bit values
990
991 const uint16x8_t line0_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 0u));
992 const uint16x8_t line1_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 1u));
993
994 // A B C D E F G H A a C c E e G g
995 // a b c d e f g h -> B b D d F f H h
996 const uint16x8x2_t line01_u_16x8x2 = vtrnq_u16(line0_u_16x8, line1_u_16x8);
997
998 const uint16x8_t line2_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 2u));
999 const uint16x8_t line3_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 3u));
1000
1001 // 0 1 2 3 4 5 6 7 0 ! 2 # 4 % 6 &
1002 // ! @ # $ % ^ & * -> 1 @ 3 $ 5 ^ 7 *
1003 const uint16x8x2_t line23_u_16x8x2 = vtrnq_u16(line2_u_16x8, line3_u_16x8);
1004
1005 // Aa Cc Ee Gg Aa 0! Ee 4%
1006 // 0! 2# 4% 6& -> Cc 2# Gg 6&
1007 const uint32x4x2_t line02_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line01_u_16x8x2.val[0]), vreinterpretq_u32_u16(line23_u_16x8x2.val[0]));
1008
1009 // Bb Dd Ff Hh Bb 1@ Ef 5^
1010 // 1@ 3$ 5^ 7* -> Dd 3$ Hh 7*
1011 const uint32x4x2_t line13_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line01_u_16x8x2.val[1]), vreinterpretq_u32_u16(line23_u_16x8x2.val[1]));
1012
1013 const uint16x8_t line4_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 4u));
1014 const uint16x8_t line5_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 5u));
1015
1016 const uint16x8x2_t line45_u_16x8x2 = vtrnq_u16(line4_u_16x8, line5_u_16x8);
1017
1018 const uint16x8_t line6_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 6u));
1019 const uint16x8_t line7_u_16x8 = vreinterpretq_u16_u8(vld1q_u8(sourceBlock + sourceStrideElements * 7u));
1020
1021 const uint16x8x2_t line67_u_16x8x2 = vtrnq_u16(line6_u_16x8, line7_u_16x8);
1022
1023 const uint32x4x2_t line46_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line45_u_16x8x2.val[0]), vreinterpretq_u32_u16(line67_u_16x8x2.val[0]));
1024 const uint32x4x2_t line57_u_32x4x2 = vtrnq_u32(vreinterpretq_u32_u16(line45_u_16x8x2.val[1]), vreinterpretq_u32_u16(line67_u_16x8x2.val[1]));
1025
1026 const uint32x4_t result0_u_32x4 = vcombine_u32(vget_low_u32(line02_u_32x4x2.val[0]), vget_low_u32(line46_u_32x4x2.val[0]));
1027 const uint32x4_t result1_u_32x4 = vcombine_u32(vget_low_u32(line13_u_32x4x2.val[0]), vget_low_u32(line57_u_32x4x2.val[0]));
1028
1029 const uint32x4_t result2_u_32x4 = vcombine_u32(vget_low_u32(line02_u_32x4x2.val[1]), vget_low_u32(line46_u_32x4x2.val[1]));
1030 const uint32x4_t result3_u_32x4 = vcombine_u32(vget_low_u32(line13_u_32x4x2.val[1]), vget_low_u32(line57_u_32x4x2.val[1]));
1031
1032 const uint32x4_t result4_u_32x4 = vcombine_u32(vget_high_u32(line02_u_32x4x2.val[0]), vget_high_u32(line46_u_32x4x2.val[0]));
1033 const uint32x4_t result5_u_32x4 = vcombine_u32(vget_high_u32(line13_u_32x4x2.val[0]), vget_high_u32(line57_u_32x4x2.val[0]));
1034
1035 const uint32x4_t result6_u_32x4 = vcombine_u32(vget_high_u32(line02_u_32x4x2.val[1]), vget_high_u32(line46_u_32x4x2.val[1]));
1036 const uint32x4_t result7_u_32x4 = vcombine_u32(vget_high_u32(line13_u_32x4x2.val[1]), vget_high_u32(line57_u_32x4x2.val[1]));
1037
1038 switch (tFlipDirection)
1039 {
1040 case FD_NONE:
1041 {
1042 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result0_u_32x4));
1043 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result1_u_32x4));
1044 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result2_u_32x4));
1045 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result3_u_32x4));
1046 vst1q_u8(targetBlock + targetStrideElements * 4u, vreinterpretq_u8_u32(result4_u_32x4));
1047 vst1q_u8(targetBlock + targetStrideElements * 5u, vreinterpretq_u8_u32(result5_u_32x4));
1048 vst1q_u8(targetBlock + targetStrideElements * 6u, vreinterpretq_u8_u32(result6_u_32x4));
1049 vst1q_u8(targetBlock + targetStrideElements * 7u, vreinterpretq_u8_u32(result7_u_32x4));
1050
1051 break;
1052 }
1053
1054 case FD_LEFT_RIGHT:
1055 {
1056 const uint8x16_t targetHalfReverse0_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result0_u_32x4)));
1057 vst1q_u8(targetBlock + targetStrideElements * 0u, vcombine_u8(vget_high_u8(targetHalfReverse0_u_8x16), vget_low_u8(targetHalfReverse0_u_8x16)));
1058
1059 const uint8x16_t targetHalfReverse1_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result1_u_32x4)));
1060 vst1q_u8(targetBlock + targetStrideElements * 1u, vcombine_u8(vget_high_u8(targetHalfReverse1_u_8x16), vget_low_u8(targetHalfReverse1_u_8x16)));
1061
1062 const uint8x16_t targetHalfReverse2_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result2_u_32x4)));
1063 vst1q_u8(targetBlock + targetStrideElements * 2u, vcombine_u8(vget_high_u8(targetHalfReverse2_u_8x16), vget_low_u8(targetHalfReverse2_u_8x16)));
1064
1065 const uint8x16_t targetHalfReverse3_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result3_u_32x4)));
1066 vst1q_u8(targetBlock + targetStrideElements * 3u, vcombine_u8(vget_high_u8(targetHalfReverse3_u_8x16), vget_low_u8(targetHalfReverse3_u_8x16)));
1067
1068 const uint8x16_t targetHalfReverse4_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result4_u_32x4)));
1069 vst1q_u8(targetBlock + targetStrideElements * 4u, vcombine_u8(vget_high_u8(targetHalfReverse4_u_8x16), vget_low_u8(targetHalfReverse4_u_8x16)));
1070
1071 const uint8x16_t targetHalfReverse5_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result5_u_32x4)));
1072 vst1q_u8(targetBlock + targetStrideElements * 5u, vcombine_u8(vget_high_u8(targetHalfReverse5_u_8x16), vget_low_u8(targetHalfReverse5_u_8x16)));
1073
1074 const uint8x16_t targetHalfReverse6_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result6_u_32x4)));
1075 vst1q_u8(targetBlock + targetStrideElements * 6u, vcombine_u8(vget_high_u8(targetHalfReverse6_u_8x16), vget_low_u8(targetHalfReverse6_u_8x16)));
1076
1077 const uint8x16_t targetHalfReverse7_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u32(result7_u_32x4)));
1078 vst1q_u8(targetBlock + targetStrideElements * 7u, vcombine_u8(vget_high_u8(targetHalfReverse7_u_8x16), vget_low_u8(targetHalfReverse7_u_8x16)));
1079
1080 break;
1081 }
1082
1083 case FD_TOP_BOTTOM:
1084 {
1085 vst1q_u8(targetBlock + targetStrideElements * 0u, vreinterpretq_u8_u32(result7_u_32x4));
1086 vst1q_u8(targetBlock + targetStrideElements * 1u, vreinterpretq_u8_u32(result6_u_32x4));
1087 vst1q_u8(targetBlock + targetStrideElements * 2u, vreinterpretq_u8_u32(result5_u_32x4));
1088 vst1q_u8(targetBlock + targetStrideElements * 3u, vreinterpretq_u8_u32(result4_u_32x4));
1089 vst1q_u8(targetBlock + targetStrideElements * 4u, vreinterpretq_u8_u32(result3_u_32x4));
1090 vst1q_u8(targetBlock + targetStrideElements * 5u, vreinterpretq_u8_u32(result2_u_32x4));
1091 vst1q_u8(targetBlock + targetStrideElements * 6u, vreinterpretq_u8_u32(result1_u_32x4));
1092 vst1q_u8(targetBlock + targetStrideElements * 7u, vreinterpretq_u8_u32(result0_u_32x4));
1093
1094 break;
1095 }
1096
1097 default:
1098 ocean_assert(false && "Invalid flip direction!");
1099 }
1100}
1101
1102template <>
1103template <FrameTransposer::FlipDirection tFlipDirection>
1104OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 3u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
1105{
1106 ocean_assert(sourceBlock && targetBlock);
1107 ocean_assert(sourceStrideElements >= 8u * 3u && targetStrideElements >= 8u * 3u);
1108
1109 // the NEON code is straight forward simply using the VTRN (transpose) instruction
1110 // the 3-channel code is similar to the 1-channel code but uses vld3_u8/vst3_u8 instead of vld1_u8/vst1_u8
1111
1112 const uint8x8x3_t line0_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 0u);
1113 const uint8x8x3_t line1_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 1u);
1114
1115 // A B C D E F G H A a C c E e G g
1116 // a b c d e f g h -> B b D d F f H h
1117 const uint8x8x2_t line01_channel0_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[0], line1_u_8x8x3.val[0]);
1118 const uint8x8x2_t line01_channel1_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[1], line1_u_8x8x3.val[1]);
1119 const uint8x8x2_t line01_channel2_u_8x8x2 = vtrn_u8(line0_u_8x8x3.val[2], line1_u_8x8x3.val[2]);
1120
1121 const uint8x8x3_t line2_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 2u);
1122 const uint8x8x3_t line3_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 3u);
1123
1124 // 0 1 2 3 4 5 6 7 0 ! 2 # 4 % 6 &
1125 // ! @ # $ % ^ & * -> 1 @ 3 $ 5 ^ 7 *
1126 const uint8x8x2_t line23_channel0_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[0], line3_u_8x8x3.val[0]);
1127 const uint8x8x2_t line23_channel1_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[1], line3_u_8x8x3.val[1]);
1128 const uint8x8x2_t line23_channel2_u_8x8x2 = vtrn_u8(line2_u_8x8x3.val[2], line3_u_8x8x3.val[2]);
1129
1130 // Aa Cc Ee Gg Aa 0! Ee 4%
1131 // 0! 2# 4% 6& -> Cc 2# Gg 6&
1132 const uint16x4x2_t line02_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel0_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel0_u_8x8x2.val[0]));
1133 const uint16x4x2_t line02_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel1_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel1_u_8x8x2.val[0]));
1134 const uint16x4x2_t line02_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel2_u_8x8x2.val[0]), vreinterpret_u16_u8(line23_channel2_u_8x8x2.val[0]));
1135
1136 // Bb Dd Ff Hh Bb 1@ Ef 5^
1137 // 1@ 3$ 5^ 7* -> Dd 3$ Hh 7*
1138 const uint16x4x2_t line13_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel0_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel0_u_8x8x2.val[1]));
1139 const uint16x4x2_t line13_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel1_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel1_u_8x8x2.val[1]));
1140 const uint16x4x2_t line13_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line01_channel2_u_8x8x2.val[1]), vreinterpret_u16_u8(line23_channel2_u_8x8x2.val[1]));
1141
1142 const uint8x8x3_t line4_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 4u);
1143 const uint8x8x3_t line5_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 5u);
1144
1145 const uint8x8x2_t line45_channel0_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[0], line5_u_8x8x3.val[0]);
1146 const uint8x8x2_t line45_channel1_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[1], line5_u_8x8x3.val[1]);
1147 const uint8x8x2_t line45_channel2_u_8x8x2 = vtrn_u8(line4_u_8x8x3.val[2], line5_u_8x8x3.val[2]);
1148
1149 const uint8x8x3_t line6_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 6u);
1150 const uint8x8x3_t line7_u_8x8x3 = vld3_u8(sourceBlock + sourceStrideElements * 7u);
1151
1152 const uint8x8x2_t line67_channel0_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[0], line7_u_8x8x3.val[0]);
1153 const uint8x8x2_t line67_channel1_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[1], line7_u_8x8x3.val[1]);
1154 const uint8x8x2_t line67_channel2_u_8x8x2 = vtrn_u8(line6_u_8x8x3.val[2], line7_u_8x8x3.val[2]);
1155
1156 const uint16x4x2_t line46_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel0_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel0_u_8x8x2.val[0]));
1157 const uint16x4x2_t line46_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel1_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel1_u_8x8x2.val[0]));
1158 const uint16x4x2_t line46_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel2_u_8x8x2.val[0]), vreinterpret_u16_u8(line67_channel2_u_8x8x2.val[0]));
1159
1160 const uint16x4x2_t line57_channel0_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel0_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel0_u_8x8x2.val[1]));
1161 const uint16x4x2_t line57_channel1_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel1_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel1_u_8x8x2.val[1]));
1162 const uint16x4x2_t line57_channel2_u_16x4x2 = vtrn_u16(vreinterpret_u16_u8(line45_channel2_u_8x8x2.val[1]), vreinterpret_u16_u8(line67_channel2_u_8x8x2.val[1]));
1163
1164 const uint32x2x2_t line04_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel0_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel0_u_16x4x2.val[0]));
1165 const uint32x2x2_t line04_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel1_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel1_u_16x4x2.val[0]));
1166 const uint32x2x2_t line04_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel2_u_16x4x2.val[0]), vreinterpret_u32_u16(line46_channel2_u_16x4x2.val[0]));
1167
1168 const uint32x2x2_t line26_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel0_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel0_u_16x4x2.val[1]));
1169 const uint32x2x2_t line26_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel1_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel1_u_16x4x2.val[1]));
1170 const uint32x2x2_t line26_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line02_channel2_u_16x4x2.val[1]), vreinterpret_u32_u16(line46_channel2_u_16x4x2.val[1]));
1171
1172 const uint32x2x2_t line15_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel0_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel0_u_16x4x2.val[0]));
1173 const uint32x2x2_t line15_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel1_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel1_u_16x4x2.val[0]));
1174 const uint32x2x2_t line15_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel2_u_16x4x2.val[0]), vreinterpret_u32_u16(line57_channel2_u_16x4x2.val[0]));
1175
1176 const uint32x2x2_t line37_channel0_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel0_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel0_u_16x4x2.val[1]));
1177 const uint32x2x2_t line37_channel1_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel1_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel1_u_16x4x2.val[1]));
1178 const uint32x2x2_t line37_channel2_u_32x2x2 = vtrn_u32(vreinterpret_u32_u16(line13_channel2_u_16x4x2.val[1]), vreinterpret_u32_u16(line57_channel2_u_16x4x2.val[1]));
1179
1180 switch (tFlipDirection)
1181 {
1182 case FD_NONE:
1183 {
1184 uint8x8x3_t result0_u_8x8x3;
1185 result0_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]);
1186 result0_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]);
1187 result0_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]);
1188 vst3_u8(targetBlock + targetStrideElements * 0u, result0_u_8x8x3);
1189
1190 uint8x8x3_t result1_u_8x8x3;
1191 result1_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]);
1192 result1_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]);
1193 result1_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]);
1194 vst3_u8(targetBlock + targetStrideElements * 1u, result1_u_8x8x3);
1195
1196 uint8x8x3_t result2_u_8x8x3;
1197 result2_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]);
1198 result2_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]);
1199 result2_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]);
1200 vst3_u8(targetBlock + targetStrideElements * 2u, result2_u_8x8x3);
1201
1202 uint8x8x3_t result3_u_8x8x3;
1203 result3_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]);
1204 result3_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]);
1205 result3_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]);
1206 vst3_u8(targetBlock + targetStrideElements * 3u, result3_u_8x8x3);
1207
1208 uint8x8x3_t result4_u_8x8x3;
1209 result4_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]);
1210 result4_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]);
1211 result4_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]);
1212 vst3_u8(targetBlock + targetStrideElements * 4u, result4_u_8x8x3);
1213
1214 uint8x8x3_t result5_u_8x8x3;
1215 result5_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]);
1216 result5_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]);
1217 result5_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]);
1218 vst3_u8(targetBlock + targetStrideElements * 5u, result5_u_8x8x3);
1219
1220 uint8x8x3_t result6_u_8x8x3;
1221 result6_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]);
1222 result6_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]);
1223 result6_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]);
1224 vst3_u8(targetBlock + targetStrideElements * 6u, result6_u_8x8x3);
1225
1226 uint8x8x3_t result7_u_8x8x3;
1227 result7_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]);
1228 result7_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]);
1229 result7_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]);
1230 vst3_u8(targetBlock + targetStrideElements * 7u, result7_u_8x8x3);
1231
1232 break;
1233 }
1234
1235 case FD_LEFT_RIGHT:
1236 {
1237 uint8x8x3_t result0_u_8x8x3;
1238 result0_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]));
1239 result0_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]));
1240 result0_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]));
1241 vst3_u8(targetBlock + targetStrideElements * 0u, result0_u_8x8x3);
1242
1243 uint8x8x3_t result1_u_8x8x3;
1244 result1_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]));
1245 result1_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]));
1246 result1_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]));
1247 vst3_u8(targetBlock + targetStrideElements * 1u, result1_u_8x8x3);
1248
1249 uint8x8x3_t result2_u_8x8x3;
1250 result2_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]));
1251 result2_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]));
1252 result2_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]));
1253 vst3_u8(targetBlock + targetStrideElements * 2u, result2_u_8x8x3);
1254
1255 uint8x8x3_t result3_u_8x8x3;
1256 result3_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]));
1257 result3_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]));
1258 result3_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]));
1259 vst3_u8(targetBlock + targetStrideElements * 3u, result3_u_8x8x3);
1260
1261 uint8x8x3_t result4_u_8x8x3;
1262 result4_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]));
1263 result4_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]));
1264 result4_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]));
1265 vst3_u8(targetBlock + targetStrideElements * 4u, result4_u_8x8x3);
1266
1267 uint8x8x3_t result5_u_8x8x3;
1268 result5_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]));
1269 result5_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]));
1270 result5_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]));
1271 vst3_u8(targetBlock + targetStrideElements * 5u, result5_u_8x8x3);
1272
1273 uint8x8x3_t result6_u_8x8x3;
1274 result6_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]));
1275 result6_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]));
1276 result6_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]));
1277 vst3_u8(targetBlock + targetStrideElements * 6u, result6_u_8x8x3);
1278
1279 uint8x8x3_t result7_u_8x8x3;
1280 result7_u_8x8x3.val[0] = vrev64_u8(vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]));
1281 result7_u_8x8x3.val[1] = vrev64_u8(vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]));
1282 result7_u_8x8x3.val[2] = vrev64_u8(vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]));
1283 vst3_u8(targetBlock + targetStrideElements * 7u, result7_u_8x8x3);
1284
1285 break;
1286 }
1287
1288 case FD_TOP_BOTTOM:
1289 {
1290 uint8x8x3_t result7_u_8x8x3;
1291 result7_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[1]);
1292 result7_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[1]);
1293 result7_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[1]);
1294 vst3_u8(targetBlock + targetStrideElements * 0u, result7_u_8x8x3);
1295
1296 uint8x8x3_t result6_u_8x8x3;
1297 result6_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[1]);
1298 result6_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[1]);
1299 result6_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[1]);
1300 vst3_u8(targetBlock + targetStrideElements * 1u, result6_u_8x8x3);
1301
1302 uint8x8x3_t result5_u_8x8x3;
1303 result5_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[1]);
1304 result5_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[1]);
1305 result5_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[1]);
1306 vst3_u8(targetBlock + targetStrideElements * 2u, result5_u_8x8x3);
1307
1308 uint8x8x3_t result4_u_8x8x3;
1309 result4_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[1]);
1310 result4_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[1]);
1311 result4_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[1]);
1312 vst3_u8(targetBlock + targetStrideElements * 3u, result4_u_8x8x3);
1313
1314 uint8x8x3_t result3_u_8x8x3;
1315 result3_u_8x8x3.val[0] = vreinterpret_u8_u32(line37_channel0_u_32x2x2.val[0]);
1316 result3_u_8x8x3.val[1] = vreinterpret_u8_u32(line37_channel1_u_32x2x2.val[0]);
1317 result3_u_8x8x3.val[2] = vreinterpret_u8_u32(line37_channel2_u_32x2x2.val[0]);
1318 vst3_u8(targetBlock + targetStrideElements * 4u, result3_u_8x8x3);
1319
1320 uint8x8x3_t result2_u_8x8x3;
1321 result2_u_8x8x3.val[0] = vreinterpret_u8_u32(line26_channel0_u_32x2x2.val[0]);
1322 result2_u_8x8x3.val[1] = vreinterpret_u8_u32(line26_channel1_u_32x2x2.val[0]);
1323 result2_u_8x8x3.val[2] = vreinterpret_u8_u32(line26_channel2_u_32x2x2.val[0]);
1324 vst3_u8(targetBlock + targetStrideElements * 5u, result2_u_8x8x3);
1325
1326 uint8x8x3_t result1_u_8x8x3;
1327 result1_u_8x8x3.val[0] = vreinterpret_u8_u32(line15_channel0_u_32x2x2.val[0]);
1328 result1_u_8x8x3.val[1] = vreinterpret_u8_u32(line15_channel1_u_32x2x2.val[0]);
1329 result1_u_8x8x3.val[2] = vreinterpret_u8_u32(line15_channel2_u_32x2x2.val[0]);
1330 vst3_u8(targetBlock + targetStrideElements * 6u, result1_u_8x8x3);
1331
1332 uint8x8x3_t result0_u_8x8x3;
1333 result0_u_8x8x3.val[0] = vreinterpret_u8_u32(line04_channel0_u_32x2x2.val[0]);
1334 result0_u_8x8x3.val[1] = vreinterpret_u8_u32(line04_channel1_u_32x2x2.val[0]);
1335 result0_u_8x8x3.val[2] = vreinterpret_u8_u32(line04_channel2_u_32x2x2.val[0]);
1336 vst3_u8(targetBlock + targetStrideElements * 7u, result0_u_8x8x3);
1337
1338 break;
1339 }
1340
1341 default:
1342 ocean_assert(false && "Invalid flip direction!");
1343 }
1344}
1345
1346template <>
1347template <FrameTransposer::FlipDirection tFlipDirection>
1348OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<uint8_t, 4u>::transposeBlock8x8(const uint8_t* sourceBlock, uint8_t* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
1349{
1350 ocean_assert(sourceBlock && targetBlock);
1351 ocean_assert(sourceStrideElements >= 8u * 4u && targetStrideElements >= 8u * 4u);
1352
1353 // we simply tranpose four blocks of 4x4 pixels
1354
1355 switch (tFlipDirection)
1356 {
1357 case FD_NONE:
1358 {
1359 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock, sourceStrideElements, targetStrideElements);
1360 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
1361 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock + 16, sourceStrideElements, targetStrideElements);
1362 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
1363
1364 break;
1365 }
1366
1367 case FD_LEFT_RIGHT:
1368 {
1369 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock + 16, sourceStrideElements, targetStrideElements);
1370 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
1371 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock, sourceStrideElements, targetStrideElements);
1372 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
1373
1374 break;
1375 }
1376
1377 case FD_TOP_BOTTOM:
1378 {
1379 transposeBlock4x4NEON<tFlipDirection>(sourceBlock, targetBlock + 4 * targetStrideElements, sourceStrideElements, targetStrideElements);
1380 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 16, targetBlock, sourceStrideElements, targetStrideElements);
1381 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements, targetBlock + 4 * targetStrideElements + 16, sourceStrideElements, targetStrideElements);
1382 transposeBlock4x4NEON<tFlipDirection>(sourceBlock + 4 * sourceStrideElements + 16, targetBlock + 16, sourceStrideElements, targetStrideElements);
1383
1384 break;
1385 }
1386
1387 default:
1388 ocean_assert(false && "Invalid flip direction!");
1389 }
1390}
1391
1392#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1393
1394template <typename T, unsigned int tChannels>
1395template <FrameTransposer::FlipDirection tFlipDirection>
1396OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<T, tChannels>::transposeBlock8x8(const T* sourceBlock, T* targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
1397{
1398 ocean_assert(sourceBlock && targetBlock);
1399 ocean_assert(sourceStrideElements >= 8u && targetStrideElements >= 8u);
1400
1401 typedef typename DataType<T, tChannels>::Type PixelType;
1402
1403 switch (tFlipDirection)
1404 {
1405 case FD_NONE:
1406 {
1407 // simply transposing the block
1408
1409 for (unsigned int y = 0u; y < 8u; ++y)
1410 {
1411 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1412
1413 for (unsigned int x = 0u; x < 8u; ++x)
1414 {
1415 *((PixelType*)(targetBlock + targetStrideElements * x)) = sourcePixel[x];
1416 }
1417
1418 sourceBlock += sourceStrideElements;
1419 targetBlock += tChannels;
1420 }
1421
1422 break;
1423 }
1424
1425 case FD_LEFT_RIGHT:
1426 {
1427 // transposing the block and applying a left-right flip like a mirror, actually a 90 degree clockwise rotation
1428
1429 for (unsigned int y = 0u; y < 8u; ++y)
1430 {
1431 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1432
1433 for (unsigned int x = 0u; x < 8u; ++x)
1434 {
1435 *((PixelType*)(targetBlock + targetStrideElements * x) + (8u - y - 1u)) = sourcePixel[x];
1436 }
1437
1438 sourceBlock += sourceStrideElements;
1439 }
1440
1441 break;
1442 }
1443
1444 case FD_TOP_BOTTOM:
1445 {
1446 // transposing the block and applying a top-bottom flip, actually a 90 degree counter clockwise rotation
1447
1448 for (unsigned int y = 0u; y < 8u; ++y)
1449 {
1450 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1451
1452 for (unsigned int x = 0u; x < 8u; ++x)
1453 {
1454 *((PixelType*)(targetBlock + targetStrideElements * (8u - x - 1u)) + y) = sourcePixel[x];
1455 }
1456
1457 sourceBlock += sourceStrideElements;
1458 }
1459
1460 break;
1461 }
1462
1463 default:
1464 ocean_assert(false && "Invalid flip direction!");
1465 }
1466}
1467
1468template <typename T, unsigned int tChannels>
1469template <FrameTransposer::FlipDirection tFlipDirection>
1470OCEAN_FORCE_INLINE void FrameTransposer::BlockTransposer<T, tChannels>::transposeBlock(const T* sourceBlock, T* targetBlock, const unsigned int blockWidth, const unsigned int blockHeight, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
1471{
1472 ocean_assert(sourceBlock && targetBlock);
1473
1474 ocean_assert(blockWidth >= 1u && blockHeight >= 1u);
1475 ocean_assert(blockWidth < 8u || blockHeight < 8u);
1476
1477 ocean_assert(sourceStrideElements >= blockWidth);
1478 ocean_assert(targetStrideElements >= blockHeight);
1479
1480 typedef typename DataType<T, tChannels>::Type PixelType;
1481
1482 switch (tFlipDirection)
1483 {
1484 case FD_NONE:
1485 {
1486 // simply transposing the block
1487
1488 for (unsigned int y = 0u; y < blockHeight; ++y)
1489 {
1490 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1491
1492 for (unsigned int x = 0u; x < blockWidth; ++x)
1493 {
1494 *((PixelType*)(targetBlock + targetStrideElements * x)) = sourcePixel[x];
1495 }
1496
1497 sourceBlock += sourceStrideElements;
1498 targetBlock += tChannels;
1499 }
1500
1501 break;
1502 }
1503
1504 case FD_LEFT_RIGHT:
1505 {
1506 // transposing the block and applying a left-right flip like a mirror, actually a 90 degree clockwise rotation
1507
1508 for (unsigned int y = 0u; y < blockHeight; ++y)
1509 {
1510 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1511
1512 for (unsigned int x = 0u; x < blockWidth; ++x)
1513 {
1514 *((PixelType*)(targetBlock + targetStrideElements * x) + (blockHeight - y - 1u)) = sourcePixel[x];
1515 }
1516
1517 sourceBlock += sourceStrideElements;
1518 }
1519
1520 break;
1521 }
1522
1523 case FD_TOP_BOTTOM:
1524 {
1525 // transposing the block and applying a top-bottom flip, actually a 90 degree counter clockwise rotation
1526
1527 for (unsigned int y = 0u; y < blockHeight; ++y)
1528 {
1529 const PixelType* const sourcePixel = (const PixelType*)sourceBlock;
1530
1531 for (unsigned int x = 0u; x < blockWidth; ++x)
1532 {
1533 *((PixelType*)(targetBlock + targetStrideElements * (blockWidth - x - 1u)) + y) = sourcePixel[x];
1534 }
1535
1536 sourceBlock += sourceStrideElements;
1537 }
1538
1539 break;
1540 }
1541
1542 default:
1543 ocean_assert(false && "Invalid flip direction!");
1544 }
1545}
1546
1547template <typename TElementType, unsigned int tChannels>
1548inline void FrameTransposer::rotate90Subset(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
1549{
1550 static_assert(tChannels >= 1u, "Invalid channel number!");
1551
1552 ocean_assert(source && target);
1553 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1554
1555 ocean_assert(firstTargetRow + numberTargetRows <= sourceWidth);
1556
1557 const unsigned int& targetWidth = sourceHeight;
1558
1559 // Clockwise: Counter-Clockwise:
1560 // Source: Source:
1561 // ^ ^ ^ ^ ... D C B A
1562 // | | | | | | | |
1563 // | | | | | | | |
1564 // | | | | | | | |
1565 // A B C D ... v v v v
1566 // Target: Target:
1567 // A ------> A ------>
1568 // B ------> B ------>
1569 // C ------> C ------>
1570 // D ... D ...
1571 // E ... E ...
1572
1573 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
1574 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
1575
1576 TElementType* targetRowStartElement = target + firstTargetRow * targetStrideElements;
1577 const TElementType* const targetEndElement = targetRowStartElement + numberTargetRows * targetStrideElements - targetPaddingElements;
1578 ocean_assert_and_suppress_unused(targetRowStartElement < targetEndElement || numberTargetRows == 0u, targetEndElement);
1579
1580 if (clockwise)
1581 {
1582 const TElementType* sourceColumnStartElement = source + (sourceHeight - 1u) * sourceStrideElements + tChannels * firstTargetRow;
1583
1584 for (unsigned row = 0u; row < numberTargetRows; ++row)
1585 {
1586 const TElementType* sourceElement = sourceColumnStartElement;
1587
1588 TElementType* targetElement = targetRowStartElement;
1589 const TElementType* const targetRowEndElement = targetRowStartElement + tChannels * targetWidth;
1590 ocean_assert(targetRowEndElement <= targetEndElement);
1591
1592 while (targetElement != targetRowEndElement)
1593 {
1594 ocean_assert(sourceElement < source + sourceHeight * sourceStrideElements - sourcePaddingElements);
1595 ocean_assert(targetElement < targetEndElement);
1596 ocean_assert(targetElement < targetRowEndElement);
1597
1598 for (unsigned int c = 0u; c < tChannels; ++c)
1599 {
1600 targetElement[c] = sourceElement[c];
1601 }
1602
1603 sourceElement -= sourceStrideElements;
1604 targetElement += tChannels;
1605 }
1606
1607 sourceColumnStartElement += tChannels;
1608 targetRowStartElement += targetStrideElements;
1609 }
1610 }
1611 else
1612 {
1613 const TElementType* sourceColumnStartElement = source + tChannels * (sourceWidth - firstTargetRow - 1u);
1614
1615 for (unsigned row = 0u; row < numberTargetRows; ++row)
1616 {
1617 const TElementType* sourceElement = sourceColumnStartElement;
1618 ocean_assert(sourceElement >= source);
1619
1620 TElementType* targetElement = targetRowStartElement;
1621 const TElementType* const targetRowEndElement = targetRowStartElement + tChannels * targetWidth;
1622 ocean_assert(targetRowEndElement <= targetEndElement);
1623
1624 while (targetElement != targetRowEndElement)
1625 {
1626 ocean_assert(sourceElement < source + sourceHeight * sourceStrideElements - sourcePaddingElements);
1627 ocean_assert(targetElement < targetEndElement);
1628 ocean_assert(targetElement < targetRowEndElement);
1629
1630 for (unsigned int c = 0u; c < tChannels; ++c)
1631 {
1632 targetElement[c] = sourceElement[c];
1633 }
1634
1635 sourceElement += sourceStrideElements;
1636 targetElement += tChannels;
1637 }
1638
1639 sourceColumnStartElement -= tChannels;
1640 targetRowStartElement += targetStrideElements;
1641 }
1642 }
1643}
1644
1645}
1646
1647}
1648
1649#endif // META_OCEAN_CV_FRAME_TRANSPOSER_H
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
Helper class for functions transposing blocks.
Definition FrameTransposer.h:122
static OCEAN_FORCE_INLINE void transposeBlock(const T *sourceBlock, T *targetBlock, const unsigned int blockWidth, const unsigned int blockHeight, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of n x m pixels.
Definition FrameTransposer.h:1470
static OCEAN_FORCE_INLINE void transposeBlock4x4NEON(const T *sourceBlock, T *targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of 4x4 pixels.
static OCEAN_FORCE_INLINE void transposeBlock8x8(const T *sourceBlock, T *targetBlock, const unsigned int sourceStrideElements, const unsigned int targetStrideElements)
Transposes a block of 8x8 pixels.
Definition FrameTransposer.h:1396
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameTransposer.h:39
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a frame transposer.
Definition FrameTransposer.h:30
static void rotate90Subset(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameTransposer.h:1548
static void transposeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstSourceRow, const unsigned int numberSourceRows)
Transposes the subset of a given image buffer.
Definition FrameTransposer.h:519
static bool transpose(const Frame &source, Frame &target, Worker *worker=nullptr)
Transposes a given frame.
FlipDirection
Definition of individual flip directions which can be applied to a transposed frame.
Definition FrameTransposer.h:105
@ FD_NONE
Applying no flip.
Definition FrameTransposer.h:107
@ FD_TOP_BOTTOM
Applying a top-bottom flip, combined with a transpose operation an image can be rotated counter clock...
Definition FrameTransposer.h:111
@ FD_LEFT_RIGHT
Applying a left-right flip like a mirror, combined with a transpose operation an image can be rotated...
Definition FrameTransposer.h:109
static void rotate180(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image buffer 180 degrees.
Definition FrameTransposer.h:455
static void rotate90(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image buffer 90 degrees clockwise or counter clockwise.
Definition FrameTransposer.h:395
static bool rotate(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const int angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given image with 90 degree steps.
Definition FrameTransposer.h:468
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2876
This class implements Ocean's image class.
Definition Frame.h:1808
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4233
bool isValid() const
Returns whether this frame is valid.
Definition Frame.h:4528
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4228
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4218
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4223
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32