Ocean
Loading...
Searching...
No Matches
FrameConverterY10_Packed.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_CONVERTER_Y_10_PACKED_H
9#define META_OCEAN_CV_FRAME_CONVERTER_Y_10_PACKED_H
10
11#include "ocean/cv/CV.h"
14
15#include "ocean/base/Memory.h"
17#include "ocean/base/Worker.h"
18
19#include <unordered_map>
20
21namespace Ocean
22{
23
24namespace CV
25{
26
27/**
28 * This class provides functions to convert frames with Y10_PACKED pixel format.
29 * @ingroup cv
30 */
31class OCEAN_CV_EXPORT FrameConverterY10_Packed : public FrameConverter
32{
33 public:
34
35 /**
36 * This class implements the manager for lookup tables.
37 */
38 class LookupTableManager : public Singleton<LookupTableManager>
39 {
40 protected:
41
42 /// Definition of a map mapping gamma values to the memory of lookup tables.
43 typedef std::unordered_map<float, Memory> LookupTables;
44
45 public:
46
47 /**
48 * Returns the lookup table for a gamma compression/correction function.
49 * The gamma compression/correction is based on the following equation
50 * <pre>
51 * Y8 = 255 * (Y10 / 1023) ^ gamma
52 * </pre>
53 * @param gamma The gamma value for which the lookup table will be returned, with range (0, 2)
54 * @return The requested lookup table, will be valid
55 */
56 const uint8_t* lookupTable(const float gamma);
57
58 protected:
59
60 /// The lookup tables.
62
63 /// The lock of the manager.
65 };
66
67 public:
68
69 /**
70 * Converts a Y10_PACKED frame to a Y8 frame.
71 * @param source The source frame buffer, must be valid
72 * @param target The target frame buffer, must be valid
73 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
74 * @param height The height of the frame in pixel, with range [1, infinity)
75 * @param flag Determining the type of conversion
76 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
77 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
78 * @param worker Optional worker object to distribute the computational load
79 */
80 static inline void convertY10_PackedToY8Linear(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
81
82 /**
83 * Converts a Y10_PACKED frame to a Y8 frame by applying gamma compression/correction using a lookup table.
84 * The gamma compression/correction is based on the following equation
85 * <pre>
86 * Y8 = 255 * (Y10 / 1023) ^ gamma
87 * </pre>
88 * @param source The source frame buffer, must be valid
89 * @param target The target frame buffer, must be valid
90 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
91 * @param height The height of the frame in pixel, with range [1, infinity)
92 * @param flag Determining the type of conversion
93 * @param gamma The gamma value to be applied, with range (0, 2)
94 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
95 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
96 * @param worker Optional worker object to distribute the computational load
97 */
98 static inline void convertY10_PackedToY8GammaLUT(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const float gamma, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
99
100 /**
101 * Converts a Y10_PACKED frame to a Y8 frame by applying gamma compression/correction using a 3-step linear approximation.
102 * The gamma compression/correction is based on the following equation
103 * <pre>
104 * Y8 = 255 * (Y10 / 1023) ^ gamma
105 * </pre>
106 * @param source The source frame buffer, must be valid
107 * @param target The target frame buffer, must be valid
108 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
109 * @param height The height of the frame in pixel, with range [1, infinity)
110 * @param flag Determining the type of conversion
111 * @param gamma The gamma value to be applied, with range (0, 2)
112 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
113 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
114 * @param worker Optional worker object to distribute the computational load
115 */
116 static void convertY10_PackedToY8GammaApproximated(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const float gamma, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
117
118 /**
119 * Converts a Y10_PACKED frame to a Y10 frame, so that this function simply unpacks the 10 bits.
120 * @param source The source frame buffer, must be valid
121 * @param target The target frame buffer, must be valid
122 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
123 * @param height The height of the frame in pixel, with range [1, infinity)
124 * @param flag Determining the type of conversion
125 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
126 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
127 * @param worker Optional worker object to distribute the computational load
128 */
129 static inline void convertY10_PackedToY10(const uint8_t* const source, uint16_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
130
131 /**
132 * Converts a Y10_PACKED frame to a BGR24 frame.
133 * @param source The source frame buffer, must be valid
134 * @param target The target frame buffer, must be valid
135 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
136 * @param height The height of the frame in pixel, with range [1, infinity)
137 * @param flag Determining the type of conversion
138 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
139 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
140 * @param worker Optional worker object to distribute the computational load
141 */
142 static inline void convertY10_PackedToBGR24(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
143
144 /**
145 * Converts a Y10_PACKED frame to a RGB24 frame.
146 * @param source The source frame buffer, must be valid
147 * @param target The target frame buffer, must be valid
148 * @param width The width of the frame in pixel, with range [4, infinity), must be a multiple of 4
149 * @param height The height of the frame in pixel, with range [1, infinity)
150 * @param flag Determining the type of conversion
151 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
152 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
153 * @param worker Optional worker object to distribute the computational load
154 */
155 static inline void convertY10_PackedToRGB24(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
156
157 protected:
158
159 /**
160 * Converts a Y10_Packed row to a Y8 row.
161 * This function simply applies a linear bit reduction from 10 bits to 8 bits.
162 * @param source The pointer to the source pixels, must be valid
163 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
164 * @param size The number of source (and target pixels) to convert, with range [4, infinity), must be a multiple of 4
165 * @param unusedParameters Unused parameters, must be nullptr
166 */
167 static void convertRowY10_PackedToY8Linear(const uint8_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
168
169 /**
170 * Converts a Y10_Packed row to a Y8 row by applying gamma compression/correction with a lookup table.
171 * @param source The pointer to the source pixels, must be valid
172 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
173 * @param size The number of source (and target pixels) to convert, with range [4, infinity), must be a multiple of 4
174 * @param parameters The pointer to the `uint8_t` lookup table to be used, must be valid
175 */
176 static void convertRowY10_PackedToY8GammaLUT(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
177
178 /**
179 * Converts a Y10_Packed row to a Y8 row by applying gamma compression/correction with a 3-step linear interpolation.
180 * @param source The pointer to the source pixels, must be valid
181 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
182 * @param size The number of source (and target pixels) to convert, with range [4, infinity), must be a multiple of 4
183 * @param parameters Three linear slope parameters and two intercept parameters, must be valid
184 */
185 template <unsigned int tStep01, unsigned int tStep12>
186 static void convertRowY10_PackedToY8GammaApproximated(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
187
188 /**
189 * Converts a Y10_Packed row to a Y10 row.
190 * This function simply applies an unpacking of the 10 bits.
191 * @param source The pointer to the source pixels, must be valid
192 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
193 * @param size The number of source (and target pixels) to convert, with range [4, infinity), must be a multiple of 4
194 * @param unusedParameters Unused parameters, must be nullptr
195 */
196 static void convertRowY10_PackedToY10(const uint8_t* source, uint16_t* target, const size_t size, const void* unusedParameters = nullptr);
197
198 /**
199 * Converts a Y10_Packed row to a RGB24 row, or BGR24 row.
200 * This function simply applies a linear bit reduction from 10 bits to 8 bits.
201 * @param source The pointer to the source pixels, must be valid
202 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
203 * @param size The number of source (and target pixels) to convert, with range [4, infinity), must be a multiple of 4
204 * @param unusedParameters Unused parameters, must be nullptr
205 */
206 static void convertRowY10_PackedToYYY24Linear(const uint8_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
207
208#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
209
210 /**
211 * Converts 16 pixels (20 elements) of a Y10_Packed buffer to 16 Y8 pixels by applying a linear conversion.
212 * @param source The source buffer with 20 elements, must be valid
213 * @param target The resulting 16 Y8 pixels, must be valid
214 */
215 static OCEAN_FORCE_INLINE void convert16PixelY10_PackedToY8LinearNEON(const uint8_t* const source, uint8_t* const target);
216
217 /**
218 * Converts 16 pixels (20 elements) of a Y10_Packed buffer to 16 Y8 pixels by applying gamma compression/correction with a 3-step linear interpolation.
219 * @param source The source buffer with 20 elements, must be valid
220 * @param m0_256_s_16x4 The slope of the first linear approximation (with multiple of 256), with range (-10 * 256, 10 * 256)
221 * @param m1_256_s_16x4 The slope of the second linear approximation (with multiple of 256), with range (-10 * 256, 10 * 256)
222 * @param m2_256_s_16x4 The slope of the third linear approximation (with multiple of 256), with range (-10 * 256, 10 * 256)
223 * @param c1_s_16x8 The intercept of the second linear approximation, with range (-255, 255)
224 * @param c2_s_16x8 The intercept of the third linear approximation, with range (-255, 255)
225 * @param target The resulting 16 Y8 pixels, must be valid
226 */
227 template <unsigned int tStep01, unsigned int tStep12>
228 static OCEAN_FORCE_INLINE void convert16PixelY10_PackedToY8ApproximatedNEON(const uint8_t* const source, const int16x4_t& m0_256_s_16x4, const int16x4_t& m1_256_s_16x4, const int16x4_t& m2_256_s_16x4, const int16x8_t& c1_s_16x8, const int16x8_t& c2_s_16x8, uint8_t* const target);
229
230#endif
231};
232
233inline void FrameConverterY10_Packed::convertY10_PackedToY8Linear(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
234{
235 ocean_assert(source != nullptr && target != nullptr);
236 ocean_assert(width >= 4u && height >= 1u);
237 ocean_assert(width % 4u == 0u);
238
239 const unsigned int sourceStrideElements = width * 5u / 4u + sourcePaddingElements;
240 const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
241
242 constexpr void* options = nullptr;
243
244 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
245
246 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, flag, convertRowY10_PackedToY8Linear, CV::FrameChannels::reverseRowPixelOrderInPlace<uint8_t, 1u>, areContinuous, options, worker);
247}
248
249inline void FrameConverterY10_Packed::convertY10_PackedToY8GammaLUT(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const float gamma, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
250{
251 ocean_assert(source != nullptr && target != nullptr);
252 ocean_assert(width >= 4u && height >= 1u);
253 ocean_assert(width % 4u == 0u);
254
255 ocean_assert(gamma > 0.0f && gamma < 2.0f);
256
257 const unsigned int sourceStrideElements = width * 5u / 4u + sourcePaddingElements;
258 const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
259
260 const void* const options = LookupTableManager::get().lookupTable(gamma);
261
262 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
263
264 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, flag, convertRowY10_PackedToY8GammaLUT, CV::FrameChannels::reverseRowPixelOrderInPlace<uint8_t, 1u>, areContinuous, options, worker);
265}
266
267inline void FrameConverterY10_Packed::convertY10_PackedToY10(const uint8_t* source, uint16_t* target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
268{
269 ocean_assert(source != nullptr && target != nullptr);
270 ocean_assert(width >= 4u && height >= 1u);
271 ocean_assert(width % 4u == 0u);
272
273 const unsigned int sourceStrideElements = width * 5u / 4u + sourcePaddingElements;
274 const unsigned int targetStrideElements = width * 1u + targetPaddingElements;
275
276 constexpr void* options = nullptr;
277
278 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
279
280 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, flag, convertRowY10_PackedToY10, CV::FrameChannels::reverseRowPixelOrderInPlace<uint16_t, 1u>, areContinuous, options, worker);
281}
282
283inline void FrameConverterY10_Packed::convertY10_PackedToBGR24(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
284{
285 convertY10_PackedToRGB24(source, target, width, height, flag, sourcePaddingElements, targetPaddingElements, worker);
286}
287
288inline void FrameConverterY10_Packed::convertY10_PackedToRGB24(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
289{
290 ocean_assert(source != nullptr && target != nullptr);
291 ocean_assert(width >= 4u && height >= 1u);
292 ocean_assert(width % 4u == 0u);
293
294 const unsigned int sourceStrideElements = width * 5u / 4u + sourcePaddingElements;
295 const unsigned int targetStrideElements = width * 3u + targetPaddingElements;
296
297 constexpr void* options = nullptr;
298
299 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
300
301 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, flag, convertRowY10_PackedToYYY24Linear, CV::FrameChannels::reverseRowPixelOrderInPlace<uint8_t, 3u>, areContinuous, options, worker);
302}
303
304template <unsigned int tStep01, unsigned int tStep12>
305void FrameConverterY10_Packed::convertRowY10_PackedToY8GammaApproximated(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters)
306{
307 static_assert(0u < tStep01 && tStep01 < tStep12 && tStep12 < 1023u, "Invalid steps");
308
309 ocean_assert(source != nullptr && target != nullptr);
310 ocean_assert(size >= 4 && size % 4 == 0);
311 ocean_assert(parameters != nullptr);
312
313 // applying a 3-step linear approximation
314 // https://www.desmos.com/calculator/pezgk5slux
315
316 const int* coefficients = reinterpret_cast<const int*>(parameters);
317
318 const int32_t m0_256 = coefficients[0];
319 const int32_t m1_256 = coefficients[1];
320 const int32_t m2_256 = coefficients[2];
321
322 const int32_t c1 = coefficients[3];
323 const int32_t c2 = coefficients[4];
324
325 size_t blocks4 = size / size_t(4);
326
327#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
328
329 const size_t blocks16 = size / size_t(16);
330
331 const int16x4_t m0_256_s_16x4 = vdup_n_s16(int16_t(m0_256));
332 const int16x4_t m1_256_s_16x4 = vdup_n_s16(int16_t(m1_256));
333 const int16x4_t m2_256_s_16x4 = vdup_n_s16(int16_t(m2_256));
334
335 const int16x8_t c1_s_16x8 = vdupq_n_s16(int16_t(c1));
336 const int16x8_t c2_s_16x8 = vdupq_n_s16(int16_t(c2));
337
338 for (size_t n = 0; n < blocks16; ++n)
339 {
340 convert16PixelY10_PackedToY8ApproximatedNEON<tStep01, tStep12>(source, m0_256_s_16x4, m1_256_s_16x4, m2_256_s_16x4, c1_s_16x8, c2_s_16x8, target);
341
342 target += 16;
343 source += 20;
344 }
345
346 blocks4 = (size - blocks16 * size_t(16)) / size_t(4);
347 ocean_assert(blocks4 <= size / size_t(4));
348
349#endif // OCEAN_HARDWARE_NEON_VERSION
350
351 int32_t result256;
352
353 const int32_t c1_256 = c1 * 256;
354 const int32_t c2_256 = c2 * 256;
355
356 for (size_t n = 0; n < blocks4; ++n)
357 {
358 const int32_t x[4] =
359 {
360 int32_t(uint16_t(source[0]) << uint16_t(2) | (uint16_t(source[4]) & uint16_t(0b00000011))),
361 int32_t(uint16_t(source[1]) << uint16_t(2) | ((uint16_t(source[4]) & uint16_t(0b00001100)) >> uint16_t(2))),
362 int32_t(uint16_t(source[2]) << uint16_t(2) | ((uint16_t(source[4]) & uint16_t(0b00110000)) >> uint16_t(4))),
363 int32_t(uint16_t(source[3]) << uint16_t(2) | (uint16_t(source[4]) >> uint16_t(6)))
364 };
365
366 for (unsigned int i = 0u; i < 4u; ++i)
367 {
368 const uint32_t& xx = x[i];
369
370 if (xx < tStep01)
371 {
372 result256 = (m0_256 * xx);
373 }
374 else if (xx <= tStep12)
375 {
376 result256 = (m1_256 * xx + c1_256);
377 }
378 else
379 {
380 result256 = (m2_256 * xx + c2_256);
381 }
382
383 ocean_assert(0 <= result256 && result256 <= 255 * 256);
384
385 target[i] = int8_t((uint32_t(result256) + 128u) >> 8u);
386 }
387
388 target += 4;
389 source += 5;
390 }
391}
392
393#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
394
395OCEAN_FORCE_INLINE void FrameConverterY10_Packed::convert16PixelY10_PackedToY8LinearNEON(const uint8_t* const source, uint8_t* const target)
396{
397#ifdef __aarch64__
398
399 const uint8x16_t packedA_u_8x16 = vld1q_u8(source);
400 const uint8x8_t packedB_u_8x8 = vld1_u8(source + 12);
401
402 // F E D C B A 9 8 7 6 5 4 3 2 1 0
403 // D C B A 8 7 6 5 3 2 1 0 X X X X
404 constexpr uint8x16_t shuffle_u_8x16 = {16u, 16u, 16u, 16u, 0u, 1u, 2u, 3u, 5u, 6u, 7u, 8u, 10u, 11u, 12u, 13u};
405 const uint8x16_t intermediateA_u_8x16 = vqtbl1q_u8(packedA_u_8x16, shuffle_u_8x16);
406
407 const uint8x8_t intermediateB_u_8x8 = vext_u8(packedB_u_8x8, packedB_u_8x8, 3);
408
409 const uint8x16_t target_u_8x16 = vextq_u8(intermediateA_u_8x16, vcombine_u8(intermediateB_u_8x8, intermediateB_u_8x8), 4);
410
411#else
412
413 constexpr uint8x16_t mask_u_8x16 = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0xFFu, 0xFFu, 0xFFu};
414
415 const uint8x16_t packedA_u_8x16 = vld1q_u8(source);
416 const uint8x8_t packedB_u_8x8 = vld1_u8(source + 11);
417
418 const uint8x8_t packedAA_u_8x8 = vget_low_u8(packedA_u_8x16);
419 const uint8x8_t packedAB_u_8x8 = vget_high_u8(packedA_u_8x16);
420
421 constexpr uint8x8_t shuffleA_u_8x8 = {8u, 0u, 1u, 2u, 3u, 5u, 6u, 7u};
422 constexpr uint8x8_t shuffleB_u_8x8 = {0u, 2u, 3u, 4u, 5u, 7u, 8u, 8u};
423 const uint8x16_t intermediateA_u_8x16 = vextq_u8(vcombine_u8(vtbl1_u8(packedAA_u_8x8, shuffleA_u_8x8), vtbl1_u8(packedAB_u_8x8, shuffleB_u_8x8)), mask_u_8x16, 1); // we use the first zero element of mask_u_8x16
424
425 const uint8x16_t intermediateB_u_8x16 = vcombine_u8(vget_low_u8(mask_u_8x16), vand_u8(packedB_u_8x8, vget_high_u8(mask_u_8x16)));
426
427 const uint8x16_t target_u_8x16 = vorrq_u8(intermediateA_u_8x16, intermediateB_u_8x16);
428
429#endif // __aarch64__
430
431 vst1q_u8(target, target_u_8x16);
432}
433
434template <unsigned int tStep01, unsigned int tStep12>
435OCEAN_FORCE_INLINE void FrameConverterY10_Packed::convert16PixelY10_PackedToY8ApproximatedNEON(const uint8_t* const source, const int16x4_t& m0_s_16x4, const int16x4_t& m1_s_16x4, const int16x4_t& m2_s_16x4, const int16x8_t& c1_s_16x8, const int16x8_t& c2_s_16x8, uint8_t* const target)
436{
437 static_assert(0u < tStep01 && tStep01 < tStep12 && tStep12 < 1023u, "Invalid steps");
438
439 constexpr int8x16_t leftShifts_s_8x16 = {6, 0, 4, 0, 2, 0, 0, 0, 6, 0, 4, 0, 2, 0, 0, 0};
440 constexpr int16x8_t rightShifts_s_16x8 = {-6, -6, -6, -6, -6, -6, -6, -6};
441
442#ifdef __aarch64__
443
444 const uint8x16_t packedAB_u_8x16 = vld1q_u8(source);
445 const uint8x16_t packedCD_u_8x16 = vld1q_u8(source + 4);
446
447 // F E D C B A 9 8 7 6 5 4 3 2 1 0
448 // 8 9 7 9 6 9 5 9 3 4 2 4 1 4 0 4
449 constexpr uint8x16_t shuffleAB_u_8x16 = {4u, 0u, 4u, 1u, 4u, 2u, 4u, 3u, 9u, 5u, 9u, 6u, 9u, 7u, 9u, 8u};
450 const uint8x16_t intermediateAB_u_8x16 = vqtbl1q_u8(packedAB_u_8x16, shuffleAB_u_8x16);
451
452 constexpr uint8x16_t shuffleCD_u_8x16 = {10u, 6u, 10u, 7u, 10u, 8u, 10u, 9u, 15u, 11u, 15u, 12u, 15u, 13u, 15u, 14u};
453 const uint8x16_t intermediateCD_u_8x16 = vqtbl1q_u8(packedCD_u_8x16, shuffleCD_u_8x16);
454
455#else
456
457 constexpr uint8x8_t shuffleAB_u_8x8 = {4u, 0u, 4u, 1u, 4u, 2u, 4u, 3u};
458 constexpr uint8x8_t shuffleC_u_8x8 = {6u, 2u, 6u, 3u, 6u, 4u, 6u, 5u};
459 constexpr uint8x8_t shuffleD_u_8x8 = {7u, 3u, 7u, 4u, 7u, 5u, 7u, 6u};
460
461 const uint8x16_t packedAB_u_8x16 = vld1q_u8(source);
462 const uint8x8_t packedForD_u_8x8 = vld1_u8(source + 12);
463
464 const uint8x8_t packedForA_u_8x8 = vget_low_u8(packedAB_u_8x16);
465 const uint8x8_t packedForB_u_8x8 = vget_low_u8(vextq_u8(packedAB_u_8x16, packedAB_u_8x16, 5));
466 const uint8x8_t packedForC_u_8x8 = vget_high_u8(packedAB_u_8x16);
467
468 const uint8x16_t intermediateAB_u_8x16 = vcombine_u8(vtbl1_u8(packedForA_u_8x8, shuffleAB_u_8x8), vtbl1_u8(packedForB_u_8x8, shuffleAB_u_8x8));
469 const uint8x16_t intermediateCD_u_8x16 = vcombine_u8(vtbl1_u8(packedForC_u_8x8, shuffleC_u_8x8), vtbl1_u8(packedForD_u_8x8, shuffleD_u_8x8));
470
471#endif // __aarch64__
472
473
474 // ... XXXXXX99 33333333 44XXXXXX 22222222 XX44XXXX 11111111 XXXX44XX 00000000 XXXXXX44
475 // ... 99------ 33333333 44------ 22222222 44------ 11111111 44------ 00000000 44------
476 const uint16x8_t intermediateAB_u_16x8 = vreinterpretq_u16_u8(vshlq_u8(intermediateAB_u_8x16, leftShifts_s_8x16));
477 const uint16x8_t intermediateCD_u_16x8 = vreinterpretq_u16_u8(vshlq_u8(intermediateCD_u_8x16, leftShifts_s_8x16));
478
479
480 // ... 99------ 33333333 44------ 22222222 44------ 11111111 44------ 00000000 44------
481 // ... 55555599 ------33 33333344 ------22 22222244 ------11 11111144 ------00 00000044
482 const uint16x8_t unpackedAB_u_16x8 = vshlq_u16(intermediateAB_u_16x8, rightShifts_s_16x8);
483 const uint16x8_t unpackedCD_u_16x8 = vshlq_u16(intermediateCD_u_16x8, rightShifts_s_16x8);
484
485 // now, we have 16 uin16_t unpacked values for which we will approximate the gamma compression/correction
486
487 // approximation via three linear equations
488 // [ 0, step01]: f_0(x) = m_0 * x, with f_0(0) = 0
489 // [step01, step12]: f_1(x) = m_1 * x + c_1
490 // [step21, 1 ]: f_2(x) = m_2 * x + c_2, with f_2(1) = 1
491
492 constexpr int16x8_t step01_s_16x8 = {int32_t(tStep01), int32_t(tStep01), int32_t(tStep01), int32_t(tStep01), int32_t(tStep01), int32_t(tStep01), int32_t(tStep01), int32_t(tStep01)};
493 constexpr int16x8_t step12_s_16x8 = {int32_t(tStep12), int32_t(tStep12), int32_t(tStep12), int32_t(tStep12), int32_t(tStep12), int32_t(tStep12), int32_t(tStep12), int32_t(tStep12)};
494
495 // determining masks to switch between one of the tree linear equations
496
497 const uint16x8_t isWithin0AB_u_16x8 = vcleq_s16(vreinterpretq_s16_u16(unpackedAB_u_16x8), step01_s_16x8); // unpackedAB <= step01 ? 0xFFFFFFFF : 0x00000000
498 const uint16x8_t isWithin0CD_u_16x8 = vcleq_s16(vreinterpretq_s16_u16(unpackedCD_u_16x8), step01_s_16x8);
499 const uint8x16_t isWithin0_u_8x16 = vcombine_u8(vmovn_u16(isWithin0AB_u_16x8), vmovn_u16(isWithin0CD_u_16x8));
500
501 const uint16x8_t isWithin2AB_u_16x8 = vcgtq_s16(vreinterpretq_s16_u16(unpackedAB_u_16x8), step12_s_16x8); // unpackedAB > step12 ? 0xFFFFFFFF : 0x00000000
502 const uint16x8_t isWithin2CD_u_16x8 = vcgtq_s16(vreinterpretq_s16_u16(unpackedCD_u_16x8), step12_s_16x8);
503 const uint8x16_t isWithin2_u_8x16 = vcombine_u8(vmovn_u16(isWithin2AB_u_16x8), vmovn_u16(isWithin2CD_u_16x8));
504
505 const uint8x16_t isWithin1_u_8x16 = vmvnq_u8(vorrq_u8(isWithin0_u_8x16, isWithin2_u_8x16)); // unpacked > step01 && unpacked <= step02 ? 0xFFFFFFFF : 0x00000000
506
507
508 const int16x4_t unpackedA_s_16x4 = vreinterpret_s16_u16(vget_low_u8(unpackedAB_u_16x8));
509 const int16x4_t unpackedB_s_16x4 = vreinterpret_s16_u16(vget_high_u8(unpackedAB_u_16x8));
510 const int16x4_t unpackedC_s_16x4 = vreinterpret_s16_u16(vget_low_u8(unpackedCD_u_16x8));
511 const int16x4_t unpackedD_s_16x4 = vreinterpret_s16_u16(vget_high_u8(unpackedCD_u_16x8));
512
513 // result0 = (m0 * x) / 256)
514 const uint16x8_t resultAB0_u_16x8 = vcombine_u16(vqrshrun_n_s32(vmull_s16(m0_s_16x4, unpackedA_s_16x4), 8), vqrshrun_n_s32(vmull_s16(m0_s_16x4, unpackedB_s_16x4), 8));
515 const uint16x8_t resultCD0_u_16x8 = vcombine_u16(vqrshrun_n_s32(vmull_s16(m0_s_16x4, unpackedC_s_16x4), 8), vqrshrun_n_s32(vmull_s16(m0_s_16x4, unpackedD_s_16x4), 8));
516
517 // result1 = ((m1 * x) / 256 + c1)
518 const int16x8_t resultAB1_s_16x8 = vaddq_s16(c1_s_16x8, vcombine_s16(vrshrn_n_s32(vmull_s16(m1_s_16x4, unpackedA_s_16x4), 8), vrshrn_n_s32(vmull_s16(m1_s_16x4, unpackedB_s_16x4), 8)));
519 const int16x8_t resultCD1_s_16x8 = vaddq_s16(c1_s_16x8, vcombine_s16(vrshrn_n_s32(vmull_s16(m1_s_16x4, unpackedC_s_16x4), 8), vrshrn_n_s32(vmull_s16(m1_s_16x4, unpackedD_s_16x4), 8)));
520
521 // result2 = ((m2 * x) / 256 + c2)
522 const int16x8_t resultAB2_s_16x8 = vaddq_s16(c2_s_16x8, vcombine_s16(vrshrn_n_s32(vmull_s16(m2_s_16x4, unpackedA_s_16x4), 8), vrshrn_n_s32(vmull_s16(m2_s_16x4, unpackedB_s_16x4), 8)));
523 const int16x8_t resultCD2_s_16x8 = vaddq_s16(c2_s_16x8, vcombine_s16(vrshrn_n_s32(vmull_s16(m2_s_16x4, unpackedC_s_16x4), 8), vrshrn_n_s32(vmull_s16(m2_s_16x4, unpackedD_s_16x4), 8)));
524
525 const uint8x16_t result0_u_8x16 = vcombine_u8(vqmovn_u16(resultAB0_u_16x8), vqmovn_u16(resultCD0_u_16x8));
526 const uint8x16_t result1_u_8x16 = vcombine_u8(vqmovun_s16(resultAB1_s_16x8), vqmovun_s16(resultCD1_s_16x8));
527 const uint8x16_t result2_u_8x16 = vcombine_u8(vqmovun_s16(resultAB2_s_16x8), vqmovun_s16(resultCD2_s_16x8));
528
529
530 // result0 & isWithin0 | result1 & isWithin1 | result2 & isWithin2
531 const uint8x16_t result_u_8x16 = vorrq_u8(vorrq_u8(vandq_u8(result0_u_8x16, isWithin0_u_8x16), vandq_u8(result1_u_8x16, isWithin1_u_8x16)), vandq_u8(result2_u_8x16, isWithin2_u_8x16));
532
533 vst1q_u8(target, result_u_8x16);
534}
535
536#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
537
538}
539
540}
541
542#endif // META_OCEAN_CV_FRAME_CONVERTER_Y_10_PACKED_H
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3211
This class implements the manager for lookup tables.
Definition FrameConverterY10_Packed.h:39
std::unordered_map< float, Memory > LookupTables
Definition of a map mapping gamma values to the memory of lookup tables.
Definition FrameConverterY10_Packed.h:43
const uint8_t * lookupTable(const float gamma)
Returns the lookup table for a gamma compression/correction function.
LookupTables lookupTables_
The lookup tables.
Definition FrameConverterY10_Packed.h:61
Lock lock_
The lock of the manager.
Definition FrameConverterY10_Packed.h:64
This class provides functions to convert frames with Y10_PACKED pixel format.
Definition FrameConverterY10_Packed.h:32
static void convertY10_PackedToBGR24(const uint8_t *const source, uint8_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to a BGR24 frame.
Definition FrameConverterY10_Packed.h:283
static void convertY10_PackedToY10(const uint8_t *const source, uint16_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to a Y10 frame, so that this function simply unpacks the 10 bits.
Definition FrameConverterY10_Packed.h:267
static void convertY10_PackedToY8GammaApproximated(const uint8_t *const source, uint8_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const float gamma, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to a Y8 frame by applying gamma compression/correction using a 3-step lin...
static void convertRowY10_PackedToY8GammaLUT(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a Y10_Packed row to a Y8 row by applying gamma compression/correction with a lookup table.
static OCEAN_FORCE_INLINE void convert16PixelY10_PackedToY8LinearNEON(const uint8_t *const source, uint8_t *const target)
Converts 16 pixels (20 elements) of a Y10_Packed buffer to 16 Y8 pixels by applying a linear conversio...
Definition FrameConverterY10_Packed.h:395
static void convertRowY10_PackedToY10(const uint8_t *source, uint16_t *target, const size_t size, const void *unusedParameters=nullptr)
Converts a Y10_Packed row to a Y10 row.
static void convertRowY10_PackedToY8Linear(const uint8_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Converts a Y10_Packed row to a Y8 row.
static OCEAN_FORCE_INLINE void convert16PixelY10_PackedToY8ApproximatedNEON(const uint8_t *const source, const int16x4_t &m0_256_s_16x4, const int16x4_t &m1_256_s_16x4, const int16x4_t &m2_256_s_16x4, const int16x8_t &c1_s_16x8, const int16x8_t &c2_s_16x8, uint8_t *const target)
Converts 16 pixels (20 elements) of a Y10_Packed buffer to 16 Y8 pixels by applying gamma compression...
Definition FrameConverterY10_Packed.h:435
static void convertY10_PackedToY8Linear(const uint8_t *const source, uint8_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to a Y8 frame.
Definition FrameConverterY10_Packed.h:233
static void convertY10_PackedToY8GammaLUT(const uint8_t *const source, uint8_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const float gamma, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to a Y8 frame by applying gamma compression/correction using a lookup tab...
Definition FrameConverterY10_Packed.h:249
static void convertRowY10_PackedToYYY24Linear(const uint8_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Converts a Y10_Packed row to an RGB24 row, or BGR24 row.
static void convertRowY10_PackedToY8GammaApproximated(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a Y10_Packed row to a Y8 row by applying gamma compression/correction with a 3-step linear i...
Definition FrameConverterY10_Packed.h:305
static void convertY10_PackedToRGB24(const uint8_t *const source, uint8_t *const target, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Converts a Y10_PACKED frame to an RGB24 frame.
Definition FrameConverterY10_Packed.h:288
This class implements a recursive lock object.
Definition Lock.h:31
This template class is the base class for all singleton objects.
Definition Singleton.h:71
static LookupTableManager & get()
Returns a reference to the unique object.
Definition Singleton.h:115
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
The namespace covering the entire Ocean framework.
Definition Accessor.h:15