Ocean
Loading...
Searching...
No Matches
SquareMatrix4.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MATH_SQUARE_MATRIX_4_H
9#define META_OCEAN_MATH_SQUARE_MATRIX_4_H
10
11#include "ocean/math/Math.h"
12#include "ocean/math/Equation.h"
13#include "ocean/math/Vector2.h"
14#include "ocean/math/Vector3.h"
15#include "ocean/math/Vector4.h"
16
17#include <vector>
18
19namespace Ocean
20{
21
22// Forward declaration.
23template <typename T> class AnyCameraT;
24
25// Forward declaration.
26template <typename T> class HomogenousMatrixT4;
27
28// Forward declaration.
29template <typename T> class SquareMatrixT3;
30
31// Forward declaration.
32template <typename T> class SquareMatrixT4;
33
34/**
35 * Definition of the SquareMatrix4 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with single or double precision float data type.
36 * @see SquareMatrixT4
37 * @ingroup math
38 */
40
41/**
42 * Instantiation of the SquareMatrixT4 template class using a double precision float data type.
43 * @see SquareMatrixT4
44 * @ingroup math
45 */
47
48/**
49 * Instantiation of the SquareMatrixT4 template class using a double precision float data type.
50 * @see SquareMatrixT4
51 * @ingroup math
52 */
54
55/**
56 * Definition of a typename alias for vectors with SquareMatrixT4 objects.
57 * @see SquareMatrixT4
58 * @ingroup math
59 */
60template <typename T>
61using SquareMatricesT4 = std::vector<SquareMatrixT4<T>>;
62
63/**
64 * Definition of a vector holding SquareMatrix4 objects.
65 * @see SquareMatrix4
66 * @ingroup math
67 */
68using SquareMatrices4 = std::vector<SquareMatrix4>;
69
70/**
71 * This class implements a 4x4 square matrix.
72 * The values are stored in a column aligned order with indices:
73 * <pre>
74 * | 0 4 8 12 |
75 * | 1 5 9 13 |
76 * | 2 6 10 14 |
77 * | 3 7 11 15 |
78 * </pre>
79 * @tparam T Data type of matrix elements
80 * @see SquareMatrix4, SquareMatrixF4, SquareMatrixD4.
81 * @ingroup math
82 */
83template <typename T>
85{
86 template <typename U> friend class SquareMatrixT4;
87
88 public:
89
90 /**
91 * Definition of the used data type.
92 */
93 using Type = T;
94
95 public:
96
97 /**
98 * Creates a new SquareMatrixT4 object with undefined elements.
99 * Beware: This matrix is neither a zero nor an entity matrix!
100 */
102
103 /**
104 * Copy constructor.
105 * @param matrix The matrix to copy
106 */
107 SquareMatrixT4(const SquareMatrixT4<T>& matrix) = default;
108
109 /**
110 * Copy constructor for a matrix with difference element data type than T.
111 * @param matrix The matrix to copy
112 * @tparam U The element data type of the second matrix
113 */
114 template <typename U>
115 inline explicit SquareMatrixT4(const SquareMatrixT4<U>& matrix);
116
117 /**
118 * Creates a new SquareMatrixT4 object.
119 * @param setToIdentity Determines whether a entity matrix will be created
120 */
121 explicit SquareMatrixT4(const bool setToIdentity);
122
123 /**
124 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
125 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
126 * @tparam U The floating point type of the given elements
127 */
128 template <typename U>
129 explicit SquareMatrixT4(const U* arrayValues);
130
131 /**
132 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
133 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
134 */
135 explicit SquareMatrixT4(const T* arrayValues);
136
137 /**
138 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
139 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
140 * @param valuesRowAligned True, if the given values are stored in a row aligned order; False, if the values are stored in a column aligned order (which is the default case for this matrix)
141 * @tparam U The floating point type of the given elements
142 */
143 template <typename U>
144 SquareMatrixT4(const U* arrayValues, const bool valuesRowAligned);
145
146 /**
147 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
148 * @param valuesRowAligned True, if the given values are stored in a row aligned order; False, if the values are stored in a column aligned order (which is the default case for this matrix)
149 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
150 */
151 SquareMatrixT4(const T* arrayValues, const bool valuesRowAligned);
152
153 /**
154 * Creates a new SquareMatrixT4 object by given transformation matrix.
155 * @param transformation The transformation matrix to copy
156 */
157 explicit SquareMatrixT4(const HomogenousMatrixT4<T>& transformation);
158
159 /**
160 * Creates a new SquareMatrixT4 object by given 3x3 sub matrix.
161 * The lower and right elements of the 4x4 square matrix are set to zero.
162 * @param subMatrix 3x3 subMatrix defining the upper left elements of the new matrix
163 */
164 explicit SquareMatrixT4(const SquareMatrixT3<T>& subMatrix);
165
166 /**
167 * Creates a new SquareMatrixT4 object by a given diagonal vector.
168 * @param diagonal The diagonal vector for the new matrix
169 */
170 explicit SquareMatrixT4(const VectorT4<T>& diagonal);
171
172 /**
173 * Returns the transposed of this matrix.
174 * @return Transposed matrix
175 */
176 [[nodiscard]] SquareMatrixT4<T> transposed() const;
177
178 /**
179 * Transposes the matrix.
180 */
181 void transpose();
182
183 /**
184 * Returns the inverted matrix of this matrix.
185 * This matrix must not be singular.<br>
186 * Beware: This function does not throw an exception if the matrix cannot be inverted.<br>
187 * Thus, ensure that the matrix is invertible before calling this function.<br>
188 * Even better: avoid the usage of this function and call invert() instead.<br>
189 * In case, this matrix is not invertible, this matrix will be returned instead.
190 * @return The inverted matrix
191 * @see invert(), isSingular().
192 */
193 [[nodiscard]] SquareMatrixT4<T> inverted() const;
194
195 /**
196 * Inverts this matrix in place.
197 * @return True, if the matrix is not singular and could be inverted
198 * @see inverted().
199 */
200 bool invert();
201
202 /**
203 * Inverts the matrix and returns the result.
204 * @param invertedMatrix The resulting inverted matrix
205 * @return True, if the matrix is not singular and could be inverted
206 * @see inverted().
207 */
208 bool invert(SquareMatrixT4<T>& invertedMatrix) const;
209
210 /**
211 * Returns the determinant of the matrix.
212 * @return Matrix determinant
213 */
214 T determinant() const;
215
216 /**
217 * Returns the trace of the matrix which is the sum of the diagonal elements.
218 * @return Trace of the matrix
219 */
220 T trace() const;
221
222 /**
223 * Sets the matrix to the identity matrix.
224 */
225 inline void toIdentity();
226
227 /**
228 * Sets the matrix to a zero matrix.
229 * @see isNull();
230 */
231 inline void toNull();
232
233 /**
234 * Returns whether this matrix is a null matrix.
235 * @return True, if so
236 */
237 bool isNull() const;
238
239 /**
240 * Returns whether this matrix is the identity matrix.
241 * @return True, if so
242 */
243 bool isIdentity() const;
244
245 /**
246 * Returns whether this matrix is singular (and thus cannot be inverted).
247 * A matrix is singular if the determinant of a matrix is 0.<br>
248 * @return True, if so
249 */
250 inline bool isSingular() const;
251
252 /**
253 * Returns whether this matrix is symmetric.
254 * @param epsilon The epsilon threshold to be used, with range [0, infinity)
255 * @return True, if so
256 */
257 bool isSymmetric(const T epsilon = NumericT<T>::eps()) const;
258
259 /**
260 * Returns whether two matrices are almost identical up to a specified epsilon.
261 * @param matrix Second matrix that will be checked
262 * @param eps The epsilon threshold to be used, with range [0, infinity)
263 * @return True, if so
264 */
265 inline bool isEqual(const SquareMatrixT4<T>& matrix, const T eps = NumericT<T>::eps()) const;
266
267 /**
268 * Performs an eigen value analysis.
269 * @param eigenValues The resulting four eigen values, must be valid
270 * @param eigenVectors The resulting four eigen vectors, must be valid
271 * @return True, if succeeded
272 */
273 bool eigenSystem(T* eigenValues, VectorT4<T>* eigenVectors) const;
274
275 /**
276 * Returns a pointer to the internal values.
277 * @return Pointer to the internal values
278 */
279 inline const T* data() const;
280
281 /**
282 * Returns a pointer to the internal values.
283 * @return Pointer to the internal values
284 */
285 inline T* data();
286
287 /**
288 * Copies the elements of this matrix to an array with floating point values of type U.
289 * @param arrayValues Array with 16 floating point values of type U receiving the elements of this matrix
290 * @tparam U Floating point type
291 */
292 template <typename U>
293 void copyElements(U* arrayValues) const;
294
295 /**
296 * Copies the elements of this matrix to an array with floating point values.
297 * @param arrayValues Array with floating point values receiving the elements of this matrix
298 */
299 void copyElements(T* arrayValues) const;
300
301 /**
302 * Default assign operator.
303 * @return Reference to this object
304 */
306
307 /**
308 * Returns whether two matrices are identical up to a small epsilon.
309 * @param matrix Right operand
310 * @return True, if so
311 */
312 bool operator==(const SquareMatrixT4<T>& matrix) const;
313
314 /**
315 * Returns whether two matrices are not identical up to a small epsilon.
316 * @param matrix Right operand
317 * @return True, if so
318 */
319 inline bool operator!=(const SquareMatrixT4<T>& matrix) const;
320
321 /**
322 * Adds two matrices.
323 * @param matrix Right operand
324 * @return Sum matrix
325 */
327
328 /**
329 * Adds and assigns two matrices.
330 * @param matrix Right operand
331 * @return Reference to this object
332 */
334
335 /**
336 * Subtracts two matrices.
337 * @param matrix Right operand
338 * @return Difference matrix
339 */
341
342 /**
343 * Subtracts and assigns two matrices.
344 * @param matrix Right operand
345 * @return Reference to this object
346 */
348
349 /**
350 * Returns the negative matrix of this matrix (all matrix elements are multiplied by -1).
351 * @return Resulting negative matrix
352 */
354
355 /**
356 * Multiplies two matrices.
357 * @param matrix Right operand
358 * @return Product matrix
359 */
360 OCEAN_FORCE_INLINE SquareMatrixT4<T> operator*(const SquareMatrixT4<T>& matrix) const;
361
362 /**
363 * Multiplies two matrices.
364 * @param matrix Right operand
365 * @return Product matrix
366 */
367 OCEAN_FORCE_INLINE SquareMatrixT4<T> operator*(const HomogenousMatrixT4<T>& matrix) const;
368
369 /**
370 * Multiplies and assigns two matrices.
371 * @param matrix Right operand
372 * @return Reference to this object
373 */
374 OCEAN_FORCE_INLINE SquareMatrixT4<T>& operator*=(const SquareMatrixT4<T>& matrix);
375
376 /**
377 * Multiplies and assigns two matrices.
378 * @param matrix Right operand
379 * @return Reference to this object
380 */
381 OCEAN_FORCE_INLINE SquareMatrixT4<T>& operator*=(const HomogenousMatrixT4<T>& matrix);
382
383 /**
384 * Multiply operator for a 3D vector.
385 * The 3D vector is interpreted as a 4D vector with fourth component equal to 1.<br>
386 * The final result will be de-homogenizated to provide a 3D vector result.<br>
387 * Beware the dot product of the last row with the vector must not be zero!
388 * @param vector Right operand
389 * @return Resulting 3D vector
390 */
391 OCEAN_FORCE_INLINE VectorT3<T> operator*(const VectorT3<T>& vector) const;
392
393 /**
394 * Multiply operator for a 4D vector.
395 * @param vector Right operand
396 * @return Resulting 4D vector
397 */
398 OCEAN_FORCE_INLINE VectorT4<T> operator*(const VectorT4<T>& vector) const;
399
400 /**
401 * Multiplies this matrix with a scalar value.
402 * @param value Right operand
403 * @return Resulting matrix
404 */
405 SquareMatrixT4<T> operator*(const T value) const;
406
407 /**
408 * Multiplies and assigns this matrix with a scalar value.
409 * @param value right operand
410 * @return Reference to this object
411 */
413
414 /**
415 * Element operator.
416 * Beware: No range check will be done!
417 * @param index The index of the element to return [0, 15]
418 * @return Specified element
419 */
420 inline T operator[](const unsigned int index) const;
421
422 /**
423 * Element operator.
424 * Beware: No range check will be done!
425 * @param index The index of the element to return [0, 15]
426 * @return Specified element
427 */
428 inline T& operator[](const unsigned int index);
429
430 /**
431 * Element operator.
432 * Beware: No range check will be done!
433 * @param row The row of the element to return [0, 3]
434 * @param column The column of the element to return [0, 3]
435 * @return Specified element
436 */
437 inline T operator()(const unsigned int row, const unsigned int column) const;
438
439 /**
440 * Element operator.
441 * Beware: No range check will be done!
442 * @param row The row of the element to return [0, 3]
443 * @param column The column of the element to return [0, 3]
444 * @return Specified element
445 */
446 inline T& operator()(const unsigned int row, const unsigned int column);
447
448 /**
449 * Element operator.
450 * Beware: No range check will be done!
451 * @param index The index of the element to return [0, 15]
452 * @return Specified element
453 */
454 inline T operator()(const unsigned int index) const;
455
456 /**
457 * Element operator.
458 * Beware: No range check will be done!
459 * @param index The index of the element to return [0, 15]
460 * @return Specified element
461 */
462 inline T& operator()(const unsigned int index);
463
464 /**
465 * Access operator.
466 * @return Pointer to the internal values
467 */
468 inline const T* operator()() const;
469
470 /**
471 * Access operator.
472 * @return Pointer to the internal values
473 */
474 inline T* operator()();
475
476 /**
477 * Hash function.
478 * @param matrix The matrix for which the hash value will be determined
479 * @return The resulting hash value
480 */
481 inline size_t operator()(const SquareMatrixT4<T>& matrix) const;
482
483 /**
484 * Returns the number of elements this matrix has.
485 * @return The number of elements, always 16
486 */
487 static inline size_t elements();
488
489 /**
490 * Multiplies several 4D vectors with a given matrix.
491 * @param matrix The matrix to be used for multiplication, may be nullptr if number is 0
492 * @param vectors The input vectors that will be multiplied, may be nullptr if number is 0
493 * @param results The resulting output (multiplied/transformed) vectors, with same number as the provided input vectors
494 * @param number The number of provided vectors (input and output), with range [0, infinity)
495 */
496 static void multiply(const SquareMatrixT4<T>& matrix, const VectorT4<T>* vectors, VectorT4<T>* results, const size_t number);
497
498 /**
499 * Creates a projection matrix defined by the horizontal field of view, the aspect ratio and the near and far clipping plane.
500 * @param fovX Horizontal field of view, in radian and a range of [0, Pi]
501 * @param aspectRatio View aspect ratio which is width divided by the height of the projection window
502 * @param nearDistance Positive distance to the near clipping plane
503 * @param farDistance Positive distance to the far clipping lane
504 * @return Specified projection matrix
505 */
506 static SquareMatrixT4<T> projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance);
507
508 /**
509 * Creates a projection matrix defined by a camera profile of a pinhole camera and the near and far clipping plane.
510 * @param anyCamera The camera profile of a pinhole camera, without distortion parameters, must be valid
511 * @param nearDistance Positive distance to the near clipping plane
512 * @param farDistance Positive distance to the far clipping lane
513 * @return Specified projection matrix
514 */
515 static SquareMatrixT4<T> projectionMatrix(const AnyCameraT<T>& anyCamera, const T nearDistance, const T farDistance);
516
517 /**
518 * Creates a projection matrix defined by an asymmetric viewing frustum.
519 * The shape of the frustum is defined by the rectangle on the near plane.<br>
520 * Afterwards, the field of view is defined by the (positive) distance to the near clipping plane.<br>
521 * Followed by the (positive) far clipping plane to determine the entire frustum.
522 * @param left Position of the left border of the rectangle on the near plane
523 * @param right Position of the right border of the rectangle on the near plane
524 * @param top Position of the top border of the rectangle on the near plane
525 * @param bottom Position of the bottom border of the rectangle on the near plane
526 * @param nearDistance Positive distance to the near clipping plane
527 * @param farDistance Positive distance to the far clipping plane
528 * @return Specified frustum projection matrix
529 */
530 static SquareMatrixT4<T> frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance);
531
532 /**
533 * Creates a project matrix defined by an asymmetric viewing frustum.
534 * The shape of the frustum is defined by the rectangle on the near plane.<br>
535 * The viewing position is defined by the given view matrix while the near plane is expected to lie in the origin of the coordinate system.
536 * @param width The width of the near plane, with range (0, infinity)
537 * @param height The height of the near plane, with range (0, infinity)
538 * @param viewingMatrix Viewing matrix transforming point defined in the camera coordinate system into points defined in the world coordinate system, must be invertible
539 * @param nearDistance Positive distance to the near clipping plane, with range (0, infinity)
540 * @param farDistance Positive distance to the far clipping plane, with range (nearDistance, infinity)
541 * @return Specified frustum projection matrix
542 */
543 static SquareMatrixT4<T> frustumMatrix(const T width, const T height, const HomogenousMatrixT4<T>& viewingMatrix, const T nearDistance, const T farDistance);
544
545 /**
546 * Converts matrices with specific data type to matrices with different data type.
547 * @param matrices The matrices to convert
548 * @return The converted matrices
549 * @tparam U The element data type of the matrices to convert
550 */
551 template <typename U>
552 static inline std::vector< SquareMatrixT4<T> > matrices2matrices(const std::vector< SquareMatrixT4<U> >& matrices);
553
554 /**
555 * Converts matrices with specific data type to matrices with different data type.
556 * @param matrices The matrices to convert
557 * @param size The number of matrices to convert
558 * @return The converted matrices
559 * @tparam U The element data type of the matrices to convert
560 */
561 template <typename U>
562 static inline std::vector< SquareMatrixT4<T> > matrices2matrices(const SquareMatrixT4<U>* matrices, const size_t size);
563
564 protected:
565
566 /**
567 * Swaps two rows of this matrix.
568 * @param row0 The index of the first row ,with range [0, 3]
569 * @param row1 The index of the second row, with range [0, 3]
570 */
571 void swapRows(const unsigned int row0, const unsigned int row1);
572
573 /**
574 * Multiplies a row with a scalar value.
575 * @param row The index of the row to multiply, with range [0, 3]
576 * @param scalar The scalar to multiply
577 */
578 void multiplyRow(const unsigned int row, const T scalar);
579
580 /**
581 * Multiplies elements from a specific row with a scalar and adds them to another row.
582 * @param targetRow The index of the target row, with range [0, 3]
583 * @param sourceRow The index of the source row, with range [0, 3]
584 * @param scalar The scalar to multiply the source elements with
585 */
586 void addRows(const unsigned int targetRow, const unsigned int sourceRow, const T scalar);
587
588 protected:
589
590 /// The sixteen values of the matrix.
591 T values[16];
592};
593
594template <typename T>
596{
597 // nothing to do here
598}
599
600template <typename T>
601template <typename U>
603{
604 for (unsigned int n = 0u; n < 16u; ++n)
605 {
606 values[n] = T(matrix.values[n]);
607 }
608}
609
610template <typename T>
611SquareMatrixT4<T>::SquareMatrixT4(const bool setToIdentity)
612{
613 if (setToIdentity)
614 {
615 toIdentity();
616 }
617 else
618 {
619 toNull();
620 }
621}
622
623template <typename T>
624template <typename U>
626{
627 ocean_assert(arrayValues);
628
629 for (unsigned int n = 0u; n < 16u; ++n)
630 {
631 values[n] = T(arrayValues[n]);
632 }
633}
634
635template <typename T>
637{
638 ocean_assert(arrayValues);
639 memcpy(values, arrayValues, sizeof(T) * 16);
640}
641
642template <typename T>
643template <typename U>
644SquareMatrixT4<T>::SquareMatrixT4(const U* arrayValues, const bool valuesRowAligned)
645{
646 ocean_assert(arrayValues);
647
648 if (valuesRowAligned)
649 {
650 values[ 0] = T(arrayValues[ 0]);
651 values[ 1] = T(arrayValues[ 4]);
652 values[ 2] = T(arrayValues[ 8]);
653 values[ 3] = T(arrayValues[12]);
654 values[ 4] = T(arrayValues[ 1]);
655 values[ 5] = T(arrayValues[ 5]);
656 values[ 6] = T(arrayValues[ 9]);
657 values[ 7] = T(arrayValues[13]);
658 values[ 8] = T(arrayValues[ 2]);
659 values[ 9] = T(arrayValues[ 6]);
660 values[10] = T(arrayValues[10]);
661 values[11] = T(arrayValues[14]);
662 values[12] = T(arrayValues[ 3]);
663 values[13] = T(arrayValues[ 7]);
664 values[14] = T(arrayValues[11]);
665 values[15] = T(arrayValues[15]);
666 }
667 else
668 {
669 for (unsigned int n = 0u; n < 16u; ++n)
670 {
671 values[n] = T(arrayValues[n]);
672 }
673 }
674}
675
676template <typename T>
677SquareMatrixT4<T>::SquareMatrixT4(const T* arrayValues, const bool valuesRowAligned)
678{
679 ocean_assert(arrayValues);
680
681 if (valuesRowAligned)
682 {
683 values[ 0] = arrayValues[ 0];
684 values[ 1] = arrayValues[ 4];
685 values[ 2] = arrayValues[ 8];
686 values[ 3] = arrayValues[12];
687 values[ 4] = arrayValues[ 1];
688 values[ 5] = arrayValues[ 5];
689 values[ 6] = arrayValues[ 9];
690 values[ 7] = arrayValues[13];
691 values[ 8] = arrayValues[ 2];
692 values[ 9] = arrayValues[ 6];
693 values[10] = arrayValues[10];
694 values[11] = arrayValues[14];
695 values[12] = arrayValues[ 3];
696 values[13] = arrayValues[ 7];
697 values[14] = arrayValues[11];
698 values[15] = arrayValues[15];
699 }
700 else
701 {
702 memcpy(values, arrayValues, sizeof(T) * 16);
703 }
704}
705
706template <typename T>
708{
709 memcpy(values, transformation(), sizeof(T) * 16);
710}
711
712template <typename T>
714{
715 values[ 3] = T(0.0);
716 values[ 7] = T(0.0);
717 values[11] = T(0.0);
718 values[12] = T(0.0);
719 values[13] = T(0.0);
720 values[14] = T(0.0);
721 values[15] = T(0.0);
722
723 memcpy(values, subMatrix(), sizeof(T) * 3); // Set values[0] - values[2]
724 memcpy(values + 4, subMatrix() + 3, sizeof(T) * 3); // Set values[4] - values[6]
725 memcpy(values + 8, subMatrix() + 6, sizeof(T) * 3); // Set values[8] - values[10]
726}
727
728template <typename T>
730{
731 values[ 0] = diagonal[0];
732 values[ 1] = T(0.0);
733 values[ 2] = T(0.0);
734 values[ 3] = T(0.0);
735 values[ 4] = T(0.0);
736 values[ 5] = diagonal[1];
737 values[ 6] = T(0.0);
738 values[ 7] = T(0.0);
739 values[ 8] = T(0.0);
740 values[ 9] = T(0.0);
741 values[10] = diagonal[2];
742 values[11] = T(0.0);
743 values[12] = T(0.0);
744 values[13] = T(0.0);
745 values[14] = T(0.0);
746 values[15] = diagonal[3];
747}
748
749template <typename T>
750inline const T* SquareMatrixT4<T>::data() const
751{
752 return values;
753}
754
755template <typename T>
757{
758 return values;
759}
760
761template <typename T>
763{
764 SquareMatrixT4<T> result(*this);
765
766 result.values[1] = values[4];
767 result.values[4] = values[1];
768
769 result.values[2] = values[8];
770 result.values[8] = values[2];
771
772 result.values[3] = values[12];
773 result.values[12] = values[3];
774
775 result.values[6] = values[9];
776 result.values[9] = values[6];
777
778 result.values[7] = values[13];
779 result.values[13] = values[7];
780
781 result.values[11] = values[14];
782 result.values[14] = values[11];
783
784 return result;
785}
786
787template <typename T>
789{
790 SquareMatrixT4<T> tmp(*this);
791
792 values[4] = tmp.values[1];
793 values[1] = tmp.values[4];
794
795 values[8] = tmp.values[2];
796 values[2] = tmp.values[8];
797
798 values[12] = tmp.values[3];
799 values[3] = tmp.values[12];
800
801 values[9] = tmp.values[6];
802 values[6] = tmp.values[9];
803
804 values[13] = tmp.values[7];
805 values[7] = tmp.values[13];
806
807 values[14] = tmp.values[11];
808 values[11] = tmp.values[14];
809}
810
811template <typename T>
813{
814 SquareMatrixT4 invertedMatrix;
815
816 if (!invert(invertedMatrix))
817 {
818 ocean_assert(false && "Could not invert matrix.");
819 return *this;
820 }
821
822 return invertedMatrix;
823}
824
825template <typename T>
827{
828 SquareMatrixT4<T> invertedMatrix;
829
830 if (!invert(invertedMatrix))
831 {
832 return false;
833 }
834
835 *this = invertedMatrix;
836
837 return true;
838}
839
840template <typename T>
842{
843 // implements the Gauss-Jordon Elimination
844
845 SquareMatrixT4<T> source(*this);
846 invertedMatrix.toIdentity();
847
848 for (unsigned int col = 0; col < 4u; ++col)
849 {
850 // find largest absolute value in the col-th column,
851 // to remove zeros from the main diagonal and to provide numerical stability
852 T absolute = 0.0;
853 unsigned int selectedRow = 0u;
854
855 for (unsigned int row = col; row < 4u; ++row)
856 {
857 const T value = NumericT<T>::abs(source(row, col));
858 if (absolute < value)
859 {
860 absolute = value;
861 selectedRow = row;
862 }
863 }
864
865 // if there was no greater absolute value than 0 this matrix is singular
866
867 if (NumericT<T>::isEqualEps(absolute))
868 {
869 return false;
870 }
871
872 // exchange the two rows
873 if (selectedRow != col)
874 {
875 source.swapRows(col, selectedRow);
876 invertedMatrix.swapRows(col, selectedRow);
877 }
878
879 // now the element at (col, col) will be 1.0
880 if (NumericT<T>::isNotEqual(source(col, col), T(1.0)))
881 {
882 const T divisor = T(1.0) / source(col, col);
883 ocean_assert(divisor != T(0.0));
884
885 source.multiplyRow(col, divisor);
886 invertedMatrix.multiplyRow(col, divisor);
887 }
888
889 // clear each entry above and below the selected row and column to zero
890 for (unsigned int row = 0; row < 4u; ++row)
891 {
892 if (row != col)
893 {
894 const T value = -source(row, col);
895
896 source.addRows(row, col, value);
897 invertedMatrix.addRows(row, col, value);
898 }
899 }
900 }
901
902 return true;
903}
904
905template <typename T>
907{
908 const T v6_15 = values[6] * values[15];
909 const T v10_15 = values[10] * values[15];
910 const T v11_14 = values[11] * values[14];
911 const T v7_10 = values[7] * values[10];
912 const T v9_14 = values[9] * values[14];
913 const T v6_13 = values[6] * values[13];
914 const T v2_13 = values[2] * values[13];
915 const T v2_9 = values[2] * values[9];
916 const T v3_10 = values[3] * values[10];
917 const T v2_5 = values[2] * values[5];
918
919 return values[0] * (values[5] * v10_15 - values[13] * v7_10
920 + v9_14 * values[7] - values[5] * v11_14
921 + v6_13 * values[11] - values[9] * v6_15)
922 - values[4] * (v9_14 * values[3] - values[1] * v11_14
923 + v2_13 * values[11] - v2_9 * values[15]
924 + values[1] * v10_15 - values[13] * v3_10)
925 + values[8] * (values[1] * v6_15 - v6_13 * values[3]
926 + values[5] * values[14] * values[3] - values[1] * values[14] * values[7]
927 + v2_13 * values[7] - v2_5 * values[15])
928 - values[12] * (values[1] * values[6] * values[11] - values[9] * values[6] * values[3]
929 + values[5] * v3_10 - values[1] * v7_10
930 + v2_9 * values[7] - v2_5 * values[11]);
931}
932
933template <typename T>
935{
936 return values[0] + values[5] + values[10] + values[15];
937}
938
939template <typename T>
941{
942 values[ 0] = T(1.0);
943 values[ 1] = T(0.0);
944 values[ 2] = T(0.0);
945 values[ 3] = T(0.0);
946 values[ 4] = T(0.0);
947 values[ 5] = T(1.0);
948 values[ 6] = T(0.0);
949 values[ 7] = T(0.0);
950 values[ 8] = T(0.0);
951 values[ 9] = T(0.0);
952 values[10] = T(1.0);
953 values[11] = T(0.0);
954 values[12] = T(0.0);
955 values[13] = T(0.0);
956 values[14] = T(0.0);
957 values[15] = T(1.0);
958}
959
960template <typename T>
962{
963 for (unsigned int n = 0u; n < 16u; ++n)
964 {
965 values[n] = T(0.0);
966 }
967}
968
969template <typename T>
971{
972 return NumericT<T>::isEqual(values[0], 1) && NumericT<T>::isEqualEps(values[1]) && NumericT<T>::isEqualEps(values[2]) && NumericT<T>::isEqualEps(values[3])
973 && NumericT<T>::isEqualEps(values[4]) && NumericT<T>::isEqual(values[5], 1) && NumericT<T>::isEqualEps(values[6]) && NumericT<T>::isEqualEps(values[7])
974 && NumericT<T>::isEqualEps(values[8]) && NumericT<T>::isEqualEps(values[9]) && NumericT<T>::isEqual(values[10], 1) && NumericT<T>::isEqualEps(values[11])
975 && NumericT<T>::isEqualEps(values[12]) && NumericT<T>::isEqualEps(values[13]) && NumericT<T>::isEqualEps(values[14]) && NumericT<T>::isEqual(values[15], 1);
976}
977
978template <typename T>
980{
981 return NumericT<T>::isEqualEps(values[0]) && NumericT<T>::isEqualEps(values[1]) && NumericT<T>::isEqualEps(values[2]) && NumericT<T>::isEqualEps(values[3])
982 && NumericT<T>::isEqualEps(values[4]) && NumericT<T>::isEqualEps(values[5]) && NumericT<T>::isEqualEps(values[6]) && NumericT<T>::isEqualEps(values[7])
983 && NumericT<T>::isEqualEps(values[8]) && NumericT<T>::isEqualEps(values[9]) && NumericT<T>::isEqualEps(values[10]) && NumericT<T>::isEqualEps(values[11])
984 && NumericT<T>::isEqualEps(values[12]) && NumericT<T>::isEqualEps(values[13]) && NumericT<T>::isEqualEps(values[14]) && NumericT<T>::isEqualEps(values[15]);
985}
986
987template <typename T>
989{
990 return NumericT<T>::isEqualEps(determinant());
991}
992
993template <typename T>
994bool SquareMatrixT4<T>::isSymmetric(const T epsilon) const
995{
996 ocean_assert(epsilon >= T(0));
997
998 return NumericT<T>::isEqual(values[1], values[4], epsilon) && NumericT<T>::isEqual(values[2], values[8], epsilon) && NumericT<T>::isEqual(values[3], values[12], epsilon)
999 && NumericT<T>::isEqual(values[6], values[9], epsilon) && NumericT<T>::isEqual(values[7], values[13], epsilon) && NumericT<T>::isEqual(values[11], values[14], epsilon);
1000}
1001
1002template <typename T>
1003inline bool SquareMatrixT4<T>::isEqual(const SquareMatrixT4<T>& matrix, const T eps) const
1004{
1005 return NumericT<T>::isEqual(values[0], matrix.values[0], eps) && NumericT<T>::isEqual(values[1], matrix.values[1], eps)
1006 && NumericT<T>::isEqual(values[2], matrix.values[2], eps) && NumericT<T>::isEqual(values[3], matrix.values[3], eps)
1007 && NumericT<T>::isEqual(values[4], matrix.values[4], eps) && NumericT<T>::isEqual(values[5], matrix.values[5], eps)
1008 && NumericT<T>::isEqual(values[6], matrix.values[6], eps) && NumericT<T>::isEqual(values[7], matrix.values[7], eps)
1009 && NumericT<T>::isEqual(values[8], matrix.values[8], eps) && NumericT<T>::isEqual(values[9], matrix.values[9], eps)
1010 && NumericT<T>::isEqual(values[10], matrix.values[10], eps) && NumericT<T>::isEqual(values[11], matrix.values[11], eps)
1011 && NumericT<T>::isEqual(values[12], matrix.values[12], eps) && NumericT<T>::isEqual(values[13], matrix.values[13], eps)
1012 && NumericT<T>::isEqual(values[14], matrix.values[14], eps) && NumericT<T>::isEqual(values[15], matrix.values[15], eps);
1013}
1014
1015template <typename T>
1016bool SquareMatrixT4<T>::eigenSystem(T* eigenValues, VectorT4<T>* eigenVectors) const
1017{
1018 ocean_assert(eigenValues != nullptr && eigenVectors != nullptr);
1019
1020 /**
1021 * <pre>
1022 * Computation of the characteristic polynomial
1023 *
1024 * [ a b c d ]
1025 * A = [ e f g h ]
1026 * [ i j k l ]
1027 * [ m n o p ]
1028 *
1029 * [ a-x b c d ]
1030 * A - x * E = [ e f-x g h ]
1031 * [ i j k-x l ]
1032 * [ m n o p-x ]
1033 *
1034 * Polynomial = Det|A - x * E| = 0
1035 * = x^4 + (-a - f - k - p)x^3 + (-be + af - ci - gj + ak + fk - dm - hn - lo + ap + fp + kp)x^2
1036 * + (cfi - bgi - cej + agj + bek - afk + dfm - bhm + dkm - clm - den + ahn + hkn - gln - dio - hjo + alo + flo + bep - afp + cip + gjp - akp - fkp)x
1037 * + (dgjm - chjm - dfkm + bhkm + cflm - bglm)
1038 * + (-dgin + chin + dekn - ahkn - celn + agln)
1039 * + (dfio - bhio - dejo + ahjo + belo - aflo)
1040 * + (-cfip + bgip + cejp - agjp - bekp + afkp)
1041 * = a1x^4 + a2x^3 + a3x^2 + a4x + a5 = 0
1042 * </pre>
1043 */
1044
1045 const T a = values[0];
1046 const T b = values[1];
1047 const T c = values[2];
1048 const T d = values[3];
1049 const T e = values[4];
1050 const T f = values[5];
1051 const T g = values[6];
1052 const T h = values[7];
1053 const T i = values[8];
1054 const T j = values[9];
1055 const T k = values[10];
1056 const T l = values[11];
1057 const T m = values[12];
1058 const T n = values[13];
1059 const T o = values[14];
1060 const T p = values[15];
1061
1062 const T a1 = 1.0;
1063 const T a2 = -a - f - k - p;
1064 const T a3 = -b * e + a * f - c * i - g * j + a * k + f * k - d * m - h * n - l * o + a * p + f * p + k * p;
1065 const T a4 = c * f * i - b * g * i - c * e * j + a * g * j + b * e * k - a * f * k + d * f * m - b * h * m
1066 + d * k * m - c * l * m - d * e * n + a * h * n + h * k * n - g * l * n - d * i * o - h * j * o
1067 + a * l * o + f * l * o + b * e * p - a * f * p + c * i * p + g * j * p - a * k * p - f * k * p;
1068 const T a5 = d * g * j * m - c * h * j * m - d * f * k * m + b * h * k * m + c * f * l * m - b * g * l * m
1069 - d * g * i * n + c * h * i * n + d * e * k * n - a * h * k * n - c * e * l * n + a * g * l * n
1070 + d * f * i * o - b * h * i * o - d * e * j * o + a * h * j * o + b * e * l * o - a * f * l * o
1071 - c * f * i * p + b * g * i * p + c * e * j * p - a * g * j * p - b * e * k * p + a * f * k * p;
1072
1073 // Solve the quartic equation to find eigenvalues
1074
1075 double x[4];
1076 const unsigned int solutions = EquationT<double>::solveQuartic(double(a1), double(a2), double(a3), double(a4), double(a5), x);
1077
1078 if (solutions != 4u)
1079 {
1080 return false;
1081 }
1082
1083 for (unsigned int iEigen = 0u; iEigen < solutions && iEigen < 4u; ++iEigen)
1084 {
1085 eigenValues[iEigen] = T(x[iEigen]);
1086 }
1087
1088 for (unsigned int iEigen = solutions; iEigen < 4u; ++iEigen)
1089 {
1090 eigenValues[iEigen] = T(0);
1091 }
1092
1093 Utilities::sortHighestToFront4(eigenValues[0], eigenValues[1], eigenValues[2], eigenValues[3]);
1094
1095 /**
1096 * <pre>
1097 * Determination of the eigen vectors (vx, vy, vz, vw):
1098 * [ a-x b c d ] [ vx ]
1099 * A - x * I = [ e f-x g h ] * [ vy ] = 0
1100 * [ i j k-x l ] [ vz ]
1101 * [ m n o p-x ] [ vw ]
1102 * </pre>
1103 */
1104
1105 for (unsigned int nSolution = 0u; nSolution < solutions; ++nSolution)
1106 {
1107 const T lambda = eigenValues[nSolution];
1108
1109 // create matrix (A - x*I) as rows
1110
1111 VectorT4<T> rows[4];
1112 rows[0] = VectorT4<T>(a - lambda, b, c, d);
1113 rows[1] = VectorT4<T>(e, f - lambda, g, h);
1114 rows[2] = VectorT4<T>(i, j, k - lambda, l);
1115 rows[3] = VectorT4<T>(m, n, o, p - lambda);
1116
1117 // Find eigenvector by solving (A - lambda*I) * v = 0
1118 // We'll try setting each component to 1 and solving for the others
1119
1120 VectorT4<T> eigenvector(T(0), T(0), T(0), T(0));
1121 T bestResidual = NumericT<T>::maxValue();
1122
1123 for (unsigned int freeIndex = 0u; freeIndex < 4u; ++freeIndex)
1124 {
1125 // Try setting component freeIndex to 1
1126 VectorT4<T> candidate(T(0), T(0), T(0), T(0));
1127 candidate[freeIndex] = T(1);
1128
1129 // Find the 3 rows with largest coefficients at freeIndex position
1130 // (these will give us the most stable equations)
1131 unsigned int rowIndices[4] = {0, 1, 2, 3};
1132 T rowScores[4];
1133
1134 for (unsigned int rowNum = 0u; rowNum < 4u; ++rowNum)
1135 {
1136 rowScores[rowNum] = NumericT<T>::abs(rows[rowNum][freeIndex]);
1137 }
1138
1139 // Sort rows by score (descending)
1140 for (unsigned int row = 0u; row < 3u; ++row)
1141 {
1142 for (unsigned int row2 = row + 1u; row2 < 4u; ++row2)
1143 {
1144 if (rowScores[row2] > rowScores[row])
1145 {
1146 std::swap(rowScores[row], rowScores[row2]);
1147 std::swap(rowIndices[row], rowIndices[row2]);
1148 }
1149 }
1150 }
1151
1152 // Build 3x3 system from the 3 best rows
1153 // System: coeffMatrix * unknowns = rhs
1154 T coeffMatrix[3][3];
1155 T rhs[3];
1156 unsigned int unknownIndices[3];
1157 unsigned int idx = 0u;
1158
1159 for (unsigned int col = 0u; col < 4u; ++col)
1160 {
1161 if (col != freeIndex)
1162 {
1163 unknownIndices[idx++] = col;
1164 }
1165 }
1166
1167 for (unsigned int rowIdx = 0u; rowIdx < 3u; ++rowIdx)
1168 {
1169 const VectorT4<T>& row = rows[rowIndices[rowIdx]];
1170 rhs[rowIdx] = -row[freeIndex];
1171
1172 for (unsigned int colIdx = 0u; colIdx < 3u; ++colIdx)
1173 {
1174 coeffMatrix[rowIdx][colIdx] = row[unknownIndices[colIdx]];
1175 }
1176 }
1177
1178 // Solve 3x3 system using Gaussian elimination
1179 T augmented[3][4];
1180 T rowScales[3];
1181
1182 for (unsigned int rowIdx = 0u; rowIdx < 3u; ++rowIdx)
1183 {
1184 // Copy coefficients
1185 for (unsigned int colIdx = 0u; colIdx < 3u; ++colIdx)
1186 {
1187 augmented[rowIdx][colIdx] = coeffMatrix[rowIdx][colIdx];
1188 }
1189 augmented[rowIdx][3] = rhs[rowIdx];
1190
1191 // Compute row scale for better numerical conditioning
1192 T maxRowVal = T(0);
1193 for (unsigned int colIdx = 0u; colIdx < 4u; ++colIdx)
1194 {
1195 maxRowVal = std::max(maxRowVal, NumericT<T>::abs(augmented[rowIdx][colIdx]));
1196 }
1197
1198 rowScales[rowIdx] = maxRowVal;
1199
1200 // Scale the row if possible (improves conditioning)
1201 if (maxRowVal > NumericT<T>::weakEps())
1202 {
1203 const T scale = T(1) / maxRowVal;
1204 for (unsigned int colIdx = 0u; colIdx < 4u; ++colIdx)
1205 {
1206 augmented[rowIdx][colIdx] *= scale;
1207 }
1208 }
1209 }
1210
1211 // Forward elimination with partial pivoting
1212 for (unsigned int pivot = 0u; pivot < 3u; ++pivot)
1213 {
1214 // Find row with largest pivot element
1215 unsigned int maxRow = pivot;
1216 T maxVal = NumericT<T>::abs(augmented[pivot][pivot]);
1217
1218 for (unsigned int rowIdx = pivot + 1u; rowIdx < 3u; ++rowIdx)
1219 {
1220 const T val = NumericT<T>::abs(augmented[rowIdx][pivot]);
1221 if (val > maxVal)
1222 {
1223 maxVal = val;
1224 maxRow = rowIdx;
1225 }
1226 }
1227
1228 // Swap rows if needed
1229 if (maxRow != pivot)
1230 {
1231 for (unsigned int colIdx = 0u; colIdx < 4u; ++colIdx)
1232 {
1233 std::swap(augmented[pivot][colIdx], augmented[maxRow][colIdx]);
1234 }
1235 }
1236
1237 // Use generous threshold (system may be near-singular as we're finding null space)
1238 const T pivotThreshold = std::is_same<T, float>::value ? (NumericT<T>::weakEps() * T(10)) : NumericT<T>::weakEps();
1239
1240 // Skip elimination if pivot is too small, but continue (system is expected to be singular)
1241 if (NumericT<T>::abs(augmented[pivot][pivot]) < pivotThreshold)
1242 {
1243 continue;
1244 }
1245
1246 // Eliminate below pivot
1247 for (unsigned int rowIdx = pivot + 1u; rowIdx < 3u; ++rowIdx)
1248 {
1249 const T factor = augmented[rowIdx][pivot] / augmented[pivot][pivot];
1250 for (unsigned int colIdx = pivot; colIdx < 4u; ++colIdx)
1251 {
1252 augmented[rowIdx][colIdx] -= factor * augmented[pivot][colIdx];
1253 }
1254 }
1255 }
1256
1257 // Back substitution
1258 T solution[3] = {T(0), T(0), T(0)};
1259
1260 for (int rowIdx = 2; rowIdx >= 0; --rowIdx)
1261 {
1262 T sum = augmented[rowIdx][3];
1263
1264 for (unsigned int colIdx = rowIdx + 1u; colIdx < 3u; ++colIdx)
1265 {
1266 sum -= augmented[rowIdx][colIdx] * solution[colIdx];
1267 }
1268
1269 const T pivotThreshold = std::is_same<T, float>::value ? (NumericT<T>::weakEps() * T(10)) : NumericT<T>::weakEps();
1270 if (NumericT<T>::abs(augmented[rowIdx][rowIdx]) > pivotThreshold)
1271 {
1272 solution[rowIdx] = sum / augmented[rowIdx][rowIdx];
1273 }
1274 // else: leave solution[rowIdx] as 0 (free variable in under-determined system)
1275 }
1276
1277 // Build candidate eigenvector
1278 for (unsigned int idx2 = 0u; idx2 < 3u; ++idx2)
1279 {
1280 candidate[unknownIndices[idx2]] = solution[idx2];
1281 }
1282
1283 // Evaluate residual: ||A*v - lambda*v||
1284 const VectorT4<T> Av = (*this) * candidate;
1285 const VectorT4<T> lambdaV = candidate * lambda;
1286 const T residual = (Av - lambdaV).length();
1287
1288 if (residual < bestResidual)
1289 {
1290 bestResidual = residual;
1291 eigenvector = candidate;
1292 }
1293 }
1294
1295 // Normalize the eigenvector
1296 if (!eigenvector.normalize())
1297 {
1298 // Normalization failed - use standard basis vector
1299 eigenvector = VectorT4<T>(T(0), T(0), T(0), T(0));
1300 eigenvector[nSolution % 4u] = T(1);
1301 }
1302
1303 eigenVectors[nSolution] = eigenvector;
1304 }
1305
1306 return true;
1307}
1308
1309template <typename T>
1310template <typename U>
1311void SquareMatrixT4<T>::copyElements(U* arrayValues) const
1312{
1313 ocean_assert(arrayValues != nullptr);
1314
1315 for (unsigned int n = 0u; n < 16u; ++n)
1316 {
1317 arrayValues[n] = U(values[n]);
1318 }
1319}
1320
1321template <typename T>
1322void SquareMatrixT4<T>::copyElements(T* arrayValues) const
1323{
1324 ocean_assert(arrayValues != nullptr);
1325
1326 memcpy(arrayValues, values, sizeof(T) * 16);
1327}
1328
1329template <typename T>
1330inline bool SquareMatrixT4<T>::operator!=(const SquareMatrixT4<T>& matrix) const
1331{
1332 return !(*this == matrix);
1333}
1334
1335template <typename T>
1337{
1338 *this = *this * matrix;
1339 return *this;
1340}
1341
1342template <typename T>
1344{
1345 *this = *this * matrix;
1346 return *this;
1347}
1348
1349template <typename T>
1350inline T SquareMatrixT4<T>::operator[](const unsigned int index) const
1351{
1352 ocean_assert(index < 16u);
1353 return values[index];
1354}
1355
1356template <typename T>
1357inline T& SquareMatrixT4<T>::operator[](const unsigned int index)
1358{
1359 ocean_assert(index < 16u);
1360 return values[index];
1361}
1362
1363template <typename T>
1364inline T SquareMatrixT4<T>::operator()(const unsigned int row, const unsigned int column) const
1365{
1366 ocean_assert(row < 4u && column < 4u);
1367 return values[(column << 2) + row]; // values[(column * 4) + row];
1368}
1369
1370template <typename T>
1371inline T& SquareMatrixT4<T>::operator()(const unsigned int row, const unsigned int column)
1372{
1373 ocean_assert(row < 4u && column < 4u);
1374 return values[(column << 2) + row]; // values[(column * 4) + row];
1375}
1376
1377template <typename T>
1378inline T SquareMatrixT4<T>::operator()(const unsigned int index) const
1379{
1380 ocean_assert(index < 16u);
1381 return values[index];
1382}
1383
1384template <typename T>
1385inline T& SquareMatrixT4<T>::operator()(const unsigned int index)
1386{
1387 ocean_assert(index < 16u);
1388 return values[index];
1389}
1390
1391template <typename T>
1392inline const T* SquareMatrixT4<T>::operator()() const
1393{
1394 return values;
1395}
1396
1397template <typename T>
1399{
1400 return values;
1401}
1402
1403template <typename T>
1404inline size_t SquareMatrixT4<T>::operator()(const SquareMatrixT4<T>& matrix) const
1405{
1406 size_t seed = std::hash<T>{}(matrix.values[0]);
1407
1408 for (unsigned int n = 1u; n < 16u; ++n)
1409 {
1410 seed ^= std::hash<T>{}(matrix.values[n]) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
1411 }
1412
1413 return seed;
1414}
1415
1416template <typename T>
1418{
1419 return 16;
1420}
1421
1422template <typename T>
1424{
1425 return isEqual(matrix);
1426}
1427
1428template <typename T>
1430{
1431 SquareMatrixT4<T> result(*this);
1432
1433 result += matrix;
1434
1435 return result;
1436}
1437
1438template <typename T>
1440{
1441 for (unsigned int n = 0u; n < 16u; ++n)
1442 {
1443 values[n] += matrix.values[n];
1444 }
1445
1446 return *this;
1447}
1448
1449template <typename T>
1451{
1452 SquareMatrixT4<T> result(*this);
1453
1454 result -= matrix;
1455
1456 return result;
1457}
1458
1459template <typename T>
1461{
1462 for (unsigned int n = 0u; n < 16u; ++n)
1463 {
1464 values[n] -= matrix.values[n];
1465 }
1466
1467 return *this;
1468}
1469
1470template <typename T>
1472{
1473 SquareMatrixT4<T> result;
1474
1475 for (unsigned int n = 0u; n < 16u; ++n)
1476 {
1477 result.values[n] = -values[n];
1478 }
1479
1480 return result;
1481}
1482
1483template <typename T>
1485{
1486 SquareMatrixT4<T> result;
1487
1488 result.values[0] = values[0] * matrix.values[0] + values[4] * matrix.values[1] + values[8] * matrix.values[2] + values[12] * matrix.values[3];
1489 result.values[1] = values[1] * matrix.values[0] + values[5] * matrix.values[1] + values[9] * matrix.values[2] + values[13] * matrix.values[3];
1490 result.values[2] = values[2] * matrix.values[0] + values[6] * matrix.values[1] + values[10] * matrix.values[2] + values[14] * matrix.values[3];
1491 result.values[3] = values[3] * matrix.values[0] + values[7] * matrix.values[1] + values[11] * matrix.values[2] + values[15] * matrix.values[3];
1492
1493 result.values[4] = values[0] * matrix.values[4] + values[4] * matrix.values[5] + values[8] * matrix.values[6] + values[12] * matrix.values[7];
1494 result.values[5] = values[1] * matrix.values[4] + values[5] * matrix.values[5] + values[9] * matrix.values[6] + values[13] * matrix.values[7];
1495 result.values[6] = values[2] * matrix.values[4] + values[6] * matrix.values[5] + values[10] * matrix.values[6] + values[14] * matrix.values[7];
1496 result.values[7] = values[3] * matrix.values[4] + values[7] * matrix.values[5] + values[11] * matrix.values[6] + values[15] * matrix.values[7];
1497
1498 result.values[8] = values[0] * matrix.values[8] + values[4] * matrix.values[9] + values[8] * matrix.values[10] + values[12] * matrix.values[11];
1499 result.values[9] = values[1] * matrix.values[8] + values[5] * matrix.values[9] + values[9] * matrix.values[10] + values[13] * matrix.values[11];
1500 result.values[10] = values[2] * matrix.values[8] + values[6] * matrix.values[9] + values[10] * matrix.values[10] + values[14] * matrix.values[11];
1501 result.values[11] = values[3] * matrix.values[8] + values[7] * matrix.values[9] + values[11] * matrix.values[10] + values[15] * matrix.values[11];
1502
1503 result.values[12] = values[0] * matrix.values[12] + values[4] * matrix.values[13] + values[8] * matrix.values[14] + values[12] * matrix.values[15];
1504 result.values[13] = values[1] * matrix.values[12] + values[5] * matrix.values[13] + values[9] * matrix.values[14] + values[13] * matrix.values[15];
1505 result.values[14] = values[2] * matrix.values[12] + values[6] * matrix.values[13] + values[10] * matrix.values[14] + values[14] * matrix.values[15];
1506 result.values[15] = values[3] * matrix.values[12] + values[7] * matrix.values[13] + values[11] * matrix.values[14] + values[15] * matrix.values[15];
1507
1508 return result;
1509}
1510
1511#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
1512
1513template <>
1515{
1516 // the following code uses the following AVX instructions, and needs AVX1 or higher
1517
1518 // AVX1:
1519 // _mm256_broadcast_sd
1520 // _mm256_loadu_pd
1521 // _mm256_mul_pd
1522 // _mm256_add_pd
1523 // _mm256_storeu_pd
1524
1525 // we use the same strategy as we apply for matrix-vector multiplication
1526 // further, here we interpret the right matrix as 4 vectors
1527
1528 // we load the columns of the left matrix
1529 __m256d c0 = _mm256_loadu_pd(values + 0);
1530 __m256d c1 = _mm256_loadu_pd(values + 4);
1531 __m256d c2 = _mm256_loadu_pd(values + 8);
1532 __m256d c3 = _mm256_loadu_pd(values + 12);
1533
1534
1535 // we determine the first vector of the resulting matrix
1536 __m256d v0 = _mm256_broadcast_sd(matrix.data() + 0);
1537 __m256d r0 = _mm256_mul_pd(c0, v0);
1538
1539 __m256d v1 = _mm256_broadcast_sd(matrix.data() + 1);
1540 __m256d r1 = _mm256_mul_pd(c1, v1);
1541
1542 r0 = _mm256_add_pd(r0, r1);
1543
1544 __m256d v2 = _mm256_broadcast_sd(matrix.data() + 2);
1545 __m256d r2 = _mm256_mul_pd(c2, v2);
1546
1547 r0 = _mm256_add_pd(r0, r2);
1548
1549 __m256d v3 = _mm256_broadcast_sd(matrix.data() + 3);
1550 __m256d r3 = _mm256_mul_pd(c3, v3);
1551
1552 r0 = _mm256_add_pd(r0, r3);
1553
1555
1556 _mm256_storeu_pd(result.data(), r0);
1557
1558
1559 // we determine the second vector of the resulting matrix
1560 __m256d v4 = _mm256_broadcast_sd(matrix.data() + 4);
1561 __m256d r4 = _mm256_mul_pd(c0, v4);
1562
1563 __m256d v5 = _mm256_broadcast_sd(matrix.data() + 5);
1564 __m256d r5 = _mm256_mul_pd(c1, v5);
1565
1566 r4 = _mm256_add_pd(r4, r5);
1567
1568 __m256d v6 = _mm256_broadcast_sd(matrix.data() + 6);
1569 __m256d r6 = _mm256_mul_pd(c2, v6);
1570
1571 r4 = _mm256_add_pd(r4, r6);
1572
1573 __m256d v7 = _mm256_broadcast_sd(matrix.data() + 7);
1574 __m256d r7 = _mm256_mul_pd(c3, v7);
1575
1576 r4 = _mm256_add_pd(r4, r7);
1577
1578 _mm256_storeu_pd(result.data() + 4, r4);
1579
1580
1581 // we determine the third vector of the resulting matrix
1582 __m256d v8 = _mm256_broadcast_sd(matrix.data() + 8);
1583 __m256d r8 = _mm256_mul_pd(c0, v8);
1584
1585 __m256d v9 = _mm256_broadcast_sd(matrix.data() + 9);
1586 __m256d r9 = _mm256_mul_pd(c1, v9);
1587
1588 r8 = _mm256_add_pd(r8, r9);
1589
1590 __m256d v10 = _mm256_broadcast_sd(matrix.data() + 10);
1591 __m256d r10 = _mm256_mul_pd(c2, v10);
1592
1593 r8 = _mm256_add_pd(r8, r10);
1594
1595 __m256d v11 = _mm256_broadcast_sd(matrix.data() + 11);
1596 __m256d r11 = _mm256_mul_pd(c3, v11);
1597
1598 r8 = _mm256_add_pd(r8, r11);
1599
1600 _mm256_storeu_pd(result.data() + 8, r8);
1601
1602
1603 // we determine the forth vector of the resulting matrix
1604 __m256d v12 = _mm256_broadcast_sd(matrix.data() + 12);
1605 __m256d r12 = _mm256_mul_pd(c0, v12);
1606
1607 __m256d v13 = _mm256_broadcast_sd(matrix.data() + 13);
1608 __m256d r13 = _mm256_mul_pd(c1, v13);
1609
1610 r12 = _mm256_add_pd(r12, r13);
1611
1612 __m256d v14 = _mm256_broadcast_sd(matrix.data() + 14);
1613 __m256d r14 = _mm256_mul_pd(c2, v14);
1614
1615 r12 = _mm256_add_pd(r12, r14);
1616
1617 __m256d v15 = _mm256_broadcast_sd(matrix.data() + 15);
1618 __m256d r15 = _mm256_mul_pd(c3, v15);
1619
1620 r12 = _mm256_add_pd(r12, r15);
1621
1622 _mm256_storeu_pd(result.data() + 12, r12);
1623
1624 return result;
1625}
1626
1627template <>
1629{
1630 // the following code uses the following AVX instructions, and needs AVX1 or higher
1631
1632 // we use the same strategy as we apply for matrix-vector multiplication
1633 // further, here we interpret the right matrix as 4 vectors, and combine two vectors into one 256 bit register
1634
1635 // we load the four columns of the left matrix
1636 __m256 c0 = _mm256_broadcast_ps((const __m128*)values + 0);
1637 __m256 c1 = _mm256_broadcast_ps((const __m128*)values + 1);
1638 __m256 c2 = _mm256_broadcast_ps((const __m128*)values + 2);
1639 __m256 c3 = _mm256_broadcast_ps((const __m128*)values + 3);
1640
1641 __m256 m01 = _mm256_loadu_ps(matrix.data() + 0);
1642 __m256 m23 = _mm256_loadu_ps(matrix.data() + 8);
1643
1644
1645 // we determine the first two vectors of the resulting matrix
1646 __m256 v0_4 = _mm256_permute_ps(m01, 0x00);
1647 __m256 r0 = _mm256_mul_ps(c0, v0_4);
1648
1649#ifdef OCEAN_COMPILER_MSC
1650 ocean_assert(NumericF::isEqual(r0.m256_f32[0], values[0] * matrix[0]));
1651 ocean_assert(NumericF::isEqual(r0.m256_f32[1], values[1] * matrix[0]));
1652 ocean_assert(NumericF::isEqual(r0.m256_f32[2], values[2] * matrix[0]));
1653 ocean_assert(NumericF::isEqual(r0.m256_f32[3], values[3] * matrix[0]));
1654
1655 ocean_assert(NumericF::isEqual(r0.m256_f32[4], values[0] * matrix[4]));
1656 ocean_assert(NumericF::isEqual(r0.m256_f32[5], values[1] * matrix[4]));
1657 ocean_assert(NumericF::isEqual(r0.m256_f32[6], values[2] * matrix[4]));
1658 ocean_assert(NumericF::isEqual(r0.m256_f32[7], values[3] * matrix[4]));
1659#endif
1660
1661 __m256 v1_5 = _mm256_permute_ps(m01, 0x55);
1662 __m256 r1 = _mm256_mul_ps(c1, v1_5);
1663
1664#ifdef OCEAN_COMPILER_MSC
1665 ocean_assert(NumericF::isEqual(r1.m256_f32[0], values[ 4] * matrix[1]));
1666 ocean_assert(NumericF::isEqual(r1.m256_f32[1], values[ 5] * matrix[1]));
1667 ocean_assert(NumericF::isEqual(r1.m256_f32[2], values[ 6] * matrix[1]));
1668 ocean_assert(NumericF::isEqual(r1.m256_f32[3], values[ 7] * matrix[1]));
1669
1670 ocean_assert(NumericF::isEqual(r1.m256_f32[4], values[4] * matrix[5]));
1671 ocean_assert(NumericF::isEqual(r1.m256_f32[5], values[5] * matrix[5]));
1672 ocean_assert(NumericF::isEqual(r1.m256_f32[6], values[6] * matrix[5]));
1673 ocean_assert(NumericF::isEqual(r1.m256_f32[7], values[7] * matrix[5]));
1674#endif
1675
1676 r0 = _mm256_add_ps(r0, r1);
1677
1678
1679 __m256 v2_6 = _mm256_permute_ps(m01, 0xAA);
1680 __m256 r2 = _mm256_mul_ps(c2, v2_6);
1681
1682 r0 = _mm256_add_ps(r0, r2);
1683
1684
1685 __m256 v3_7 = _mm256_permute_ps(m01, 0xFF);
1686 __m256 r3 = _mm256_mul_ps(c3, v3_7);
1687
1688 r0 = _mm256_add_ps(r0, r3);
1689
1690 SquareMatrixT4<float> result;
1691
1692 _mm256_storeu_ps(result.data(), r0);
1693
1694
1695
1696 // we determine the last two vectors of the resulting matrix
1697 __m256 v8_12 = _mm256_permute_ps(m23, 0x00);
1698 __m256 r8 = _mm256_mul_ps(c0, v8_12);
1699
1700
1701 __m256 v9_13 = _mm256_permute_ps(m23, 0x55);
1702 __m256 r9 = _mm256_mul_ps(c1, v9_13);
1703
1704 r8 = _mm256_add_ps(r8, r9);
1705
1706
1707 __m256 v10_14 = _mm256_permute_ps(m23, 0xAA);
1708 __m256 r10 = _mm256_mul_ps(c2, v10_14);
1709
1710 r8 = _mm256_add_ps(r8, r10);
1711
1712
1713 __m256 v11_15 = _mm256_permute_ps(m23, 0xFF);
1714 __m256 r11 = _mm256_mul_ps(c3, v11_15);
1715
1716 r8 = _mm256_add_ps(r8, r11);
1717
1718 _mm256_storeu_ps(result.data() + 8, r8);
1719
1720 return result;
1721}
1722
1723#else // OCEAN_HARDWARE_AVX_VERSION < 10
1724
1725#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
1726
1727template <>
1729{
1730 // the following code uses the following SSE instructions, and needs SSE1 or higher
1731
1732 // SSE1:
1733 // _mm_load1_ps
1734 // _mm_loadu_ps
1735 // _mm_mul_ps
1736 // _mm_add_ps
1737 // _mm_storeu_ps
1738
1739 // we use the same strategy as we apply for matrix-vector multiplication
1740 // further, here we interpret the right matrix as 4 vectors
1741
1742 // we load the columns of the left matrix
1743 __m128 c0 = _mm_loadu_ps(values + 0);
1744 __m128 c1 = _mm_loadu_ps(values + 4);
1745 __m128 c2 = _mm_loadu_ps(values + 8);
1746 __m128 c3 = _mm_loadu_ps(values + 12);
1747
1748
1749 // we determine the first vector of the resulting matrix
1750 __m128 v0 = _mm_load1_ps(matrix.data() + 0);
1751 __m128 r0 = _mm_mul_ps(c0, v0);
1752
1753 __m128 v1 = _mm_load1_ps(matrix.data() + 1);
1754 __m128 r1 = _mm_mul_ps(c1, v1);
1755
1756 r0 = _mm_add_ps(r0, r1);
1757
1758 __m128 v2 = _mm_load1_ps(matrix.data() + 2);
1759 __m128 r2 = _mm_mul_ps(c2, v2);
1760
1761 r0 = _mm_add_ps(r0, r2);
1762
1763 __m128 v3 = _mm_load1_ps(matrix.data() + 3);
1764 __m128 r3 = _mm_mul_ps(c3, v3);
1765
1766 r0 = _mm_add_ps(r0, r3);
1767
1768 SquareMatrixT4<float> result;
1769
1770 _mm_storeu_ps(result.data(), r0);
1771
1772
1773 // we determine the second vector of the resulting matrix
1774 __m128 v4 = _mm_load1_ps(matrix.data() + 4);
1775 __m128 r4 = _mm_mul_ps(c0, v4);
1776
1777 __m128 v5 = _mm_load1_ps(matrix.data() + 5);
1778 __m128 r5 = _mm_mul_ps(c1, v5);
1779
1780 r4 = _mm_add_ps(r4, r5);
1781
1782 __m128 v6 = _mm_load1_ps(matrix.data() + 6);
1783 __m128 r6 = _mm_mul_ps(c2, v6);
1784
1785 r4 = _mm_add_ps(r4, r6);
1786
1787 __m128 v7 = _mm_load1_ps(matrix.data() + 7);
1788 __m128 r7 = _mm_mul_ps(c3, v7);
1789
1790 r4 = _mm_add_ps(r4, r7);
1791
1792 _mm_storeu_ps(result.data() + 4, r4);
1793
1794
1795 // we determine the third vector of the resulting matrix
1796 __m128 v8 = _mm_load1_ps(matrix.data() + 8);
1797 __m128 r8 = _mm_mul_ps(c0, v8);
1798
1799 __m128 v9 = _mm_load1_ps(matrix.data() + 9);
1800 __m128 r9 = _mm_mul_ps(c1, v9);
1801
1802 r8 = _mm_add_ps(r8, r9);
1803
1804 __m128 v10 = _mm_load1_ps(matrix.data() + 10);
1805 __m128 r10 = _mm_mul_ps(c2, v10);
1806
1807 r8 = _mm_add_ps(r8, r10);
1808
1809 __m128 v11 = _mm_load1_ps(matrix.data() + 11);
1810 __m128 r11 = _mm_mul_ps(c3, v11);
1811
1812 r8 = _mm_add_ps(r8, r11);
1813
1814 _mm_storeu_ps(result.data() + 8, r8);
1815
1816
1817 // we determine the forth vector of the resulting matrix
1818 __m128 v12 = _mm_load1_ps(matrix.data() + 12);
1819 __m128 r12 = _mm_mul_ps(c0, v12);
1820
1821 __m128 v13 = _mm_load1_ps(matrix.data() + 13);
1822 __m128 r13 = _mm_mul_ps(c1, v13);
1823
1824 r12 = _mm_add_ps(r12, r13);
1825
1826 __m128 v14 = _mm_load1_ps(matrix.data() + 14);
1827 __m128 r14 = _mm_mul_ps(c2, v14);
1828
1829 r12 = _mm_add_ps(r12, r14);
1830
1831 __m128 v15 = _mm_load1_ps(matrix.data() + 15);
1832 __m128 r15 = _mm_mul_ps(c3, v15);
1833
1834 r12 = _mm_add_ps(r12, r15);
1835
1836 _mm_storeu_ps(result.data() + 12, r12);
1837
1838 return result;
1839}
1840
1841#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
1842
1843#endif // OCEAN_HARDWARE_AVX_VERSION >= 10
1844
1845template <typename T>
1847{
1848 SquareMatrixT4<T> result;
1849
1850 result.values[0] = values[0] * matrix[0] + values[4] * matrix[1] + values[8] * matrix[2]; // + values[12] * matrix[3];
1851 result.values[1] = values[1] * matrix[0] + values[5] * matrix[1] + values[9] * matrix[2]; // + values[13] * matrix[3];
1852 result.values[2] = values[2] * matrix[0] + values[6] * matrix[1] + values[10] * matrix[2]; // + values[14] * matrix[3];
1853 result.values[3] = values[3] * matrix[0] + values[7] * matrix[1] + values[11] * matrix[2]; // + values[15] * matrix[3];
1854
1855 result.values[4] = values[0] * matrix[4] + values[4] * matrix[5] + values[8] * matrix[6]; // + values[12] * matrix[7];
1856 result.values[5] = values[1] * matrix[4] + values[5] * matrix[5] + values[9] * matrix[6]; // + values[13] * matrix[7];
1857 result.values[6] = values[2] * matrix[4] + values[6] * matrix[5] + values[10] * matrix[6]; // + values[14] * matrix[7];
1858 result.values[7] = values[3] * matrix[4] + values[7] * matrix[5] + values[11] * matrix[6]; // + values[15] * matrix[7];
1859
1860 result.values[8] = values[0] * matrix[8] + values[4] * matrix[9] + values[8] * matrix[10]; // + values[12] * matrix[11];
1861 result.values[9] = values[1] * matrix[8] + values[5] * matrix[9] + values[9] * matrix[10]; // + values[13] * matrix[11];
1862 result.values[10] = values[2] * matrix[8] + values[6] * matrix[9] + values[10] * matrix[10]; // + values[14] * matrix[11];
1863 result.values[11] = values[3] * matrix[8] + values[7] * matrix[9] + values[11] * matrix[10]; // + values[15] * matrix[11];
1864
1865 result.values[12] = values[0] * matrix[12] + values[4] * matrix[13] + values[8] * matrix[14] + values[12]; // * matrix[15];
1866 result.values[13] = values[1] * matrix[12] + values[5] * matrix[13] + values[9] * matrix[14] + values[13]; // * matrix[15];
1867 result.values[14] = values[2] * matrix[12] + values[6] * matrix[13] + values[10] * matrix[14] + values[14]; // * matrix[15];
1868 result.values[15] = values[3] * matrix[12] + values[7] * matrix[13] + values[11] * matrix[14] + values[15]; // * matrix[15];
1869
1870 return result;
1871}
1872
1873template <typename T>
1874OCEAN_FORCE_INLINE VectorT3<T> SquareMatrixT4<T>::operator*(const VectorT3<T>& vector) const
1875{
1876 const T w = values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15];
1877 ocean_assert(NumericT<T>::isNotEqualEps(w) && "Division by zero!");
1878
1879 const T factor = 1 / w;
1880
1881 return VectorT3<T>((values[0] * vector[0] + values[4] * vector[1] + values[8] * vector[2] + values[12]) * factor,
1882 (values[1] * vector[0] + values[5] * vector[1] + values[9] * vector[2] + values[13]) * factor,
1883 (values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14]) * factor);
1884}
1885
1886template <typename T>
1887OCEAN_FORCE_INLINE VectorT4<T> SquareMatrixT4<T>::operator*(const VectorT4<T>& vector) const
1888{
1889 return VectorT4<T>(values[0] * vector[0] + values[4] * vector[1] + values[8] * vector[2] + values[12] * vector[3],
1890 values[1] * vector[0] + values[5] * vector[1] + values[9] * vector[2] + values[13] * vector[3],
1891 values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14] * vector[3],
1892 values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15] * vector[3]);
1893}
1894
1895#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
1896
1897#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
1898
1899// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
1900
1901template <>
1903{
1904 // the following code uses the following AVX instructions, and needs AVX1 or higher
1905
1906 // AVX1:
1907 // _mm256_loadu_pd
1908 // _mm256_mul_pd
1909 // _mm256_shuffle_pd
1910 // _mm256_hadd_pd
1911 // _mm256_permute2f128_pd
1912 // _mm256_storeu_pd
1913
1914 // first we load the first four rows of the matrix
1915 __m256d row0 = _mm256_loadu_pd(values + 0);
1916 __m256d row1 = _mm256_loadu_pd(values + 4);
1917 __m256d row2 = _mm256_loadu_pd(values + 8);
1918 __m256d row3 = _mm256_loadu_pd(values + 12);
1919
1920 // we load the four values of the vector
1921 __m256d v = _mm256_loadu_pd(vector.data());
1922
1923 // first, we transpose the 4x4 matrix
1924
1925 // A E I M A B C D
1926 // B F J N E F G H
1927 // C G K O I J K L
1928 // D H L P M N O P
1929
1930 // A B C D, E F G H -> A E C G
1931 __m256d temp0 = _mm256_shuffle_pd(row0, row1, 0x00); // 0x00 = 0000 0000
1932 // A B C D, E F G H -> B F D H
1933 __m256d temp2 = _mm256_shuffle_pd(row0, row1, 0x0F); // 0x0F = 0000 1111
1934 // I J K L, M N O P -> I M K O
1935 __m256d temp1 = _mm256_shuffle_pd(row2, row3, 0x00);
1936 // I J K L, M N O P -> J N L P
1937 __m256d temp3 = _mm256_shuffle_pd(row2, row3, 0x0F);
1938
1939 // A E C G I M K O -> A E I M
1940 row0 = _mm256_permute2f128_pd(temp0, temp1, 0x20); // 0x20 = 0010 0000
1941 // B F D H J N L P -> B F J N
1942 row1 = _mm256_permute2f128_pd(temp2, temp3, 0x20);
1943 // A E C G I M K O -> C G K O
1944 row2 = _mm256_permute2f128_pd(temp0, temp1, 0x31); // 0x31 = 0011 0001
1945 // B F D H J N L P -> D H L P
1946 row3 = _mm256_permute2f128_pd(temp2, temp3, 0x31);
1947
1948#ifdef OCEAN_COMPILER_MSC
1949 ocean_assert(row0.m256d_f64[0] == values[0] && row0.m256d_f64[1] == values[4] && row0.m256d_f64[2] == values[ 8] && row0.m256d_f64[3] == values[12]);
1950 ocean_assert(row1.m256d_f64[0] == values[1] && row1.m256d_f64[1] == values[5] && row1.m256d_f64[2] == values[ 9] && row1.m256d_f64[3] == values[13]);
1951 ocean_assert(row2.m256d_f64[0] == values[2] && row2.m256d_f64[1] == values[6] && row2.m256d_f64[2] == values[10] && row2.m256d_f64[3] == values[14]);
1952 ocean_assert(row3.m256d_f64[0] == values[3] && row3.m256d_f64[1] == values[7] && row3.m256d_f64[2] == values[11] && row3.m256d_f64[3] == values[15]);
1953#endif
1954
1955 // unfortunately the AVX does not offer _mm256_dp_pd (the determination of the dot product) so we have to do it on our own
1956
1957 __m256d r0v = _mm256_mul_pd(row0, v);
1958 __m256d r1v = _mm256_mul_pd(row1, v);
1959 __m256d r2v = _mm256_mul_pd(row2, v);
1960 __m256d r3v = _mm256_mul_pd(row3, v);
1961
1962 // we sum both multiplication results horizontally (at least two neighboring products)
1963 __m256d sum_interleaved_r0_r1 = _mm256_hadd_pd(r0v, r1v);
1964 __m256d sum_interleaved_r2_r3 = _mm256_hadd_pd(r2v, r3v);
1965
1966 // now we reorder the interleaved sums
1967 __m256d sum_first = _mm256_permute2f128_pd(sum_interleaved_r0_r1, sum_interleaved_r2_r3, 0x20); // 0x20 = 0010 0000
1968 __m256d sum_second = _mm256_permute2f128_pd(sum_interleaved_r0_r1, sum_interleaved_r2_r3, 0x31); // 0x31 = 0011 0001
1969
1970 // we finally add both reordered sums
1971 __m256d sum = _mm256_add_pd(sum_first, sum_second);
1972
1973 VectorT4<double> result;
1974 _mm256_storeu_pd(result.data(), sum);
1975
1976 ocean_assert(NumericD::isEqual(result[0], values[0] * vector[0] + values[4] * vector[1] + values[ 8] * vector[2] + values[12] * vector[3], NumericD::eps() * 100));
1977 ocean_assert(NumericD::isEqual(result[1], values[1] * vector[0] + values[5] * vector[1] + values[ 9] * vector[2] + values[13] * vector[3], NumericD::eps() * 100));
1978 ocean_assert(NumericD::isEqual(result[2], values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14] * vector[3], NumericD::eps() * 100));
1979 ocean_assert(NumericD::isEqual(result[3], values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15] * vector[3], NumericD::eps() * 100));
1980
1981 return result;
1982}
1983
1984#else // OCEAN_USE_SLOWER_IMPLEMENTATION
1985
1986template <>
1988{
1989 // the following code uses the following AVX instructions, and needs AVX1 or higher
1990
1991 // AVX1:
1992 // _mm256_broadcast_sd
1993 // _mm256_loadu_pd
1994 // _mm256_mul_pd
1995 // _mm256_add_pd
1996 // _mm256_storeu_pd
1997
1998 // we use the same strategy as for the 32 bit float values
1999
2000 // first we load the first vector element in all 64bit elements of the 256 bit register, so that we receive [a, a, a, a]
2001 __m256d v0 = _mm256_broadcast_sd(vector.data() + 0);
2002
2003 // now we load the first column to receive: [A, B, C, D]
2004 __m256d c0 = _mm256_loadu_pd(values + 0);
2005
2006 // now we multiply the 256 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
2007 __m256d r0 = _mm256_mul_pd(c0, v0);
2008
2009
2010 // now we proceed with the second column
2011 __m256d v1 = _mm256_broadcast_sd(vector.data() + 1);
2012 __m256d c1 = _mm256_loadu_pd(values + 4);
2013 __m256d r1 = _mm256_mul_pd(c1, v1);
2014
2015 // and we sum the result of the first column with the result of the second column
2016 r0 = _mm256_add_pd(r0, r1);
2017
2018
2019 // now we proceed with the third column
2020 __m256d v2 = _mm256_broadcast_sd(vector.data() + 2);
2021 __m256d c2 = _mm256_loadu_pd(values + 8);
2022 __m256d r2 = _mm256_mul_pd(c2, v2);
2023
2024 // we sum the results
2025 r0 = _mm256_add_pd(r0, r2);
2026
2027
2028 // now we proceed with the fourth column
2029 __m256d v3 = _mm256_broadcast_sd(vector.data() + 3);
2030 __m256d c3 = _mm256_loadu_pd(values + 12);
2031 __m256d r3 = _mm256_mul_pd(c3, v3);
2032
2033 // we sum the results
2034 r0 = _mm256_add_pd(r0, r3);
2035
2036 // and finally we store the results back to the vector
2037 VectorT4<double> result;
2038
2039 _mm256_storeu_pd(result.data(), r0);
2040
2041 return result;
2042}
2043
2044#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
2045
2046#else // OCEAN_HARDWARE_AVX_VERSION
2047
2048#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
2049
2050template <>
2052{
2053 // the following code uses the following SSE instructions, and needs SSE2 or higher
2054
2055 // SSE2:
2056 // _mm_load1_pd
2057 // _mm_loadu_pd
2058 // _mm_add_pd
2059 // _mm_storeu_pd
2060 // _mm_mul_pd
2061
2062 // we use the following strategy:
2063 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2064 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2065 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2066 //
2067 // A E I M a Aa + Eb + Ic + Md
2068 // B F J N b Ba + Fb + Jc + Nd
2069 // C G K O * c = Ca + Gb + Kc + Od
2070 // D H L P d Da + Hb + Lc + Pd
2071
2072 // first we load the first vector element in both 64 elements of the 128 bit register, so that we receive [a, a]
2073 __m128d v0 = _mm_load1_pd(vector.data() + 0);
2074
2075 // now we load the first column to receive: [A, B] and [C, D]
2076 __m128d c0a = _mm_loadu_pd(values + 0);
2077 __m128d c0b = _mm_loadu_pd(values + 2);
2078
2079 // now we multiply both 128 bit registers by: [A, B] * [a, a] = [Aa, Ba] and [C, D] * [a, a] = [Ca, Da]
2080 __m128d r0a = _mm_mul_pd(c0a, v0);
2081 __m128d r0b = _mm_mul_pd(c0b, v0);
2082
2083
2084 // now we proceed with the second column
2085 __m128d v1 = _mm_load1_pd(vector.data() + 1);
2086
2087 __m128d c1a = _mm_loadu_pd(values + 4);
2088 __m128d c1b = _mm_loadu_pd(values + 6);
2089
2090 __m128d r1a = _mm_mul_pd(c1a, v1);
2091 __m128d r1b = _mm_mul_pd(c1b, v1);
2092
2093 // and we sum the result of the first column with the result of the second column
2094 r0a = _mm_add_pd(r0a, r1a);
2095 r0b = _mm_add_pd(r0b, r1b);
2096
2097
2098 // now we proceed with the third column
2099 __m128d v2 = _mm_load1_pd(vector.data() + 2);
2100
2101 __m128d c2a = _mm_loadu_pd(values + 8);
2102 __m128d c2b = _mm_loadu_pd(values + 10);
2103
2104 __m128d r2a = _mm_mul_pd(c2a, v2);
2105 __m128d r2b = _mm_mul_pd(c2b, v2);
2106
2107 // we sum the results
2108 r0a = _mm_add_pd(r0a, r2a);
2109 r0b = _mm_add_pd(r0b, r2b);
2110
2111
2112 // now we proceed with the fourth column
2113 __m128d v3 = _mm_load1_pd(vector.data() + 3);
2114
2115 __m128d c3a = _mm_loadu_pd(values + 12);
2116 __m128d c3b = _mm_loadu_pd(values + 14);
2117
2118 __m128d r3a = _mm_mul_pd(c3a, v3);
2119 __m128d r3b = _mm_mul_pd(c3b, v3);
2120
2121 // we sum the results
2122 r0a = _mm_add_pd(r0a, r3a);
2123 r0b = _mm_add_pd(r0b, r3b);
2124
2125 // and finally we store the results back to the vector
2126 VectorT4<double> result;
2127
2128 _mm_storeu_pd(result.data() + 0, r0a);
2129 _mm_storeu_pd(result.data() + 2, r0b);
2130
2131 return result;
2132}
2133
2134#endif // OCEAN_HARDWARE_SSE_VERSION >= 20
2135
2136#endif // OCEAN_HARDWARE_AVX_VERSION >= 10
2137
2138#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
2139
2140// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
2141
2142#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2143
2144template <>
2146{
2147 // the following code uses the following SSE instructions, and needs SSE 4.1 or higher
2148
2149 // SSE:
2150 // _mm_loadu_ps
2151 // _mm_shuffle_ps
2152 // _mm_or_ps
2153 // _mm_storeu_ps
2154
2155 // SSE4.1:
2156 // _mm_dp_ps
2157
2158 // first we load the first four rows of the matrix
2159 __m128 row0 = _mm_loadu_ps(values + 0);
2160 __m128 row1 = _mm_loadu_ps(values + 4);
2161 __m128 row2 = _mm_loadu_ps(values + 8);
2162 __m128 row3 = _mm_loadu_ps(values + 12);
2163
2164 // we load the four values of the vector
2165 __m128 v = _mm_loadu_ps(vector.data());
2166
2167 // first, we transpose the 4x4 matrix
2168
2169 // A E I M A B C D
2170 // B F J N E F G H
2171 // C G K O I J K L
2172 // D H L P M N O P
2173
2174 // A B C D, E F G H -> A B E F
2175 __m128 temp0 = _mm_shuffle_ps(row0, row1, 0x44); // 0x44 = 0100 0100
2176 // A B C D, E F G H -> C D G H
2177 __m128 temp2 = _mm_shuffle_ps(row0, row1, 0xEE); // 0xEE = 1110 1110
2178 // I J K L, M N O P -> I J M N
2179 __m128 temp1 = _mm_shuffle_ps(row2, row3, 0x44);
2180 // I J K L, M N O P -> K L O P
2181 __m128 temp3 = _mm_shuffle_ps(row2, row3, 0xEE);
2182
2183 // A B E F, I J M N -> A E I M
2184 row0 = _mm_shuffle_ps(temp0, temp1, 0x88); // 0x88 = 1000 1000
2185 // A B E F, I J M N -> B F J N
2186 row1 = _mm_shuffle_ps(temp0, temp1, 0xDD); // 0xDD = 1101 1101
2187 // C D G H, K L O P -> C G K O
2188 row2 = _mm_shuffle_ps(temp2, temp3, 0x88);
2189 // C D G H, K L O P -> D H L P
2190 row3 = _mm_shuffle_ps(temp2, temp3, 0xDD);
2191
2192#ifdef OCEAN_COMPILER_MSC
2193 ocean_assert(row0.m128_f32[0] == values[0] && row0.m128_f32[1] == values[4] && row0.m128_f32[2] == values[ 8] && row0.m128_f32[3] == values[12]);
2194 ocean_assert(row1.m128_f32[0] == values[1] && row1.m128_f32[1] == values[5] && row1.m128_f32[2] == values[ 9] && row1.m128_f32[3] == values[13]);
2195 ocean_assert(row2.m128_f32[0] == values[2] && row2.m128_f32[1] == values[6] && row2.m128_f32[2] == values[10] && row2.m128_f32[3] == values[14]);
2196 ocean_assert(row3.m128_f32[0] == values[3] && row3.m128_f32[1] == values[7] && row3.m128_f32[2] == values[11] && row3.m128_f32[3] == values[15]);
2197#endif
2198
2199 // we determine the dot product between the first row and the vector and store the result in the first float bin
2200 row0 = _mm_dp_ps(row0, v, 0xF1); // 0xF1 = 1111 0001
2201
2202 // we determine the dot product between the second row and the vector and store the result in the second float bin
2203 row1 = _mm_dp_ps(row1, v, 0xF2); // 0xF2 = 1111 0010
2204 row2 = _mm_dp_ps(row2, v, 0xF4); // 0xF4 = 1111 0100
2205 row3 = _mm_dp_ps(row3, v, 0xF8); // 0xF8 = 1111 1000
2206
2207 // now we blend the results by applying the bit-wise or operator
2208 __m128 result01 = _mm_or_ps(row0, row1);
2209 __m128 result23 = _mm_or_ps(row2, row3);
2210 __m128 result03 = _mm_or_ps(result01, result23);
2211
2212 VectorT4<float> result;
2213 _mm_storeu_ps(result.data(), result03);
2214
2215 return result;
2216}
2217
2218#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
2219
2220#else // OCEAN_USE_SLOWER_IMPLEMENTATION
2221
2222#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
2223
2224template <>
2226{
2227 // the following code uses the following SSE instructions, and needs SSE1 or higher
2228
2229 // SSE1:
2230 // _mm_load1_ps
2231 // _mm_loadu_ps
2232 // _mm_mul_ps
2233 // _mm_add_ps
2234 // _mm_storeu_ps
2235
2236
2237 // we use the following strategy:
2238 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2239 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2240 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2241 //
2242 // A E I M a Aa + Eb + Ic + Md
2243 // B F J N b Ba + Fb + Jc + Nd
2244 // C G K O * c = Ca + Gb + Kc + Od
2245 // D H L P d Da + Hb + Lc + Pd
2246
2247 // first we load the first vector element in all 32bit elements of the 128 bit register, so that we receive [a, a, a, a]
2248 __m128 v0 = _mm_load1_ps(vector.data() + 0);
2249
2250 // now we load the first column to receive: [A, B, C, D]
2251 __m128 c0 = _mm_loadu_ps(values + 0);
2252
2253 // now we multiply the 128 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
2254 __m128 r0 = _mm_mul_ps(c0, v0);
2255
2256
2257 // now we proceed with the second column
2258 __m128 v1 = _mm_load1_ps(vector.data() + 1);
2259 __m128 c1 = _mm_loadu_ps(values + 4);
2260 __m128 r1 = _mm_mul_ps(c1, v1);
2261
2262 // and we sum the result of the first column with the result of the second column
2263 r0 = _mm_add_ps(r0, r1);
2264
2265
2266 // now we proceed with the third column
2267 __m128 v2 = _mm_load1_ps(vector.data() + 2);
2268 __m128 c2 = _mm_loadu_ps(values + 8);
2269 __m128 r2 = _mm_mul_ps(c2, v2);
2270
2271 // we sum the results
2272 r0 = _mm_add_ps(r0, r2);
2273
2274
2275 // now we proceed with the fourth column
2276 __m128 v3 = _mm_load1_ps(vector.data() + 3);
2277 __m128 c3 = _mm_loadu_ps(values + 12);
2278 __m128 r3 = _mm_mul_ps(c3, v3);
2279
2280 // we sum the results
2281 r0 = _mm_add_ps(r0, r3);
2282
2283 // and finally we store the results back to the vector
2284 VectorT4<float> result;
2285
2286 _mm_storeu_ps(result.data(), r0);
2287
2288 return result;
2289}
2290
2291#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
2292
2293#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
2294
2295#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2296
2297#ifdef __aarch64__
2298
2299template <>
2301{
2302 // the following NEON code is almost identical to the SSE implementation
2303
2304 // we use the following strategy:
2305 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2306 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2307 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2308 //
2309 // A E I M a Aa + Eb + Ic + Md
2310 // B F J N b Ba + Fb + Jc + Nd
2311 // C G K O * c = Ca + Gb + Kc + Od
2312 // D H L P d Da + Hb + Lc + Pd
2313
2314 // first we load the first vector element in both 64 elements of the 128 bit register, so that we receive [a, a]
2315 float64x2_t v0 = vld1q_dup_f64(vector.data() + 0);
2316
2317 // now we load the first column to receive: [A, B] and [C, D]
2318 float64x2_t c0a = vld1q_f64(values + 0);
2319 float64x2_t c0b = vld1q_f64(values + 2);
2320
2321 // now we multiply both 128 bit registers by: [A, B] * [a, a] = [Aa, Ba] and [C, D] * [a, a] = [Ca, Da]
2322 float64x2_t r0a = vmulq_f64(c0a, v0);
2323 float64x2_t r0b = vmulq_f64(c0b, v0);
2324
2325
2326 // now we proceed with the second column
2327 float64x2_t v1 = vld1q_dup_f64(vector.data() + 1);
2328
2329 float64x2_t c1a = vld1q_f64(values + 4);
2330 float64x2_t c1b = vld1q_f64(values + 6);
2331
2332 float64x2_t r1a = vmulq_f64(c1a, v1);
2333 float64x2_t r1b = vmulq_f64(c1b, v1);
2334
2335 // and we sum the result of the first column with the result of the second column
2336 r0a = vaddq_f64(r0a, r1a);
2337 r0b = vaddq_f64(r0b, r1b);
2338
2339
2340 // now we proceed with the third column
2341 float64x2_t v2 = vld1q_dup_f64(vector.data() + 2);
2342
2343 float64x2_t c2a = vld1q_f64(values + 8);
2344 float64x2_t c2b = vld1q_f64(values + 10);
2345
2346 float64x2_t r2a = vmulq_f64(c2a, v2);
2347 float64x2_t r2b = vmulq_f64(c2b, v2);
2348
2349 // we sum the results
2350 r0a = vaddq_f64(r0a, r2a);
2351 r0b = vaddq_f64(r0b, r2b);
2352
2353
2354 // now we proceed with the fourth column
2355 float64x2_t v3 = vld1q_dup_f64(vector.data() + 3);
2356
2357 float64x2_t c3a = vld1q_f64(values + 12);
2358 float64x2_t c3b = vld1q_f64(values + 14);
2359
2360 float64x2_t r3a = vmulq_f64(c3a, v3);
2361 float64x2_t r3b = vmulq_f64(c3b, v3);
2362
2363 // we sum the results
2364 r0a = vaddq_f64(r0a, r3a);
2365 r0b = vaddq_f64(r0b, r3b);
2366
2367 // and finally we store the results back to the vector
2368 VectorT4<double> result;
2369
2370 vst1q_f64(result.data() + 0, r0a);
2371 vst1q_f64(result.data() + 2, r0b);
2372
2373 return result;
2374}
2375
2376#endif // __aarch64__
2377
2378template <>
2380{
2381 // the following NEON code is almost identical to the SSE implementation
2382
2383 // we use the following strategy:
2384 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2385 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2386 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2387 //
2388 // A E I M a Aa + Eb + Ic + Md
2389 // B F J N b Ba + Fb + Jc + Nd
2390 // C G K O * c = Ca + Gb + Kc + Od
2391 // D H L P d Da + Hb + Lc + Pd
2392
2393 // first we load the first vector element in all 32bit elements of the 128 bit register, so that we receive [a, a, a, a]
2394 float32x4_t v0 = vld1q_dup_f32(vector.data() + 0);
2395
2396 // now we load the first column to receive: [A, B, C, D]
2397 float32x4_t c0 = vld1q_f32(values + 0);
2398
2399 // now we multiply the 128 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
2400 float32x4_t r0 = vmulq_f32(c0, v0);
2401
2402
2403 // now we proceed with the second column
2404 float32x4_t v1 = vld1q_dup_f32(vector.data() + 1);
2405 float32x4_t c1 = vld1q_f32(values + 4);
2406 float32x4_t r1 = vmulq_f32(c1, v1);
2407
2408 // and we sum the result of the first column with the result of the second column
2409 r0 = vaddq_f32(r0, r1);
2410
2411
2412 // now we proceed with the third column
2413 float32x4_t v2 = vld1q_dup_f32(vector.data() + 2);
2414 float32x4_t c2 = vld1q_f32(values + 8);
2415 float32x4_t r2 = vmulq_f32(c2, v2);
2416
2417 // we sum the results
2418 r0 = vaddq_f32(r0, r2);
2419
2420
2421 // now we proceed with the fourth column
2422 float32x4_t v3 = vld1q_dup_f32(vector.data() + 3);
2423 float32x4_t c3 = vld1q_f32(values + 12);
2424 float32x4_t r3 = vmulq_f32(c3, v3);
2425
2426 // we sum the results
2427 r0 = vaddq_f32(r0, r3);
2428
2429 // and finally we store the results back to the vector
2430 VectorT4<float> result;
2431
2432 vst1q_f32(result.data(), r0);
2433
2434 return result;
2435}
2436
2437#endif
2438
2439template <typename T>
2441{
2442 SquareMatrixT4<T> result(*this);
2443
2444 result.values[0] *= value;
2445 result.values[1] *= value;
2446 result.values[2] *= value;
2447 result.values[3] *= value;
2448 result.values[4] *= value;
2449 result.values[5] *= value;
2450 result.values[6] *= value;
2451 result.values[7] *= value;
2452 result.values[8] *= value;
2453 result.values[9] *= value;
2454 result.values[10] *= value;
2455 result.values[11] *= value;
2456 result.values[12] *= value;
2457 result.values[13] *= value;
2458 result.values[14] *= value;
2459 result.values[15] *= value;
2460
2461 return result;
2462}
2463
2464template <typename T>
2466{
2467 values[0] *= value;
2468 values[1] *= value;
2469 values[2] *= value;
2470 values[3] *= value;
2471 values[4] *= value;
2472 values[5] *= value;
2473 values[6] *= value;
2474 values[7] *= value;
2475 values[8] *= value;
2476 values[9] *= value;
2477 values[10] *= value;
2478 values[11] *= value;
2479 values[12] *= value;
2480 values[13] *= value;
2481 values[14] *= value;
2482 values[15] *= value;
2483
2484 return *this;
2485}
2486
2487template <typename T>
2488SquareMatrixT4<T> SquareMatrixT4<T>::projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance)
2489{
2490 /*
2491 * <pre>
2492 * Creates the following frustum projection matrix.
2493 *
2494 * --------------------------------------------------
2495 * | t/a 0 0 0 |
2496 * | 0 t 0 0 |
2497 * | 0 0 (f+n)/(n-f) -2fn/(n-f) |
2498 * | 0 0 -1 0 |
2499 * --------------------------------------------------
2500 *
2501 * With: t = 1 / tan (fovY / 2), a = aspectRatio, n = nearDistance, f = farDistance
2502 * </pre>
2503 */
2504
2505 ocean_assert(fovX > 0 && fovX < NumericT<T>::pi());
2506 ocean_assert(aspectRatio > 0);
2507 ocean_assert(nearDistance > 0);
2508 ocean_assert(nearDistance < farDistance);
2509
2510 const T fovY = T(2.0) * NumericT<T>::atan(NumericT<T>::tan(T(0.5) * fovX) / aspectRatio);
2511
2512 SquareMatrixT4<T> matrix(false);
2513 ocean_assert(matrix(1, 0) == 0 && matrix(2, 0) == 0 && matrix(3, 0) == 0);
2514 ocean_assert(matrix(0, 1) == 0 && matrix(2, 1) == 0 && matrix(3, 1) == 0);
2515 ocean_assert(matrix(0, 2) == 0 && matrix(1, 2) == 0);
2516 ocean_assert(matrix(0, 3) == 0 && matrix(1, 3) == 0 && matrix(3, 3) == 0);
2517
2518 ocean_assert(NumericT<T>::isNotEqual(farDistance, nearDistance));
2519
2520 const T f = T(1.0) / NumericT<T>::tan(fovY * T(0.5));
2521 const T factor = T(1.0) / (nearDistance - farDistance);
2522
2523 matrix(0, 0) = f / aspectRatio;
2524 matrix(1, 1) = f;
2525 matrix(2, 2) = (farDistance + nearDistance) * factor;
2526 matrix(3, 2) = -T(1.0);
2527 matrix(2, 3) = (T(2.0) * farDistance * nearDistance) * factor;
2528
2529 return matrix;
2530}
2531
2532template <typename T>
2533SquareMatrixT4<T> SquareMatrixT4<T>::projectionMatrix(const AnyCameraT<T>& anyCamera, const T nearDistance, const T farDistance)
2534{
2535 /*
2536 * <pre>
2537 * Creates the following frustum projection matrix.
2538 *
2539 * --------------------------------------------------
2540 * | Fx 0 mx 0 |
2541 * | 0 Fy my 0 |
2542 * | 0 0 (f+n)/(n-f) -2fn/(n-f) |
2543 * | 0 0 -1 0 |
2544 * --------------------------------------------------
2545 *
2546 * n = nearDistance, f = farDistance
2547 * </pre>
2548 */
2549
2550 ocean_assert(anyCamera.isValid());
2551
2552 ocean_assert(nearDistance > 0);
2553 ocean_assert(nearDistance < farDistance);
2554
2555 const T fxPixel = anyCamera.focalLengthX();
2556 const T fyPixel = anyCamera.focalLengthY();
2557 ocean_assert(fxPixel > T(1) && fyPixel > T(1));
2558
2559 const T mxPixel = anyCamera.principalPointX();
2560 const T myPixel = anyCamera.principalPointY();
2561
2562 const T width_2 = T(anyCamera.width()) / T(2);
2563 const T height_2 = T(anyCamera.height()) / T(2);
2564
2565 ocean_assert(NumericT<T>::isNotEqualEps(width_2));
2566 ocean_assert(NumericT<T>::isNotEqualEps(height_2));
2567
2568 const T fx = fxPixel / width_2;
2569 const T fy = fyPixel / height_2;
2570
2571 const T mx = (mxPixel - width_2) / width_2; // principal point with range [-1, 1]
2572 const T my = (myPixel - height_2) / height_2;
2573
2574 const T factor = T(1.0) / (nearDistance - farDistance);
2575
2576 SquareMatrixT4<T> matrix(false);
2577
2578 matrix(0, 0) = fx;
2579 matrix(1, 1) = fy;
2580 matrix(0, 2) = -mx;
2581 matrix(1, 2) = my;
2582 matrix(2, 2) = (farDistance + nearDistance) * factor;
2583 matrix(3, 2) = -T(1);
2584 matrix(2, 3) = (T(2) * farDistance * nearDistance) * factor;
2585
2586 return matrix;
2587}
2588
2589template <typename T>
2590SquareMatrixT4<T> SquareMatrixT4<T>::frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance)
2591{
2592 /**
2593 * <pre>
2594 * Creates the following frustum projection matrix:
2595 *
2596 * --------------------------------------------------
2597 * | 2n/(r-l) 0 (r+l)/(r-l) 0 |
2598 * | 0 2n/(t-b) (t+b)/(t-b) 0 |
2599 * | 0 0 -(f+n)/(f-n) -2fn/(f-n) |
2600 * | 0 0 -1 0 |
2601 * --------------------------------------------------
2602 * </pre>
2603 */
2604
2605 ocean_assert(NumericT<T>::isNotEqual(left, right));
2606 ocean_assert(NumericT<T>::isNotEqual(top, bottom));
2607 ocean_assert(NumericT<T>::isNotEqual(nearDistance, farDistance));
2608
2609 SquareMatrixT4<T> matrix(false);
2610 ocean_assert(matrix(1, 0) == 0 && matrix(2, 0) == 0 && matrix(3, 0) == 0);
2611 ocean_assert(matrix(0, 1) == 0 && matrix(2, 1) == 0 && matrix(3, 1) == 0);
2612 ocean_assert(matrix(0, 3) == 0 && matrix(1, 3) == 0 && matrix(3, 3) == 0);
2613
2614 const T rightLeft = T(1.0) / (right - left);
2615 const T near2 = nearDistance * T(2.0);
2616
2617 matrix(0, 0) = near2 * rightLeft;
2618 matrix(0, 2) = (right + left) * rightLeft;
2619
2620 const T topBottom = T(1.0) / (top - bottom);
2621
2622 matrix(1, 1) = near2 * topBottom;
2623 matrix(1, 2) = (top + bottom) * topBottom;
2624
2625 const T farNear = T(1.0) / (farDistance - nearDistance);
2626
2627 matrix(2, 2) = -(farDistance + nearDistance) * farNear;
2628 matrix(2, 3) = -T(2.0) * farDistance * nearDistance * farNear;
2629
2630 matrix(3, 2) = -T(1.0);
2631
2632 return matrix;
2633}
2634
2635template <typename T>
2636SquareMatrixT4<T> SquareMatrixT4<T>::frustumMatrix(const T width, const T height, const HomogenousMatrixT4<T>& viewingMatrix, const T nearDistance, const T farDistance)
2637{
2638 ocean_assert(width > NumericT<T>::eps() && height > NumericT<T>::eps());
2639 ocean_assert(nearDistance >= NumericT<T>::eps() && farDistance > nearDistance);
2640
2641 const T planeDistance = NumericT<T>::abs(viewingMatrix.translation().z());
2642 ocean_assert(viewingMatrix.isValid() && NumericT<T>::isNotEqualEps(planeDistance));
2643
2644 const HomogenousMatrixT4<T> inversedViewingMatrix(viewingMatrix.inverted());
2645
2646 const VectorT3<T> leftTop(width * -T(0.5), height * T(0.5), 0);
2647 const VectorT3<T> rightBottom(width * T(0.5), height * -T(0.5), 0);
2648
2649 const VectorT3<T> leftTopInCamera(inversedViewingMatrix * leftTop);
2650 const VectorT3<T> rightBottomInCamera(inversedViewingMatrix * rightBottom);
2651
2652 const T factor = nearDistance / planeDistance;
2653
2654 return frustumMatrix(factor * leftTopInCamera.x(), factor * rightBottomInCamera.x(), factor * leftTopInCamera.y(), factor * rightBottomInCamera.y(), nearDistance, farDistance);
2655}
2656
2657template <typename T>
2658void SquareMatrixT4<T>::multiply(const SquareMatrixT4<T>& matrix, const VectorT4<T>* vectors, VectorT4<T>* results, const size_t number)
2659{
2660 ocean_assert((vectors && results) || number == 0);
2661
2662 for (size_t n = 0; n < number; ++n)
2663 {
2664 results[n] = matrix * vectors[n];
2665 }
2666}
2667
2668#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
2669
2670template <>
2671inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2672{
2673 // the following code uses the following AVX instructions, and needs AVX1 or higher
2674
2675 // AVX1:
2676 // _mm256_broadcast_sd
2677 // _mm256_loadu_pd
2678 // _mm256_mul_pd
2679 // _mm256_add_pd
2680 // _mm256_storeu_pd
2681
2682 // we use the same strategy as for the 32 bit float values
2683
2684 __m256d c0 = _mm256_loadu_pd(matrix.values + 0);
2685 __m256d c1 = _mm256_loadu_pd(matrix.values + 4);
2686 __m256d c2 = _mm256_loadu_pd(matrix.values + 8);
2687 __m256d c3 = _mm256_loadu_pd(matrix.values + 12);
2688
2689 for (size_t n = 0; n < number; ++n)
2690 {
2691 __m256d v0 = _mm256_broadcast_sd(vectors[n].data() + 0);
2692 __m256d r0 = _mm256_mul_pd(c0, v0);
2693
2694 __m256d v1 = _mm256_broadcast_sd(vectors[n].data() + 1);
2695 __m256d r1 = _mm256_mul_pd(c1, v1);
2696
2697 r0 = _mm256_add_pd(r0, r1);
2698
2699 __m256d v2 = _mm256_broadcast_sd(vectors[n].data() + 2);
2700 __m256d r2 = _mm256_mul_pd(c2, v2);
2701
2702 r0 = _mm256_add_pd(r0, r2);
2703
2704 __m256d v3 = _mm256_broadcast_sd(vectors[n].data() + 3);
2705 __m256d r3 = _mm256_mul_pd(c3, v3);
2706
2707 r0 = _mm256_add_pd(r0, r3);
2708
2709 _mm256_storeu_pd(results[n].data(), r0);
2710 }
2711}
2712
2713#else // OCEAN_HARDWARE_AVX_VERSION
2714
2715#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
2716
2717// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
2718
2719#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2720
2721template <>
2722inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2723{
2724 // the following code uses the following SSE instructions, and needs SSE 4.1 or higher
2725
2726 // SSE:
2727 // _mm_loadu_ps
2728 // _mm_shuffle_ps
2729 // _mm_or_ps
2730 // _mm_storeu_ps
2731
2732 // SSE4.1:
2733 // _mm_dp_ps
2734
2735 // first we load the four rows of the matrix
2736 __m128 row0 = _mm_loadu_ps(matrix.values + 0);
2737 __m128 row1 = _mm_loadu_ps(matrix.values + 4);
2738 __m128 row2 = _mm_loadu_ps(matrix.values + 8);
2739 __m128 row3 = _mm_loadu_ps(matrix.values + 12);
2740
2741 // first, we transpose the 4x4 matrix
2742
2743 // A E I M A B C D
2744 // B F J N E F G H
2745 // C G K O I J K L
2746 // D H L P M N O P
2747
2748 // A B C D, E F G H -> A B E F
2749 __m128 temp0 = _mm_shuffle_ps(row0, row1, 0x44); // 0x44 = 0100 0100
2750 // A B C D, E F G H -> C D G H
2751 __m128 temp2 = _mm_shuffle_ps(row0, row1, 0xEE); // 0xEE = 1110 1110
2752 // I J K L, M N O P -> I J M N
2753 __m128 temp1 = _mm_shuffle_ps(row2, row3, 0x44);
2754 // I J K L, M N O P -> K L O P
2755 __m128 temp3 = _mm_shuffle_ps(row2, row3, 0xEE);
2756
2757 // A B E F, I J M N -> A E I M
2758 row0 = _mm_shuffle_ps(temp0, temp1, 0x88); // 0x88 = 1000 1000
2759 // A B E F, I J M N -> B F J N
2760 row1 = _mm_shuffle_ps(temp0, temp1, 0xDD); // 0xDD = 1101 1101
2761 // C D G H, K L O P -> C G K O
2762 row2 = _mm_shuffle_ps(temp2, temp3, 0x88);
2763 // C D G H, K L O P -> D H L P
2764 row3 = _mm_shuffle_ps(temp2, temp3, 0xDD);
2765
2766#ifdef OCEAN_COMPILER_MSC
2767 ocean_assert(row0.m128_f32[0] == matrix.values[0] && row0.m128_f32[1] == matrix.values[4] && row0.m128_f32[2] == matrix.values[ 8] && row0.m128_f32[3] == matrix.values[12]);
2768 ocean_assert(row1.m128_f32[0] == matrix.values[1] && row1.m128_f32[1] == matrix.values[5] && row1.m128_f32[2] == matrix.values[ 9] && row1.m128_f32[3] == matrix.values[13]);
2769 ocean_assert(row2.m128_f32[0] == matrix.values[2] && row2.m128_f32[1] == matrix.values[6] && row2.m128_f32[2] == matrix.values[10] && row2.m128_f32[3] == matrix.values[14]);
2770 ocean_assert(row3.m128_f32[0] == matrix.values[3] && row3.m128_f32[1] == matrix.values[7] && row3.m128_f32[2] == matrix.values[11] && row3.m128_f32[3] == matrix.values[15]);
2771#endif
2772
2773 for (size_t n = 0u; n < number; ++n)
2774 {
2775 // we load the four values of the vector
2776 __m128 v = _mm_loadu_ps(vectors[n].data());
2777
2778 // we determine the dot product between the first row and the vector and store the result in the first float bin
2779 __m128 dot0 = _mm_dp_ps(row0, v, 0xF1); // 0xF1 = 1111 0001
2780
2781 // we determine the dot product between the second row and the vector and store the result in the second float bin
2782 __m128 dot1 = _mm_dp_ps(row1, v, 0xF2); // 0xF2 = 1111 0010
2783 __m128 dot2 = _mm_dp_ps(row2, v, 0xF4); // 0xF4 = 1111 0100
2784 __m128 dot3 = _mm_dp_ps(row3, v, 0xF8); // 0xF8 = 1111 1000
2785
2786 // now we blend the results by applying the bit-wise or operator
2787 __m128 result01 = _mm_or_ps(dot0, dot1);
2788 __m128 result23 = _mm_or_ps(dot2, dot3);
2789 __m128 result03 = _mm_or_ps(result01, result23);
2790
2791 _mm_storeu_ps(results[n].data(), result03);
2792 }
2793}
2794
2795#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
2796
2797#else // OCEAN_USE_SLOWER_IMPLEMENTATION
2798
2799#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
2800
2801template <>
2802inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2803{
2804 // the following code uses the following SSE instructions, and needs SSE2 or higher
2805
2806 // SSE2:
2807 // _mm_load1_pd
2808 // _mm_loadu_pd
2809 // _mm_add_pd
2810 // _mm_storeu_pd
2811 // _mm_mul_pd
2812
2813 // now we load the four columns
2814 __m128d c0a = _mm_loadu_pd(matrix.values + 0);
2815 __m128d c0b = _mm_loadu_pd(matrix.values + 2);
2816 __m128d c1a = _mm_loadu_pd(matrix.values + 4);
2817 __m128d c1b = _mm_loadu_pd(matrix.values + 6);
2818 __m128d c2a = _mm_loadu_pd(matrix.values + 8);
2819 __m128d c2b = _mm_loadu_pd(matrix.values + 10);
2820 __m128d c3a = _mm_loadu_pd(matrix.values + 12);
2821 __m128d c3b = _mm_loadu_pd(matrix.values + 14);
2822
2823 for (size_t n = 0u; n < number; ++n)
2824 {
2825 __m128d v0 = _mm_load1_pd(vectors[n].data() + 0);
2826 __m128d v1 = _mm_load1_pd(vectors[n].data() + 1);
2827 __m128d v2 = _mm_load1_pd(vectors[n].data() + 2);
2828 __m128d v3 = _mm_load1_pd(vectors[n].data() + 3);
2829
2830 // first column
2831 __m128d r0a = _mm_mul_pd(c0a, v0);
2832 __m128d r0b = _mm_mul_pd(c0b, v0);
2833
2834 // second column
2835 __m128d r1a = _mm_mul_pd(c1a, v1);
2836 __m128d r1b = _mm_mul_pd(c1b, v1);
2837
2838 r0a = _mm_add_pd(r0a, r1a);
2839 r0b = _mm_add_pd(r0b, r1b);
2840
2841 // third column
2842 __m128d r2a = _mm_mul_pd(c2a, v2);
2843 __m128d r2b = _mm_mul_pd(c2b, v2);
2844 r0a = _mm_add_pd(r0a, r2a);
2845 r0b = _mm_add_pd(r0b, r2b);
2846
2847 // fourth column
2848 __m128d r3a = _mm_mul_pd(c3a, v3);
2849 __m128d r3b = _mm_mul_pd(c3b, v3);
2850 r0a = _mm_add_pd(r0a, r3a);
2851 r0b = _mm_add_pd(r0b, r3b);
2852
2853 _mm_storeu_pd(results[n].data() + 0, r0a);
2854 _mm_storeu_pd(results[n].data() + 2, r0b);
2855 }
2856}
2857
2858#endif // OCEAN_HARDWARE_SSE_VERSION >= 20
2859
2860#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
2861
2862template <>
2863inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2864{
2865 // the following code uses the following SSE instructions, and needs SSE1 or higher
2866
2867 // SSE1:
2868 // _mm_load1_ps
2869 // _mm_loadu_ps
2870 // _mm_mul_ps
2871 // _mm_add_ps
2872 // _mm_storeu_ps
2873
2874 // now we load the four columns
2875 __m128 c0 = _mm_loadu_ps(matrix.values + 0);
2876 __m128 c1 = _mm_loadu_ps(matrix.values + 4);
2877 __m128 c2 = _mm_loadu_ps(matrix.values + 8);
2878 __m128 c3 = _mm_loadu_ps(matrix.values + 12);
2879
2880 for (size_t n = 0u; n < number; ++n)
2881 {
2882 __m128 v0 = _mm_load1_ps(vectors[n].data() + 0);
2883 __m128 v1 = _mm_load1_ps(vectors[n].data() + 1);
2884 __m128 v2 = _mm_load1_ps(vectors[n].data() + 2);
2885 __m128 v3 = _mm_load1_ps(vectors[n].data() + 3);
2886
2887 // first column
2888 __m128 r0 = _mm_mul_ps(c0, v0);
2889
2890 // second column
2891 __m128 r1 = _mm_mul_ps(c1, v1);
2892
2893 r0 = _mm_add_ps(r0, r1);
2894
2895 // third column
2896 __m128 r2 = _mm_mul_ps(c2, v2);
2897 r0 = _mm_add_ps(r0, r2);
2898
2899 // fourth column
2900 __m128 r3 = _mm_mul_ps(c3, v3);
2901 r0 = _mm_add_ps(r0, r3);
2902
2903 _mm_storeu_ps(results[n].data(), r0);
2904 }
2905}
2906
2907#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
2908
2909#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
2910
2911#endif // OCEAN_HARDWARE_AVX_VERSION
2912
2913#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2914
2915#ifdef __aarch64__
2916
2917template <>
2918inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2919{
2920 // the following NEON code is almost identical to the SSE implementation
2921
2922 // now we load the four columns
2923 float64x2_t c0a = vld1q_f64(matrix.values + 0);
2924 float64x2_t c0b = vld1q_f64(matrix.values + 2);
2925 float64x2_t c1a = vld1q_f64(matrix.values + 4);
2926 float64x2_t c1b = vld1q_f64(matrix.values + 6);
2927 float64x2_t c2a = vld1q_f64(matrix.values + 8);
2928 float64x2_t c2b = vld1q_f64(matrix.values + 10);
2929 float64x2_t c3a = vld1q_f64(matrix.values + 12);
2930 float64x2_t c3b = vld1q_f64(matrix.values + 14);
2931
2932 for (size_t n = 0u; n < number; ++n)
2933 {
2934 float64x2_t v0 = vld1q_dup_f64(vectors[n].data() + 0);
2935 float64x2_t v1 = vld1q_dup_f64(vectors[n].data() + 1);
2936 float64x2_t v2 = vld1q_dup_f64(vectors[n].data() + 2);
2937 float64x2_t v3 = vld1q_dup_f64(vectors[n].data() + 3);
2938
2939 float64x2_t r0a = vmulq_f64(c0a, v0);
2940 float64x2_t r0b = vmulq_f64(c0b, v0);
2941
2942 float64x2_t r1a = vmulq_f64(c1a, v1);
2943 float64x2_t r1b = vmulq_f64(c1b, v1);
2944
2945 r0a = vaddq_f64(r0a, r1a);
2946 r0b = vaddq_f64(r0b, r1b);
2947
2948 float64x2_t r2a = vmulq_f64(c2a, v2);
2949 float64x2_t r2b = vmulq_f64(c2b, v2);
2950
2951 r0a = vaddq_f64(r0a, r2a);
2952 r0b = vaddq_f64(r0b, r2b);
2953
2954 float64x2_t r3a = vmulq_f64(c3a, v3);
2955 float64x2_t r3b = vmulq_f64(c3b, v3);
2956
2957 r0a = vaddq_f64(r0a, r3a);
2958 r0b = vaddq_f64(r0b, r3b);
2959
2960 vst1q_f64(results[n].data() + 0, r0a);
2961 vst1q_f64(results[n].data() + 2, r0b);
2962 }
2963}
2964
2965#endif // __aarch64__
2966
2967template <>
2968inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2969{
2970 // the following NEON code is almost identical to the SSE implementation
2971
2972 // now we load the four columns
2973 float32x4_t c0 = vld1q_f32(matrix.values + 0);
2974 float32x4_t c1 = vld1q_f32(matrix.values + 4);
2975 float32x4_t c2 = vld1q_f32(matrix.values + 8);
2976 float32x4_t c3 = vld1q_f32(matrix.values + 12);
2977
2978 for (size_t n = 0u; n < number; ++n)
2979 {
2980 float32x4_t v0 = vld1q_dup_f32(vectors[n].data() + 0);
2981 float32x4_t v1 = vld1q_dup_f32(vectors[n].data() + 1);
2982 float32x4_t v2 = vld1q_dup_f32(vectors[n].data() + 2);
2983 float32x4_t v3 = vld1q_dup_f32(vectors[n].data() + 3);
2984
2985 // first column
2986 float32x4_t r0 = vmulq_f32(c0, v0);
2987
2988 // second column
2989 float32x4_t r1 = vmulq_f32(c1, v1);
2990
2991 r0 = vaddq_f32(r0, r1);
2992
2993 // third column
2994 float32x4_t r2 = vmulq_f32(c2, v2);
2995
2996 r0 = vaddq_f32(r0, r2);
2997
2998 // fourth column
2999 float32x4_t r3 = vmulq_f32(c3, v3);
3000
3001 r0 = vaddq_f32(r0, r3);
3002
3003 vst1q_f32(results[n].data(), r0);
3004 }
3005}
3006
3007#endif
3008
3009template <typename T>
3010template <typename U>
3011inline std::vector< SquareMatrixT4<T> > SquareMatrixT4<T>::matrices2matrices(const std::vector< SquareMatrixT4<U> >& matrices)
3012{
3013 std::vector< SquareMatrixT4<T> > result;
3014 result.reserve(matrices.size());
3015
3016 for (typename std::vector< SquareMatrixT4<U> >::const_iterator i = matrices.begin(); i != matrices.end(); ++i)
3017 {
3018 result.push_back(SquareMatrixT4<T>(*i));
3019 }
3020
3021 return result;
3022}
3023
3024template <>
3025template <>
3026inline std::vector< SquareMatrixT4<float> > SquareMatrixT4<float>::matrices2matrices(const std::vector< SquareMatrixT4<float> >& matrices)
3027{
3028 return matrices;
3029}
3030
3031template <>
3032template <>
3033inline std::vector< SquareMatrixT4<double> > SquareMatrixT4<double>::matrices2matrices(const std::vector< SquareMatrixT4<double> >& matrices)
3034{
3035 return matrices;
3036}
3037
3038template <typename T>
3039template <typename U>
3040inline std::vector< SquareMatrixT4<T> > SquareMatrixT4<T>::matrices2matrices(const SquareMatrixT4<U>* matrices, const size_t size)
3041{
3042 std::vector< SquareMatrixT4<T> > result;
3043 result.reserve(size);
3044
3045 for (size_t n = 0; n < size; ++n)
3046 {
3047 result.push_back(SquareMatrixT4<T>(matrices[n]));
3048 }
3049
3050 return result;
3051}
3052
3053template <typename T>
3054void SquareMatrixT4<T>::swapRows(const unsigned int row0, const unsigned int row1)
3055{
3056 ocean_assert(row0 < 4u && row1 < 4u);
3057
3058 if (row0 == row1)
3059 {
3060 return;
3061 }
3062
3063 T* first = values + row0;
3064 T* second = values + row1;
3065
3066 T tmp = *first;
3067 *first = *second;
3068 *second = tmp;
3069
3070 first += 4;
3071 second += 4;
3072 tmp = *first;
3073 *first = *second;
3074 *second = tmp;
3075
3076 first += 4;
3077 second += 4;
3078 tmp = *first;
3079 *first = *second;
3080 *second = tmp;
3081
3082 first += 4;
3083 second += 4;
3084 tmp = *first;
3085 *first = *second;
3086 *second = tmp;
3087}
3088
3089template <typename T>
3090void SquareMatrixT4<T>::multiplyRow(const unsigned int row, const T scalar)
3091{
3092 ocean_assert(row < 4u);
3093
3094 T* element = values + row;
3095
3096 *element *= scalar;
3097 element += 4;
3098 *element *= scalar;
3099 element += 4;
3100 *element *= scalar;
3101 element += 4;
3102 *element *= scalar;
3103}
3104
3105template <typename T>
3106void SquareMatrixT4<T>::addRows(const unsigned int targetRow, unsigned int const sourceRow, const T scalar)
3107{
3108 ocean_assert(targetRow < 4u && sourceRow < 4u);
3109 ocean_assert(targetRow != sourceRow);
3110
3111 T* target = values + targetRow;
3112 T* source = values + sourceRow;
3113
3114 *target += *source * scalar;
3115
3116 target += 4;
3117 source += 4;
3118 *target += *source * scalar;
3119
3120 target += 4;
3121 source += 4;
3122 *target += *source * scalar;
3123
3124 target += 4;
3125 source += 4;
3126 *target += *source * scalar;
3127}
3128
3129template <typename T>
3130std::ostream& operator<<(std::ostream& stream, const SquareMatrixT4<T>& matrix)
3131{
3132 stream << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|" << std::endl;
3133 stream << "|" << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|" << std::endl;
3134 stream << "|" << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|" << std::endl;
3135 stream << "|" << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3136
3137 return stream;
3138}
3139
3140template <bool tActive, typename T>
3141MessageObject<tActive>& operator<<(MessageObject<tActive>& messageObject, const SquareMatrixT4<T>& matrix)
3142{
3143 return messageObject << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|\n|"
3144 << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|\n|"
3145 << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|\n|"
3146 << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3147}
3148
3149template <bool tActive, typename T>
3150MessageObject<tActive>& operator<<(MessageObject<tActive>&& messageObject, const SquareMatrixT4<T>& matrix)
3151{
3152 return messageObject << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|\n|"
3153 << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|\n|"
3154 << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|\n|"
3155 << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3156}
3157
3158}
3159
3160#endif // META_OCEAN_MATH_SQUARE_MATRIX_4_H
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:131
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual T focalLengthX() const =0
Returns the horizontal focal length parameter.
virtual T focalLengthY() const =0
Returns the vertical focal length parameter.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual T principalPointY() const =0
Returns the y-value of the principal point of the camera image in the pixel domain.
virtual bool isValid() const =0
Returns whether this camera is valid.
virtual T principalPointX() const =0
Returns the x-value of the principal point of the camera image in the pixel domain.
static unsigned int solveQuartic(const T a, const T b, const T c, const T d, const T e, T *x, const bool refine=true)
Solves a quartic equation with the form:
Definition Equation.h:343
This class implements a 4x4 homogeneous transformation matrix using floating point values with the pr...
Definition HomogenousMatrix4.h:110
HomogenousMatrixT4< T > inverted() const noexcept
Returns the inverted of this matrix.
Definition HomogenousMatrix4.h:1575
VectorT3< T > translation() const
Returns the translation of the transformation.
Definition HomogenousMatrix4.h:1381
bool isValid() const
Returns whether this matrix is a valid homogeneous transformation.
Definition HomogenousMatrix4.h:1806
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr T weakEps()
Returns a weak epsilon.
static T atan(const T value)
Returns the arctangent of a given value.
Definition Numeric.h:1620
static T abs(const T value)
Returns the absolute value of a given value.
Definition Numeric.h:1220
static constexpr T eps()
Returns a small epsilon.
static bool isEqual(const T first, const T second)
Returns whether two values are equal up to a small epsilon.
Definition Numeric.h:2395
static T tan(const T value)
Returns the tangent of a given value.
Definition Numeric.h:1604
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2096
static constexpr T maxValue()
Returns the max scalar value.
Definition Numeric.h:3253
This class implements a 3x3 square matrix.
Definition SquareMatrix3.h:89
OCEAN_FORCE_INLINE SquareMatrixT4< T > & operator*=(const SquareMatrixT4< T > &matrix)
Multiplies and assigns two matrices.
Definition SquareMatrix4.h:1336
void copyElements(U *arrayValues) const
Copies the elements of this matrix to an array with floating point values of type U.
Definition SquareMatrix4.h:1311
SquareMatrixT4< T > & operator-=(const SquareMatrixT4< T > &matrix)
Subtracts and assigns two matrices.
Definition SquareMatrix4.h:1460
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix4.h:750
T operator()(const unsigned int index) const
Element operator.
Definition SquareMatrix4.h:1378
SquareMatrixT4(const U *arrayValues, const bool valuesRowAligned)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
Definition SquareMatrix4.h:644
static SquareMatrixT4< T > projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance)
Creates a projection matrix defined by the horizontal field of view, the aspect ratio and the near an...
Definition SquareMatrix4.h:2488
OCEAN_FORCE_INLINE VectorT3< T > operator*(const VectorT3< T > &vector) const
Multiply operator for a 3D vector.
Definition SquareMatrix4.h:1874
bool isIdentity() const
Returns whether this matrix is the identity matrix.
Definition SquareMatrix4.h:970
SquareMatrixT4(const U *arrayValues)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
Definition SquareMatrix4.h:625
SquareMatrixT4(const SquareMatrixT4< U > &matrix)
Copy constructor for a matrix with difference element data type than T.
Definition SquareMatrix4.h:602
void transpose()
Transposes the matrix.
Definition SquareMatrix4.h:788
static SquareMatrixT4< T > projectionMatrix(const AnyCameraT< T > &anyCamera, const T nearDistance, const T farDistance)
Creates a projection matrix defined by a camera profile of a pinhole camera and the near and far clip...
Definition SquareMatrix4.h:2533
SquareMatrixT4< T > transposed() const
Returns the transposed of this matrix.
Definition SquareMatrix4.h:762
void swapRows(const unsigned int row0, const unsigned int row1)
Swaps two rows of this matrix.
Definition SquareMatrix4.h:3054
void copyElements(T *arrayValues) const
Copies the elements of this matrix to an array with floating point values.
Definition SquareMatrix4.h:1322
SquareMatrixT4< T > inverted() const
Returns the inverted matrix of this matrix.
Definition SquareMatrix4.h:812
bool invert(SquareMatrixT4< T > &invertedMatrix) const
Inverts the matrix and returns the result.
Definition SquareMatrix4.h:841
SquareMatrixT4< T > operator+(const SquareMatrixT4< T > &matrix) const
Adds two matrices.
Definition SquareMatrix4.h:1429
T operator[](const unsigned int index) const
Element operator.
Definition SquareMatrix4.h:1350
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition SquareMatrix4.h:988
SquareMatrixT4()
Creates a new SquareMatrixT4 object with undefined elements.
Definition SquareMatrix4.h:595
SquareMatrixT4(const VectorT4< T > &diagonal)
Creates a new SquareMatrixT4 object by a given diagonal vector.
Definition SquareMatrix4.h:729
OCEAN_FORCE_INLINE SquareMatrixT4< T > operator*(const HomogenousMatrixT4< T > &matrix) const
Multiplies two matrices.
Definition SquareMatrix4.h:1846
friend class SquareMatrixT4
Definition SquareMatrix4.h:86
bool isSymmetric(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is symmetric.
Definition SquareMatrix4.h:994
T values[16]
The sixteen values of the matrix.
Definition SquareMatrix4.h:591
T determinant() const
Returns the determinant of the matrix.
Definition SquareMatrix4.h:906
T * data()
Returns a pointer to the internal values.
Definition SquareMatrix4.h:756
T operator()(const unsigned int row, const unsigned int column) const
Element operator.
Definition SquareMatrix4.h:1364
bool isEqual(const SquareMatrixT4< T > &matrix, const T eps=NumericT< T >::eps()) const
Returns whether two matrices are almost identical up to a specified epsilon.
Definition SquareMatrix4.h:1003
void toNull()
Sets the matrix to a zero matrix.
Definition SquareMatrix4.h:961
bool operator==(const SquareMatrixT4< T > &matrix) const
Returns whether two matrices are identical up to a small epsilon.
Definition SquareMatrix4.h:1423
static size_t elements()
Returns the number of elements this matrix has.
Definition SquareMatrix4.h:1417
SquareMatrixT4< T > & operator+=(const SquareMatrixT4< T > &matrix)
Adds and assigns two matrices.
Definition SquareMatrix4.h:1439
const T * operator()() const
Access operator.
Definition SquareMatrix4.h:1392
SquareMatrixT4< T > & operator=(const SquareMatrixT4< T > &)=default
Default assign operator.
SquareMatrixT4(const T *arrayValues, const bool valuesRowAligned)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
Definition SquareMatrix4.h:677
static void multiply(const SquareMatrixT4< T > &matrix, const VectorT4< T > *vectors, VectorT4< T > *results, const size_t number)
Multiplies several 4D vectors with a given matrix.
Definition SquareMatrix4.h:2658
SquareMatrixT4< T > operator-() const
Returns the negative matrix of this matrix (all matrix elements are multiplied by -1).
Definition SquareMatrix4.h:1471
bool isNull() const
Returns whether this matrix is a null matrix.
Definition SquareMatrix4.h:979
OCEAN_FORCE_INLINE SquareMatrixT4< T > & operator*=(const HomogenousMatrixT4< T > &matrix)
Multiplies and assigns two matrices.
Definition SquareMatrix4.h:1343
T Type
Definition of the used data type.
Definition SquareMatrix4.h:93
SquareMatrixT4(const SquareMatrixT3< T > &subMatrix)
Creates a new SquareMatrixT4 object by given 3x3 sub matrix.
Definition SquareMatrix4.h:713
SquareMatrixT4< T > operator-(const SquareMatrixT4< T > &matrix) const
Subtracts two matrices.
Definition SquareMatrix4.h:1450
void addRows(const unsigned int targetRow, const unsigned int sourceRow, const T scalar)
Multiplies elements from a specific row with a scalar and adds them to another row.
Definition SquareMatrix4.h:3106
static std::vector< SquareMatrixT4< T > > matrices2matrices(const std::vector< SquareMatrixT4< U > > &matrices)
Converts matrices with specific data type to matrices with different data type.
Definition SquareMatrix4.h:3011
static SquareMatrixT4< T > frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance)
Creates a projection matrix defined by an asymmetric viewing frustum.
Definition SquareMatrix4.h:2590
T & operator()(const unsigned int index)
Element operator.
Definition SquareMatrix4.h:1385
SquareMatrixT4< T > operator*(const T value) const
Multiplies this matrix with a scalar value.
Definition SquareMatrix4.h:2440
bool invert()
Inverts this matrix in place.
Definition SquareMatrix4.h:826
bool operator!=(const SquareMatrixT4< T > &matrix) const
Returns whether two matrices are not identical up to a small epsilon.
Definition SquareMatrix4.h:1330
size_t operator()(const SquareMatrixT4< T > &matrix) const
Hash function.
Definition SquareMatrix4.h:1404
static std::vector< SquareMatrixT4< T > > matrices2matrices(const SquareMatrixT4< U > *matrices, const size_t size)
Converts matrices with specific data type to matrices with different data type.
Definition SquareMatrix4.h:3040
OCEAN_FORCE_INLINE SquareMatrixT4< T > operator*(const SquareMatrixT4< T > &matrix) const
Multiplies two matrices.
Definition SquareMatrix4.h:1484
T & operator()(const unsigned int row, const unsigned int column)
Element operator.
Definition SquareMatrix4.h:1371
T & operator[](const unsigned int index)
Element operator.
Definition SquareMatrix4.h:1357
bool eigenSystem(T *eigenValues, VectorT4< T > *eigenVectors) const
Performs an eigen value analysis.
Definition SquareMatrix4.h:1016
void multiplyRow(const unsigned int row, const T scalar)
Multiplies a row with a scalar value.
Definition SquareMatrix4.h:3090
T trace() const
Returns the trace of the matrix which is the sum of the diagonal elements.
Definition SquareMatrix4.h:934
SquareMatrixT4(const bool setToIdentity)
Creates a new SquareMatrixT4 object.
Definition SquareMatrix4.h:611
SquareMatrixT4(const HomogenousMatrixT4< T > &transformation)
Creates a new SquareMatrixT4 object by given transformation matrix.
Definition SquareMatrix4.h:707
OCEAN_FORCE_INLINE VectorT4< T > operator*(const VectorT4< T > &vector) const
Multiply operator for a 4D vector.
Definition SquareMatrix4.h:1887
T * operator()()
Access operator.
Definition SquareMatrix4.h:1398
static SquareMatrixT4< T > frustumMatrix(const T width, const T height, const HomogenousMatrixT4< T > &viewingMatrix, const T nearDistance, const T farDistance)
Creates a project matrix defined by an asymmetric viewing frustum.
Definition SquareMatrix4.h:2636
SquareMatrixT4< T > & operator*=(const T value)
Multiplies and assigns this matrix with a scalar value.
Definition SquareMatrix4.h:2465
void toIdentity()
Sets the matrix to the identity matrix.
Definition SquareMatrix4.h:940
SquareMatrixT4(const SquareMatrixT4< T > &matrix)=default
Copy constructor.
SquareMatrixT4(const T *arrayValues)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
Definition SquareMatrix4.h:636
static void sortHighestToFront4(T &value0, T &value1, T &value2, T &value3)
Sorts four values so that the highest value will finally be the first value.
Definition base/Utilities.h:758
This class implements a vector with three elements.
Definition Vector3.h:97
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
This class implements a vector with four elements.
Definition Vector4.h:97
const T * data() const noexcept
Returns an pointer to the vector elements.
Definition Vector4.h:732
bool normalize()
Normalizes this vector.
Definition Vector4.h:605
std::vector< SquareMatrix4 > SquareMatrices4
Definition of a vector holding SquareMatrix4 objects.
Definition SquareMatrix4.h:68
std::vector< SquareMatrixT4< T > > SquareMatricesT4
Definition of a typename alias for vectors with SquareMatrixT4 objects.
Definition SquareMatrix4.h:61
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
std::ostream & operator<<(std::ostream &stream, const HighPerformanceStatistic &highPerformanceStatistic)
Definition HighPerformanceTimer.h:963