Ocean
Loading...
Searching...
No Matches
SquareMatrix4.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MATH_SQUARE_MATRIX_4_H
9#define META_OCEAN_MATH_SQUARE_MATRIX_4_H
10
11#include "ocean/math/Math.h"
12#include "ocean/math/Equation.h"
13#include "ocean/math/Vector2.h"
14#include "ocean/math/Vector3.h"
15#include "ocean/math/Vector4.h"
16
17#include <vector>
18
19namespace Ocean
20{
21
22// Forward declaration.
23template <typename T> class AnyCameraT;
24
25// Forward declaration.
26template <typename T> class HomogenousMatrixT4;
27
28// Forward declaration.
29template <typename T> class SquareMatrixT3;
30
31// Forward declaration.
32template <typename T> class SquareMatrixT4;
33
34/**
35 * Definition of the SquareMatrix4 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with single or double precision float data type.
36 * @see SquareMatrixT4
37 * @ingroup math
38 */
40
41/**
42 * Instantiation of the SquareMatrixT4 template class using a double precision float data type.
43 * @see SquareMatrixT4
44 * @ingroup math
45 */
47
48/**
49 * Instantiation of the SquareMatrixT4 template class using a double precision float data type.
50 * @see SquareMatrixT4
51 * @ingroup math
52 */
54
55/**
56 * Definition of a typename alias for vectors with SquareMatrixT4 objects.
57 * @see SquareMatrixT4
58 * @ingroup math
59 */
60template <typename T>
61using SquareMatricesT4 = std::vector<SquareMatrixT4<T>>;
62
63/**
64 * Definition of a vector holding SquareMatrix4 objects.
65 * @see SquareMatrix4
66 * @ingroup math
67 */
68typedef std::vector<SquareMatrix4> SquareMatrices4;
69
70/**
71 * This class implements a 4x4 square matrix.
72 * The values are stored in a column aligned order with indices:
73 * <pre>
74 * | 0 4 8 12 |
75 * | 1 5 9 13 |
76 * | 2 6 10 14 |
77 * | 3 7 11 15 |
78 * </pre>
79 * @tparam T Data type of matrix elements
80 * @see SquareMatrix4, SquareMatrixF4, SquareMatrixD4.
81 * @ingroup math
82 */
83template <typename T>
85{
86 template <typename U> friend class SquareMatrixT4;
87
88 public:
89
90 /**
91 * Definition of the used data type.
92 */
93 typedef T Type;
94
95 public:
96
97 /**
98 * Creates a new SquareMatrixT4 object with undefined elements.
99 * Beware: This matrix is neither a zero nor an entity matrix!
100 */
102
103 /**
104 * Copy constructor.
105 * @param matrix The matrix to copy
106 */
107 SquareMatrixT4(const SquareMatrixT4<T>& matrix) = default;
108
109 /**
110 * Copy constructor for a matrix with difference element data type than T.
111 * @param matrix The matrix to copy
112 * @tparam U The element data type of the second matrix
113 */
114 template <typename U>
115 inline explicit SquareMatrixT4(const SquareMatrixT4<U>& matrix);
116
117 /**
118 * Creates a new SquareMatrixT4 object.
119 * @param setToIdentity Determines whether a entity matrix will be created
120 */
121 explicit SquareMatrixT4(const bool setToIdentity);
122
123 /**
124 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
125 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
126 * @tparam U The floating point type of the given elements
127 */
128 template <typename U>
129 explicit SquareMatrixT4(const U* arrayValues);
130
131 /**
132 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
133 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
134 */
135 explicit SquareMatrixT4(const T* arrayValues);
136
137 /**
138 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
139 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
140 * @param valuesRowAligned True, if the given values are stored in a row aligned order; False, if the values are stored in a column aligned order (which is the default case for this matrix)
141 * @tparam U The floating point type of the given elements
142 */
143 template <typename U>
144 SquareMatrixT4(const U* arrayValues, const bool valuesRowAligned);
145
146 /**
147 * Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
148 * @param valuesRowAligned True, if the given values are stored in a row aligned order; False, if the values are stored in a column aligned order (which is the default case for this matrix)
149 * @param arrayValues The sixteen matrix elements defining the new matrix, must be valid
150 */
151 SquareMatrixT4(const T* arrayValues, const bool valuesRowAligned);
152
153 /**
154 * Creates a new SquareMatrixT4 object by given transformation matrix.
155 * @param transformation The transformation matrix to copy
156 */
157 explicit SquareMatrixT4(const HomogenousMatrixT4<T>& transformation);
158
159 /**
160 * Creates a new SquareMatrixT4 object by given 3x3 sub matrix.
161 * The lower and right elements of the 4x4 square matrix are set to zero.
162 * @param subMatrix 3x3 subMatrix defining the upper left elements of the new matrix
163 */
164 explicit SquareMatrixT4(const SquareMatrixT3<T>& subMatrix);
165
166 /**
167 * Creates a new SquareMatrixT4 object by a given diagonal vector.
168 * @param diagonal The diagonal vector for the new matrix
169 */
170 explicit SquareMatrixT4(const VectorT4<T>& diagonal);
171
172 /**
173 * Returns the transposed of this matrix.
174 * @return Transposed matrix
175 */
177
178 /**
179 * Transposes the matrix.
180 */
181 void transpose();
182
183 /**
184 * Returns the inverted matrix of this matrix.
185 * This matrix must not be singular.<br>
186 * Beware: This function does not throw an exception if the matrix cannot be inverted.<br>
187 * Thus, ensure that the matrix is invertible before calling this function.<br>
188 * Even better: avoid the usage of this function and call invert() instead.<br>
189 * In case, this matrix is not invertible, this matrix will be returned instead.
190 * @return The inverted matrix
191 * @see invert(), isSingular().
192 */
194
195 /**
196 * Inverts this matrix in place.
197 * @return True, if the matrix is not singular and could be inverted
198 * @see inverted().
199 */
200 bool invert();
201
202 /**
203 * Inverts the matrix and returns the result.
204 * @param invertedMatrix The resulting inverted matrix
205 * @return True, if the matrix is not singular and could be inverted
206 * @see inverted().
207 */
208 bool invert(SquareMatrixT4<T>& invertedMatrix) const;
209
210 /**
211 * Returns the determinant of the matrix.
212 * @return Matrix determinant
213 */
214 T determinant() const;
215
216 /**
217 * Returns the trace of the matrix which is the sum of the diagonal elements.
218 * @return Trace of the matrix
219 */
220 T trace() const;
221
222 /**
223 * Sets the matrix to the identity matrix.
224 */
225 inline void toIdentity();
226
227 /**
228 * Sets the matrix to a zero matrix.
229 * @see isNull();
230 */
231 inline void toNull();
232
233 /**
234 * Returns whether this matrix is a null matrix.
235 * @return True, if so
236 */
237 bool isNull() const;
238
239 /**
240 * Returns whether this matrix is the identity matrix.
241 * @return True, if so
242 */
243 bool isIdentity() const;
244
245 /**
246 * Returns whether this matrix is singular (and thus cannot be inverted).
247 * A matrix is singular if the determinant of a matrix is 0.<br>
248 * @return True, if so
249 */
250 inline bool isSingular() const;
251
252 /**
253 * Returns whether this matrix is symmetric.
254 * @param epsilon The epsilon threshold to be used, with range [0, infinity)
255 * @return True, if so
256 */
257 bool isSymmetric(const T epsilon = NumericT<T>::eps()) const;
258
259 /**
260 * Returns whether two matrices are almost identical up to a specified epsilon.
261 * @param matrix Second matrix that will be checked
262 * @param eps The epsilon threshold to be used, with range [0, infinity)
263 * @return True, if so
264 */
265 inline bool isEqual(const SquareMatrixT4<T>& matrix, const T eps = NumericT<T>::eps()) const;
266
267 /**
268 * Performs an eigen value analysis.
269 * @param eigenValues Vector holding the three eigen values
270 * @param eigenVectors Matrix holding the three corresponding eigen vectors
271 * @return True, if succeeded
272 */
273 bool eigenSystem(VectorT4<T>& eigenValues, SquareMatrixT4<T>& eigenVectors);
274
275 /**
276 * Returns a pointer to the internal values.
277 * @return Pointer to the internal values
278 */
279 inline const T* data() const;
280
281 /**
282 * Returns a pointer to the internal values.
283 * @return Pointer to the internal values
284 */
285 inline T* data();
286
287 /**
288 * Copies the elements of this matrix to an array with floating point values of type U.
289 * @param arrayValues Array with 16 floating point values of type U receiving the elements of this matrix
290 * @tparam U Floating point type
291 */
292 template <typename U>
293 void copyElements(U* arrayValues) const;
294
295 /**
296 * Copies the elements of this matrix to an array with floating point values.
297 * @param arrayValues Array with floating point values receiving the elements of this matrix
298 */
299 void copyElements(T* arrayValues) const;
300
301 /**
302 * Default assign operator.
303 * @return Reference to this object
304 */
306
307 /**
308 * Returns whether two matrices are identical up to a small epsilon.
309 * @param matrix Right operand
310 * @return True, if so
311 */
312 bool operator==(const SquareMatrixT4<T>& matrix) const;
313
314 /**
315 * Returns whether two matrices are not identical up to a small epsilon.
316 * @param matrix Right operand
317 * @return True, if so
318 */
319 inline bool operator!=(const SquareMatrixT4<T>& matrix) const;
320
321 /**
322 * Adds two matrices.
323 * @param matrix Right operand
324 * @return Sum matrix
325 */
327
328 /**
329 * Adds and assigns two matrices.
330 * @param matrix Right operand
331 * @return Reference to this object
332 */
334
335 /**
336 * Subtracts two matrices.
337 * @param matrix Right operand
338 * @return Difference matrix
339 */
341
342 /**
343 * Subtracts and assigns two matrices.
344 * @param matrix Right operand
345 * @return Reference to this object
346 */
348
349 /**
350 * Returns the negative matrix of this matrix (all matrix elements are multiplied by -1).
351 * @return Resulting negative matrix
352 */
354
355 /**
356 * Multiplies two matrices.
357 * @param matrix Right operand
358 * @return Product matrix
359 */
360 OCEAN_FORCE_INLINE SquareMatrixT4<T> operator*(const SquareMatrixT4<T>& matrix) const;
361
362 /**
363 * Multiplies two matrices.
364 * @param matrix Right operand
365 * @return Product matrix
366 */
367 OCEAN_FORCE_INLINE SquareMatrixT4<T> operator*(const HomogenousMatrixT4<T>& matrix) const;
368
369 /**
370 * Multiplies and assigns two matrices.
371 * @param matrix Right operand
372 * @return Reference to this object
373 */
374 OCEAN_FORCE_INLINE SquareMatrixT4<T>& operator*=(const SquareMatrixT4<T>& matrix);
375
376 /**
377 * Multiplies and assigns two matrices.
378 * @param matrix Right operand
379 * @return Reference to this object
380 */
381 OCEAN_FORCE_INLINE SquareMatrixT4<T>& operator*=(const HomogenousMatrixT4<T>& matrix);
382
383 /**
384 * Multiply operator for a 3D vector.
385 * The 3D vector is interpreted as a 4D vector with fourth component equal to 1.<br>
386 * The final result will be de-homogenizated to provide a 3D vector result.<br>
387 * Beware the dot product of the last row with the vector must not be zero!
388 * @param vector Right operand
389 * @return Resulting 3D vector
390 */
391 OCEAN_FORCE_INLINE VectorT3<T> operator*(const VectorT3<T>& vector) const;
392
393 /**
394 * Multiply operator for a 4D vector.
395 * @param vector Right operand
396 * @return Resulting 4D vector
397 */
398 OCEAN_FORCE_INLINE VectorT4<T> operator*(const VectorT4<T>& vector) const;
399
400 /**
401 * Multiplies this matrix with a scalar value.
402 * @param value Right operand
403 * @return Resulting matrix
404 */
405 SquareMatrixT4<T> operator*(const T value) const;
406
407 /**
408 * Multiplies and assigns this matrix with a scalar value.
409 * @param value right operand
410 * @return Reference to this object
411 */
413
414 /**
415 * Element operator.
416 * Beware: No range check will be done!
417 * @param index The index of the element to return [0, 15]
418 * @return Specified element
419 */
420 inline T operator[](const unsigned int index) const;
421
422 /**
423 * Element operator.
424 * Beware: No range check will be done!
425 * @param index The index of the element to return [0, 15]
426 * @return Specified element
427 */
428 inline T& operator[](const unsigned int index);
429
430 /**
431 * Element operator.
432 * Beware: No range check will be done!
433 * @param row The row of the element to return [0, 3]
434 * @param column The column of the element to return [0, 3]
435 * @return Specified element
436 */
437 inline T operator()(const unsigned int row, const unsigned int column) const;
438
439 /**
440 * Element operator.
441 * Beware: No range check will be done!
442 * @param row The row of the element to return [0, 3]
443 * @param column The column of the element to return [0, 3]
444 * @return Specified element
445 */
446 inline T& operator()(const unsigned int row, const unsigned int column);
447
448 /**
449 * Element operator.
450 * Beware: No range check will be done!
451 * @param index The index of the element to return [0, 15]
452 * @return Specified element
453 */
454 inline T operator()(const unsigned int index) const;
455
456 /**
457 * Element operator.
458 * Beware: No range check will be done!
459 * @param index The index of the element to return [0, 15]
460 * @return Specified element
461 */
462 inline T& operator()(const unsigned int index);
463
464 /**
465 * Access operator.
466 * @return Pointer to the internal values
467 */
468 inline const T* operator()() const;
469
470 /**
471 * Access operator.
472 * @return Pointer to the internal values
473 */
474 inline T* operator()();
475
476 /**
477 * Hash function.
478 * @param matrix The matrix for which the hash value will be determined
479 * @return The resulting hash value
480 */
481 inline size_t operator()(const SquareMatrixT4<T>& matrix) const;
482
483 /**
484 * Returns the number of elements this matrix has.
485 * @return The number of elements, always 16
486 */
487 static inline size_t elements();
488
489 /**
490 * Multiplies several 4D vectors with a given matrix.
491 * @param matrix The matrix to be used for multiplication, may be nullptr if number is 0
492 * @param vectors The input vectors that will be multiplied, may be nullptr if number is 0
493 * @param results The resulting output (multiplied/transformed) vectors, with same number as the provided input vectors
494 * @param number The number of provided vectors (input and output), with range [0, infinity)
495 */
496 static void multiply(const SquareMatrixT4<T>& matrix, const VectorT4<T>* vectors, VectorT4<T>* results, const size_t number);
497
498 /**
499 * Creates a projection matrix defined by the horizontal field of view, the aspect ratio and the near and far clipping plane.
500 * @param fovX Horizontal field of view, in radian and a range of [0, Pi]
501 * @param aspectRatio View aspect ratio which is width divided by the height of the projection window
502 * @param nearDistance Positive distance to the near clipping plane
503 * @param farDistance Positive distance to the far clipping lane
504 * @return Specified projection matrix
505 */
506 static SquareMatrixT4<T> projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance);
507
508 /**
509 * Creates a projection matrix defined by a camera profile of a pinhole camera and the near and far clipping plane.
510 * @param anyCamera The camera profile of a pinhole camera, without distortion parameters, must be valid
511 * @param nearDistance Positive distance to the near clipping plane
512 * @param farDistance Positive distance to the far clipping lane
513 * @return Specified projection matrix
514 */
515 static SquareMatrixT4<T> projectionMatrix(const AnyCameraT<T>& anyCamera, const T nearDistance, const T farDistance);
516
517 /**
518 * Creates a projection matrix defined by an asymmetric viewing frustum.
519 * The shape of the frustum is defined by the rectangle on the near plane.<br>
520 * Afterwards, the field of view is defined by the (positive) distance to the near clipping plane.<br>
521 * Followed by the (positive) far clipping plane to determine the entire frustum.
522 * @param left Position of the left border of the rectangle on the near plane
523 * @param right Position of the right border of the rectangle on the near plane
524 * @param top Position of the top border of the rectangle on the near plane
525 * @param bottom Position of the bottom border of the rectangle on the near plane
526 * @param nearDistance Positive distance to the near clipping plane
527 * @param farDistance Positive distance to the far clipping plane
528 * @return Specified frustum projection matrix
529 */
530 static SquareMatrixT4<T> frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance);
531
532 /**
533 * Creates a project matrix defined by an asymmetric viewing frustum.
534 * The shape of the frustum is defined by the rectangle on the near plane.<br>
535 * The viewing position is defined by the given view matrix while the near plane is expected to lie in the origin of the coordinate system.
536 * @param width The width of the near plane, with range (0, infinity)
537 * @param height The height of the near plane, with range (0, infinity)
538 * @param viewingMatrix Viewing matrix transforming point defined in the camera coordinate system into points defined in the world coordinate system, must be invertible
539 * @param nearDistance Positive distance to the near clipping plane, with range (0, infinity)
540 * @param farDistance Positive distance to the far clipping plane, with range (nearDistance, infinity)
541 * @return Specified frustum projection matrix
542 */
543 static SquareMatrixT4<T> frustumMatrix(const T width, const T height, const HomogenousMatrixT4<T>& viewingMatrix, const T nearDistance, const T farDistance);
544
545 /**
546 * Converts matrices with specific data type to matrices with different data type.
547 * @param matrices The matrices to convert
548 * @return The converted matrices
549 * @tparam U The element data type of the matrices to convert
550 */
551 template <typename U>
552 static inline std::vector< SquareMatrixT4<T> > matrices2matrices(const std::vector< SquareMatrixT4<U> >& matrices);
553
554 /**
555 * Converts matrices with specific data type to matrices with different data type.
556 * @param matrices The matrices to convert
557 * @param size The number of matrices to convert
558 * @return The converted matrices
559 * @tparam U The element data type of the matrices to convert
560 */
561 template <typename U>
562 static inline std::vector< SquareMatrixT4<T> > matrices2matrices(const SquareMatrixT4<U>* matrices, const size_t size);
563
564 protected:
565
566 /**
567 * Swaps two rows of this matrix.
568 * @param row0 The index of the first row ,with range [0, 3]
569 * @param row1 The index of the second row, with range [0, 3]
570 */
571 void swapRows(const unsigned int row0, const unsigned int row1);
572
573 /**
574 * Multiplies a row with a scalar value.
575 * @param row The index of the row to multiply, with range [0, 3]
576 * @param scalar The scalar to multiply
577 */
578 void multiplyRow(const unsigned int row, const T scalar);
579
580 /**
581 * Multiplies elements from a specific row with a scalar and adds them to another row.
582 * @param targetRow The index of the target row, with range [0, 3]
583 * @param sourceRow The index of the source row, with range [0, 3]
584 * @param scalar The scalar to multiply the source elements with
585 */
586 void addRows(const unsigned int targetRow, const unsigned int sourceRow, const T scalar);
587
588 protected:
589
590 /// The sixteen values of the matrix.
591 T values[16];
592};
593
594template <typename T>
596{
597 // nothing to do here
598}
599
600template <typename T>
601template <typename U>
603{
604 values[ 0] = T(matrix.values[ 0]);
605 values[ 1] = T(matrix.values[ 1]);
606 values[ 2] = T(matrix.values[ 2]);
607 values[ 3] = T(matrix.values[ 3]);
608 values[ 4] = T(matrix.values[ 4]);
609 values[ 5] = T(matrix.values[ 5]);
610 values[ 6] = T(matrix.values[ 6]);
611 values[ 7] = T(matrix.values[ 7]);
612 values[ 8] = T(matrix.values[ 8]);
613 values[ 9] = T(matrix.values[ 9]);
614 values[10] = T(matrix.values[10]);
615 values[11] = T(matrix.values[11]);
616 values[12] = T(matrix.values[12]);
617 values[13] = T(matrix.values[13]);
618 values[14] = T(matrix.values[14]);
619 values[15] = T(matrix.values[15]);
620}
621
622template <typename T>
623SquareMatrixT4<T>::SquareMatrixT4(const bool setToIdentity)
624{
625 if (setToIdentity)
626 {
627 values[ 0] = T(1.0);
628 values[ 1] = T(0.0);
629 values[ 2] = T(0.0);
630 values[ 3] = T(0.0);
631 values[ 4] = T(0.0);
632 values[ 5] = T(1.0);
633 values[ 6] = T(0.0);
634 values[ 7] = T(0.0);
635 values[ 8] = T(0.0);
636 values[ 9] = T(0.0);
637 values[10] = T(1.0);
638 values[11] = T(0.0);
639 values[12] = T(0.0);
640 values[13] = T(0.0);
641 values[14] = T(0.0);
642 values[15] = T(1.0);
643 }
644 else
645 {
646 values[ 0] = T(0.0);
647 values[ 1] = T(0.0);
648 values[ 2] = T(0.0);
649 values[ 3] = T(0.0);
650 values[ 4] = T(0.0);
651 values[ 5] = T(0.0);
652 values[ 6] = T(0.0);
653 values[ 7] = T(0.0);
654 values[ 8] = T(0.0);
655 values[ 9] = T(0.0);
656 values[10] = T(0.0);
657 values[11] = T(0.0);
658 values[12] = T(0.0);
659 values[13] = T(0.0);
660 values[14] = T(0.0);
661 values[15] = T(0.0);
662 }
663}
664
665template <typename T>
666template <typename U>
668{
669 ocean_assert(arrayValues);
670
671 for (unsigned int n = 0u; n < 16u; ++n)
672 {
673 values[n] = T(arrayValues[n]);
674 }
675}
676
677template <typename T>
679{
680 ocean_assert(arrayValues);
681 memcpy(values, arrayValues, sizeof(T) * 16);
682}
683
684template <typename T>
685template <typename U>
686SquareMatrixT4<T>::SquareMatrixT4(const U* arrayValues, const bool valuesRowAligned)
687{
688 ocean_assert(arrayValues);
689
690 if (valuesRowAligned)
691 {
692 values[ 0] = T(arrayValues[ 0]);
693 values[ 1] = T(arrayValues[ 4]);
694 values[ 2] = T(arrayValues[ 8]);
695 values[ 3] = T(arrayValues[12]);
696 values[ 4] = T(arrayValues[ 1]);
697 values[ 5] = T(arrayValues[ 5]);
698 values[ 6] = T(arrayValues[ 9]);
699 values[ 7] = T(arrayValues[13]);
700 values[ 8] = T(arrayValues[ 2]);
701 values[ 9] = T(arrayValues[ 6]);
702 values[10] = T(arrayValues[10]);
703 values[11] = T(arrayValues[14]);
704 values[12] = T(arrayValues[ 3]);
705 values[13] = T(arrayValues[ 7]);
706 values[14] = T(arrayValues[11]);
707 values[15] = T(arrayValues[15]);
708 }
709 else
710 {
711 for (unsigned int n = 0u; n < 16u; ++n)
712 values[n] = T(arrayValues[n]);
713 }
714}
715
716template <typename T>
717SquareMatrixT4<T>::SquareMatrixT4(const T* arrayValues, const bool valuesRowAligned)
718{
719 ocean_assert(arrayValues);
720
721 if (valuesRowAligned)
722 {
723 values[ 0] = arrayValues[ 0];
724 values[ 1] = arrayValues[ 4];
725 values[ 2] = arrayValues[ 8];
726 values[ 3] = arrayValues[12];
727 values[ 4] = arrayValues[ 1];
728 values[ 5] = arrayValues[ 5];
729 values[ 6] = arrayValues[ 9];
730 values[ 7] = arrayValues[13];
731 values[ 8] = arrayValues[ 2];
732 values[ 9] = arrayValues[ 6];
733 values[10] = arrayValues[10];
734 values[11] = arrayValues[14];
735 values[12] = arrayValues[ 3];
736 values[13] = arrayValues[ 7];
737 values[14] = arrayValues[11];
738 values[15] = arrayValues[15];
739 }
740 else
741 {
742 memcpy(values, arrayValues, sizeof(T) * 16);
743 }
744}
745
746template <typename T>
748{
749 memcpy(values, transformation(), sizeof(T) * 16);
750}
751
752template <typename T>
754{
755 values[ 3] = T(0.0);
756 values[ 7] = T(0.0);
757 values[11] = T(0.0);
758 values[12] = T(0.0);
759 values[13] = T(0.0);
760 values[14] = T(0.0);
761 values[15] = T(0.0);
762
763 memcpy(values, subMatrix(), sizeof(T) * 3); // Set values[0] - values[2]
764 memcpy(values + 4, subMatrix() + 3, sizeof(T) * 3); // Set values[4] - values[6]
765 memcpy(values + 8, subMatrix() + 6, sizeof(T) * 3); // Set values[8] - values[10]
766}
767
768template <typename T>
770{
771 values[ 0] = diagonal[0];
772 values[ 1] = T(0.0);
773 values[ 2] = T(0.0);
774 values[ 3] = T(0.0);
775 values[ 4] = T(0.0);
776 values[ 5] = diagonal[1];
777 values[ 6] = T(0.0);
778 values[ 7] = T(0.0);
779 values[ 8] = T(0.0);
780 values[ 9] = T(0.0);
781 values[10] = diagonal[2];
782 values[11] = T(0.0);
783 values[12] = T(0.0);
784 values[13] = T(0.0);
785 values[14] = T(0.0);
786 values[15] = diagonal[3];
787}
788
789template <typename T>
790inline const T* SquareMatrixT4<T>::data() const
791{
792 return values;
793}
794
795template <typename T>
797{
798 return values;
799}
800
801template <typename T>
803{
804 SquareMatrixT4<T> result(*this);
805
806 result.values[1] = values[4];
807 result.values[4] = values[1];
808
809 result.values[2] = values[8];
810 result.values[8] = values[2];
811
812 result.values[3] = values[12];
813 result.values[12] = values[3];
814
815 result.values[6] = values[9];
816 result.values[9] = values[6];
817
818 result.values[7] = values[13];
819 result.values[13] = values[7];
820
821 result.values[11] = values[14];
822 result.values[14] = values[11];
823
824 return result;
825}
826
827template <typename T>
829{
830 SquareMatrixT4<T> tmp(*this);
831
832 values[4] = tmp.values[1];
833 values[1] = tmp.values[4];
834
835 values[8] = tmp.values[2];
836 values[2] = tmp.values[8];
837
838 values[12] = tmp.values[3];
839 values[3] = tmp.values[12];
840
841 values[9] = tmp.values[6];
842 values[6] = tmp.values[9];
843
844 values[13] = tmp.values[7];
845 values[7] = tmp.values[13];
846
847 values[14] = tmp.values[11];
848 values[11] = tmp.values[14];
849}
850
851template <typename T>
853{
854 SquareMatrixT4 invertedMatrix;
855
856 if (!invert(invertedMatrix))
857 {
858 ocean_assert(false && "Could not invert matrix.");
859 return *this;
860 }
861
862 return invertedMatrix;
863}
864
865template <typename T>
867{
868 SquareMatrixT4<T> invertedMatrix;
869
870 if (!invert(invertedMatrix))
871 {
872 return false;
873 }
874
875 *this = invertedMatrix;
876
877 return true;
878}
879
880template <typename T>
882{
883 // implements the Gauss-Jordon Elimination
884
885 SquareMatrixT4<T> source(*this);
886 invertedMatrix.toIdentity();
887
888 for (unsigned int col = 0; col < 4u; ++col)
889 {
890 // find largest absolute value in the col-th column,
891 // to remove zeros from the main diagonal and to provide numerical stability
892 T absolute = 0.0;
893 unsigned int selectedRow = 0u;
894
895 for (unsigned int row = col; row < 4u; ++row)
896 {
897 const T value = NumericT<T>::abs(source(row, col));
898 if (absolute < value)
899 {
900 absolute = value;
901 selectedRow = row;
902 }
903 }
904
905 // if there was no greater absolute value than 0 this matrix is singular
906
907 if (NumericT<T>::isEqualEps(absolute))
908 {
909 return false;
910 }
911
912 // exchange the two rows
913 if (selectedRow != col)
914 {
915 source.swapRows(col, selectedRow);
916 invertedMatrix.swapRows(col, selectedRow);
917 }
918
919 // now the element at (col, col) will be 1.0
920 if (NumericT<T>::isNotEqual(source(col, col), T(1.0)))
921 {
922 const T divisor = T(1.0) / source(col, col);
923 ocean_assert(divisor != T(0.0));
924
925 source.multiplyRow(col, divisor);
926 invertedMatrix.multiplyRow(col, divisor);
927 }
928
929 // clear each entry above and below the selected row and column to zero
930 for (unsigned int row = 0; row < 4u; ++row)
931 {
932 if (row != col)
933 {
934 const T value = -source(row, col);
935
936 source.addRows(row, col, value);
937 invertedMatrix.addRows(row, col, value);
938 }
939 }
940 }
941
942 return true;
943}
944
945template <typename T>
947{
948 const T v6_15 = values[6] * values[15];
949 const T v10_15 = values[10] * values[15];
950 const T v11_14 = values[11] * values[14];
951 const T v7_10 = values[7] * values[10];
952 const T v9_14 = values[9] * values[14];
953 const T v6_13 = values[6] * values[13];
954 const T v2_13 = values[2] * values[13];
955 const T v2_9 = values[2] * values[9];
956 const T v3_10 = values[3] * values[10];
957 const T v2_5 = values[2] * values[5];
958
959 return values[0] * (values[5] * v10_15 - values[13] * v7_10
960 + v9_14 * values[7] - values[5] * v11_14
961 + v6_13 * values[11] - values[9] * v6_15)
962 - values[4] * (v9_14 * values[3] - values[1] * v11_14
963 + v2_13 * values[11] - v2_9 * values[15]
964 + values[1] * v10_15 - values[13] * v3_10)
965 + values[8] * (values[1] * v6_15 - v6_13 * values[3]
966 + values[5] * values[14] * values[3] - values[1] * values[14] * values[7]
967 + v2_13 * values[7] - v2_5 * values[15])
968 - values[12] * (values[1] * values[6] * values[11] - values[9] * values[6] * values[3]
969 + values[5] * v3_10 - values[1] * v7_10
970 + v2_9 * values[7] - v2_5 * values[11]);
971}
972
973template <typename T>
975{
976 return values[0] + values[5] + values[10] + values[15];
977}
978
979template <typename T>
981{
982 values[ 0] = T(1.0);
983 values[ 1] = T(0.0);
984 values[ 2] = T(0.0);
985 values[ 3] = T(0.0);
986 values[ 4] = T(0.0);
987 values[ 5] = T(1.0);
988 values[ 6] = T(0.0);
989 values[ 7] = T(0.0);
990 values[ 8] = T(0.0);
991 values[ 9] = T(0.0);
992 values[10] = T(1.0);
993 values[11] = T(0.0);
994 values[12] = T(0.0);
995 values[13] = T(0.0);
996 values[14] = T(0.0);
997 values[15] = T(1.0);
998}
999
1000template <typename T>
1002{
1003 values[ 0] = T(0.0);
1004 values[ 1] = T(0.0);
1005 values[ 2] = T(0.0);
1006 values[ 3] = T(0.0);
1007 values[ 4] = T(0.0);
1008 values[ 5] = T(0.0);
1009 values[ 6] = T(0.0);
1010 values[ 7] = T(0.0);
1011 values[ 8] = T(0.0);
1012 values[ 9] = T(0.0);
1013 values[10] = T(0.0);
1014 values[11] = T(0.0);
1015 values[12] = T(0.0);
1016 values[13] = T(0.0);
1017 values[14] = T(0.0);
1018 values[15] = T(0.0);
1019}
1020
1021template <typename T>
1023{
1024 return NumericT<T>::isEqual(values[0], 1) && NumericT<T>::isEqualEps(values[1]) && NumericT<T>::isEqualEps(values[2]) && NumericT<T>::isEqualEps(values[3])
1025 && NumericT<T>::isEqualEps(values[4]) && NumericT<T>::isEqual(values[5], 1) && NumericT<T>::isEqualEps(values[6]) && NumericT<T>::isEqualEps(values[7])
1026 && NumericT<T>::isEqualEps(values[8]) && NumericT<T>::isEqualEps(values[9]) && NumericT<T>::isEqual(values[10], 1) && NumericT<T>::isEqualEps(values[11])
1027 && NumericT<T>::isEqualEps(values[12]) && NumericT<T>::isEqualEps(values[13]) && NumericT<T>::isEqualEps(values[14]) && NumericT<T>::isEqual(values[15], 1);
1028}
1029
1030template <typename T>
1032{
1033 return NumericT<T>::isEqualEps(values[0]) && NumericT<T>::isEqualEps(values[1]) && NumericT<T>::isEqualEps(values[2]) && NumericT<T>::isEqualEps(values[3])
1034 && NumericT<T>::isEqualEps(values[4]) && NumericT<T>::isEqualEps(values[5]) && NumericT<T>::isEqualEps(values[6]) && NumericT<T>::isEqualEps(values[7])
1035 && NumericT<T>::isEqualEps(values[8]) && NumericT<T>::isEqualEps(values[9]) && NumericT<T>::isEqualEps(values[10]) && NumericT<T>::isEqualEps(values[11])
1036 && NumericT<T>::isEqualEps(values[12]) && NumericT<T>::isEqualEps(values[13]) && NumericT<T>::isEqualEps(values[14]) && NumericT<T>::isEqualEps(values[15]);
1037}
1038
1039template <typename T>
1041{
1042 return NumericT<T>::isEqualEps(determinant());
1043}
1044
1045template <typename T>
1046bool SquareMatrixT4<T>::isSymmetric(const T epsilon) const
1047{
1048 ocean_assert(epsilon >= T(0));
1049
1050 return NumericT<T>::isEqual(values[1], values[4], epsilon) && NumericT<T>::isEqual(values[2], values[8], epsilon) && NumericT<T>::isEqual(values[3], values[12], epsilon)
1051 && NumericT<T>::isEqual(values[6], values[9], epsilon) && NumericT<T>::isEqual(values[7], values[13], epsilon) && NumericT<T>::isEqual(values[11], values[14], epsilon);
1052}
1053
1054template <typename T>
1055inline bool SquareMatrixT4<T>::isEqual(const SquareMatrixT4<T>& matrix, const T eps) const
1056{
1057 return NumericT<T>::isEqual(values[0], matrix.values[0], eps) && NumericT<T>::isEqual(values[1], matrix.values[1], eps)
1058 && NumericT<T>::isEqual(values[2], matrix.values[2], eps) && NumericT<T>::isEqual(values[3], matrix.values[3], eps)
1059 && NumericT<T>::isEqual(values[4], matrix.values[4], eps) && NumericT<T>::isEqual(values[5], matrix.values[5], eps)
1060 && NumericT<T>::isEqual(values[6], matrix.values[6], eps) && NumericT<T>::isEqual(values[7], matrix.values[7], eps)
1061 && NumericT<T>::isEqual(values[8], matrix.values[8], eps) && NumericT<T>::isEqual(values[9], matrix.values[9], eps)
1062 && NumericT<T>::isEqual(values[10], matrix.values[10], eps) && NumericT<T>::isEqual(values[11], matrix.values[11], eps)
1063 && NumericT<T>::isEqual(values[12], matrix.values[12], eps) && NumericT<T>::isEqual(values[13], matrix.values[13], eps)
1064 && NumericT<T>::isEqual(values[14], matrix.values[14], eps) && NumericT<T>::isEqual(values[15], matrix.values[15], eps);
1065}
1066
1067template <typename T>
1069{
1070 /**
1071 * <pre>
1072 * Computation of the characteristic polynomial
1073 *
1074 * [ a b c d ]
1075 * A = [ e f g h ]
1076 * [ i j k l ]
1077 * [ m n o p ]
1078 *
1079 * [ a-x b c d ]
1080 * A - x * E = [ e f-x g h ]
1081 * [ i j k-x l ]
1082 * [ m n o p-x ]
1083 *
1084 * Polynomial = Det|A - x * E| = 0
1085 * = x^4 + (-a - f - k - p)x^3 + (-be + af - ci - gj + ak + fk - dm - hn - lo + ap + fp + kp)x^2
1086 * + (cfi - bgi - cej + agj + bek - afk + dfm - bhm + dkm - clm - den + ahn + hkn - gln - dio - hjo + alo + flo + bep - afp + cip + gjp - akp - fkp)x
1087 * + (dgjm - chjm - dfkm + bhkm + cflm - bglm)
1088 * + (-dgin + chin + dekn - ahkn - celn + agln)
1089 * + (dfio - bhio - dejo + ahjo + belo - aflo)
1090 * + (-cfip + bgip + cejp - agjp - bekp + afkp)
1091 * = a1x^4 + a2x^3 + a3x^2 + a4x + a5 = 0
1092 * </pre>
1093 */
1094
1095 const T a = values[0];
1096 const T b = values[1];
1097 const T c = values[2];
1098 const T d = values[3];
1099 const T e = values[4];
1100 const T f = values[5];
1101 const T g = values[6];
1102 const T h = values[7];
1103 const T i = values[8];
1104 const T j = values[9];
1105 const T k = values[10];
1106 const T l = values[11];
1107 const T m = values[12];
1108 const T n = values[13];
1109 const T o = values[14];
1110 const T p = values[15];
1111
1112 const T a1 = 1.0;
1113 const T a2 = -a - f - k - p;
1114 const T a3 = -b * e + a * f - c * i - g * j + a * k + f * k - d * m - h * n - l * o + a * p + f * p + k * p;
1115 const T a4 = c * f * i - b * g * i - c * e * j + a * g * j + b * e * k - a * f * k + d * f * m - b * h * m
1116 + d * k * m - c * l * m - d * e * n + a * h * n + h * k * n - g * l * n - d * i * o - h * j * o
1117 + a * l * o + f * l * o + b * e * p - a * f * p + c * i * p + g * j * p - a * k * p - f * k * p;
1118 const T a5 = d * g * j * m - c * h * j * m - d * f * k * m + b * h * k * m + c * f * l * m - b * g * l * m
1119 - d * g * i * n + c * h * i * n + d * e * k * n - a * h * k * n - c * e * l * n + a * g * l * n
1120 + d * f * i * o - b * h * i * o - d * e * j * o + a * h * j * o + b * e * l * o - a * f * l * o
1121 - c * f * i * p + b * g * i * p + c * e * j * p - a * g * j * p - b * e * k * p + a * f * k * p;
1122
1123#if defined(SQUARE_MATRIX_DISABLED_MISSING_IMPLEMENTATION)
1124 T x[4];
1125 unsigned int solutions = EquationT<T>::solveQuartic(a1, a2, a3, a4, a5, x);
1126#endif // SQUARE_MATRIX_DISABLED_MISSING_IMPLEMENTATION
1127
1128 ocean_assert(false && "Missing implementation, calculate eigen vectors");
1129 OCEAN_SUPPRESS_UNUSED_WARNING(eigenValues);
1130 OCEAN_SUPPRESS_UNUSED_WARNING(eigenVectors);
1131
1132 return false;
1133}
1134
1135template <typename T>
1136template <typename U>
1137void SquareMatrixT4<T>::copyElements(U* arrayValues) const
1138{
1139 ocean_assert(arrayValues);
1140
1141 arrayValues[ 0] = U(values[ 0]);
1142 arrayValues[ 1] = U(values[ 1]);
1143 arrayValues[ 2] = U(values[ 2]);
1144 arrayValues[ 3] = U(values[ 3]);
1145 arrayValues[ 4] = U(values[ 4]);
1146 arrayValues[ 5] = U(values[ 5]);
1147 arrayValues[ 6] = U(values[ 6]);
1148 arrayValues[ 7] = U(values[ 7]);
1149 arrayValues[ 8] = U(values[ 8]);
1150 arrayValues[ 9] = U(values[ 9]);
1151 arrayValues[10] = U(values[10]);
1152 arrayValues[11] = U(values[11]);
1153 arrayValues[12] = U(values[12]);
1154 arrayValues[13] = U(values[13]);
1155 arrayValues[14] = U(values[14]);
1156 arrayValues[15] = U(values[15]);
1157}
1158
1159template <typename T>
1160void SquareMatrixT4<T>::copyElements(T* arrayValues) const
1161{
1162 ocean_assert(arrayValues);
1163
1164 memcpy(arrayValues, values, sizeof(T) * 16);
1165}
1166
1167template <typename T>
1168inline bool SquareMatrixT4<T>::operator!=(const SquareMatrixT4<T>& matrix) const
1169{
1170 return !(*this == matrix);
1171}
1172
1173template <typename T>
1175{
1176 *this = *this * matrix;
1177 return *this;
1178}
1179
1180template <typename T>
1182{
1183 *this = *this * matrix;
1184 return *this;
1185}
1186
1187template <typename T>
1188inline T SquareMatrixT4<T>::operator[](const unsigned int index) const
1189{
1190 ocean_assert(index < 16u);
1191 return values[index];
1192}
1193
1194template <typename T>
1195inline T& SquareMatrixT4<T>::operator[](const unsigned int index)
1196{
1197 ocean_assert(index < 16u);
1198 return values[index];
1199}
1200
1201template <typename T>
1202inline T SquareMatrixT4<T>::operator()(const unsigned int row, const unsigned int column) const
1203{
1204 ocean_assert(row < 4u && column < 4u);
1205 return values[(column << 2) + row]; // values[(column * 4) + row];
1206}
1207
1208template <typename T>
1209inline T& SquareMatrixT4<T>::operator()(const unsigned int row, const unsigned int column)
1210{
1211 ocean_assert(row < 4u && column < 4u);
1212 return values[(column << 2) + row]; // values[(column * 4) + row];
1213}
1214
1215template <typename T>
1216inline T SquareMatrixT4<T>::operator()(const unsigned int index) const
1217{
1218 ocean_assert(index < 16u);
1219 return values[index];
1220}
1221
1222template <typename T>
1223inline T& SquareMatrixT4<T>::operator()(const unsigned int index)
1224{
1225 ocean_assert(index < 16u);
1226 return values[index];
1227}
1228
1229template <typename T>
1230inline const T* SquareMatrixT4<T>::operator()() const
1231{
1232 return values;
1233}
1234
1235template <typename T>
1237{
1238 return values;
1239}
1240
1241template <typename T>
1242inline size_t SquareMatrixT4<T>::operator()(const SquareMatrixT4<T>& matrix) const
1243{
1244 size_t seed = std::hash<T>{}(matrix.values[0]);
1245
1246 for (unsigned int n = 1u; n < 16u; ++n)
1247 {
1248 seed ^= std::hash<T>{}(matrix.values[n]) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
1249 }
1250
1251 return seed;
1252}
1253
1254template <typename T>
1256{
1257 return 16;
1258}
1259
1260template <typename T>
1262{
1263 return isEqual(matrix);
1264}
1265
1266template <typename T>
1268{
1269 SquareMatrixT4<T> result(*this);
1270
1271 result.values[0] += matrix.values[0];
1272 result.values[1] += matrix.values[1];
1273 result.values[2] += matrix.values[2];
1274 result.values[3] += matrix.values[3];
1275 result.values[4] += matrix.values[4];
1276 result.values[5] += matrix.values[5];
1277 result.values[6] += matrix.values[6];
1278 result.values[7] += matrix.values[7];
1279 result.values[8] += matrix.values[8];
1280 result.values[9] += matrix.values[9];
1281 result.values[10] += matrix.values[10];
1282 result.values[11] += matrix.values[11];
1283 result.values[12] += matrix.values[12];
1284 result.values[13] += matrix.values[13];
1285 result.values[14] += matrix.values[14];
1286 result.values[15] += matrix.values[15];
1287
1288 return result;
1289}
1290
1291template <typename T>
1293{
1294 values[0] += matrix.values[0];
1295 values[1] += matrix.values[1];
1296 values[2] += matrix.values[2];
1297 values[3] += matrix.values[3];
1298 values[4] += matrix.values[4];
1299 values[5] += matrix.values[5];
1300 values[6] += matrix.values[6];
1301 values[7] += matrix.values[7];
1302 values[8] += matrix.values[8];
1303 values[9] += matrix.values[9];
1304 values[10] += matrix.values[10];
1305 values[11] += matrix.values[11];
1306 values[12] += matrix.values[12];
1307 values[13] += matrix.values[13];
1308 values[14] += matrix.values[14];
1309 values[15] += matrix.values[15];
1310
1311 return *this;
1312}
1313
1314template <typename T>
1316{
1317 SquareMatrixT4<T> result(*this);
1318
1319 result.values[0] -= matrix.values[0];
1320 result.values[1] -= matrix.values[1];
1321 result.values[2] -= matrix.values[2];
1322 result.values[3] -= matrix.values[3];
1323 result.values[4] -= matrix.values[4];
1324 result.values[5] -= matrix.values[5];
1325 result.values[6] -= matrix.values[6];
1326 result.values[7] -= matrix.values[7];
1327 result.values[8] -= matrix.values[8];
1328 result.values[9] -= matrix.values[9];
1329 result.values[10] -= matrix.values[10];
1330 result.values[11] -= matrix.values[11];
1331 result.values[12] -= matrix.values[12];
1332 result.values[13] -= matrix.values[13];
1333 result.values[14] -= matrix.values[14];
1334 result.values[15] -= matrix.values[15];
1335
1336 return result;
1337}
1338
1339template <typename T>
1341{
1342 values[0] -= matrix.values[0];
1343 values[1] -= matrix.values[1];
1344 values[2] -= matrix.values[2];
1345 values[3] -= matrix.values[3];
1346 values[4] -= matrix.values[4];
1347 values[5] -= matrix.values[5];
1348 values[6] -= matrix.values[6];
1349 values[7] -= matrix.values[7];
1350 values[8] -= matrix.values[8];
1351 values[9] -= matrix.values[9];
1352 values[10] -= matrix.values[10];
1353 values[11] -= matrix.values[11];
1354 values[12] -= matrix.values[12];
1355 values[13] -= matrix.values[13];
1356 values[14] -= matrix.values[14];
1357 values[15] -= matrix.values[15];
1358
1359 return *this;
1360}
1361
1362template <typename T>
1364{
1365 SquareMatrixT4<T> result;
1366
1367 result.values[ 0] = -values[ 0];
1368 result.values[ 1] = -values[ 1];
1369 result.values[ 2] = -values[ 2];
1370 result.values[ 3] = -values[ 3];
1371 result.values[ 4] = -values[ 4];
1372 result.values[ 5] = -values[ 5];
1373 result.values[ 6] = -values[ 6];
1374 result.values[ 7] = -values[ 7];
1375 result.values[ 8] = -values[ 8];
1376 result.values[ 9] = -values[ 9];
1377 result.values[10] = -values[10];
1378 result.values[11] = -values[11];
1379 result.values[12] = -values[12];
1380 result.values[13] = -values[13];
1381 result.values[14] = -values[14];
1382
1383 return result;
1384}
1385
1386template <typename T>
1388{
1389 SquareMatrixT4<T> result;
1390
1391 result.values[0] = values[0] * matrix.values[0] + values[4] * matrix.values[1] + values[8] * matrix.values[2] + values[12] * matrix.values[3];
1392 result.values[1] = values[1] * matrix.values[0] + values[5] * matrix.values[1] + values[9] * matrix.values[2] + values[13] * matrix.values[3];
1393 result.values[2] = values[2] * matrix.values[0] + values[6] * matrix.values[1] + values[10] * matrix.values[2] + values[14] * matrix.values[3];
1394 result.values[3] = values[3] * matrix.values[0] + values[7] * matrix.values[1] + values[11] * matrix.values[2] + values[15] * matrix.values[3];
1395
1396 result.values[4] = values[0] * matrix.values[4] + values[4] * matrix.values[5] + values[8] * matrix.values[6] + values[12] * matrix.values[7];
1397 result.values[5] = values[1] * matrix.values[4] + values[5] * matrix.values[5] + values[9] * matrix.values[6] + values[13] * matrix.values[7];
1398 result.values[6] = values[2] * matrix.values[4] + values[6] * matrix.values[5] + values[10] * matrix.values[6] + values[14] * matrix.values[7];
1399 result.values[7] = values[3] * matrix.values[4] + values[7] * matrix.values[5] + values[11] * matrix.values[6] + values[15] * matrix.values[7];
1400
1401 result.values[8] = values[0] * matrix.values[8] + values[4] * matrix.values[9] + values[8] * matrix.values[10] + values[12] * matrix.values[11];
1402 result.values[9] = values[1] * matrix.values[8] + values[5] * matrix.values[9] + values[9] * matrix.values[10] + values[13] * matrix.values[11];
1403 result.values[10] = values[2] * matrix.values[8] + values[6] * matrix.values[9] + values[10] * matrix.values[10] + values[14] * matrix.values[11];
1404 result.values[11] = values[3] * matrix.values[8] + values[7] * matrix.values[9] + values[11] * matrix.values[10] + values[15] * matrix.values[11];
1405
1406 result.values[12] = values[0] * matrix.values[12] + values[4] * matrix.values[13] + values[8] * matrix.values[14] + values[12] * matrix.values[15];
1407 result.values[13] = values[1] * matrix.values[12] + values[5] * matrix.values[13] + values[9] * matrix.values[14] + values[13] * matrix.values[15];
1408 result.values[14] = values[2] * matrix.values[12] + values[6] * matrix.values[13] + values[10] * matrix.values[14] + values[14] * matrix.values[15];
1409 result.values[15] = values[3] * matrix.values[12] + values[7] * matrix.values[13] + values[11] * matrix.values[14] + values[15] * matrix.values[15];
1410
1411 return result;
1412}
1413
1414#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
1415
1416template <>
1418{
1419 // the following code uses the following AVX instructions, and needs AVX1 or higher
1420
1421 // AVX1:
1422 // _mm256_broadcast_sd
1423 // _mm256_loadu_pd
1424 // _mm256_mul_pd
1425 // _mm256_add_pd
1426 // _mm256_storeu_pd
1427
1428 // we use the same strategy as we apply for matrix-vector multiplication
1429 // further, here we interpret the right matrix as 4 vectors
1430
1431 // we load the columns of the left matrix
1432 __m256d c0 = _mm256_loadu_pd(values + 0);
1433 __m256d c1 = _mm256_loadu_pd(values + 4);
1434 __m256d c2 = _mm256_loadu_pd(values + 8);
1435 __m256d c3 = _mm256_loadu_pd(values + 12);
1436
1437
1438 // we determine the first vector of the resulting matrix
1439 __m256d v0 = _mm256_broadcast_sd(matrix.data() + 0);
1440 __m256d r0 = _mm256_mul_pd(c0, v0);
1441
1442 __m256d v1 = _mm256_broadcast_sd(matrix.data() + 1);
1443 __m256d r1 = _mm256_mul_pd(c1, v1);
1444
1445 r0 = _mm256_add_pd(r0, r1);
1446
1447 __m256d v2 = _mm256_broadcast_sd(matrix.data() + 2);
1448 __m256d r2 = _mm256_mul_pd(c2, v2);
1449
1450 r0 = _mm256_add_pd(r0, r2);
1451
1452 __m256d v3 = _mm256_broadcast_sd(matrix.data() + 3);
1453 __m256d r3 = _mm256_mul_pd(c3, v3);
1454
1455 r0 = _mm256_add_pd(r0, r3);
1456
1458
1459 _mm256_storeu_pd(result.data(), r0);
1460
1461
1462 // we determine the second vector of the resulting matrix
1463 __m256d v4 = _mm256_broadcast_sd(matrix.data() + 4);
1464 __m256d r4 = _mm256_mul_pd(c0, v4);
1465
1466 __m256d v5 = _mm256_broadcast_sd(matrix.data() + 5);
1467 __m256d r5 = _mm256_mul_pd(c1, v5);
1468
1469 r4 = _mm256_add_pd(r4, r5);
1470
1471 __m256d v6 = _mm256_broadcast_sd(matrix.data() + 6);
1472 __m256d r6 = _mm256_mul_pd(c2, v6);
1473
1474 r4 = _mm256_add_pd(r4, r6);
1475
1476 __m256d v7 = _mm256_broadcast_sd(matrix.data() + 7);
1477 __m256d r7 = _mm256_mul_pd(c3, v7);
1478
1479 r4 = _mm256_add_pd(r4, r7);
1480
1481 _mm256_storeu_pd(result.data() + 4, r4);
1482
1483
1484 // we determine the third vector of the resulting matrix
1485 __m256d v8 = _mm256_broadcast_sd(matrix.data() + 8);
1486 __m256d r8 = _mm256_mul_pd(c0, v8);
1487
1488 __m256d v9 = _mm256_broadcast_sd(matrix.data() + 9);
1489 __m256d r9 = _mm256_mul_pd(c1, v9);
1490
1491 r8 = _mm256_add_pd(r8, r9);
1492
1493 __m256d v10 = _mm256_broadcast_sd(matrix.data() + 10);
1494 __m256d r10 = _mm256_mul_pd(c2, v10);
1495
1496 r8 = _mm256_add_pd(r8, r10);
1497
1498 __m256d v11 = _mm256_broadcast_sd(matrix.data() + 11);
1499 __m256d r11 = _mm256_mul_pd(c3, v11);
1500
1501 r8 = _mm256_add_pd(r8, r11);
1502
1503 _mm256_storeu_pd(result.data() + 8, r8);
1504
1505
1506 // we determine the forth vector of the resulting matrix
1507 __m256d v12 = _mm256_broadcast_sd(matrix.data() + 12);
1508 __m256d r12 = _mm256_mul_pd(c0, v12);
1509
1510 __m256d v13 = _mm256_broadcast_sd(matrix.data() + 13);
1511 __m256d r13 = _mm256_mul_pd(c1, v13);
1512
1513 r12 = _mm256_add_pd(r12, r13);
1514
1515 __m256d v14 = _mm256_broadcast_sd(matrix.data() + 14);
1516 __m256d r14 = _mm256_mul_pd(c2, v14);
1517
1518 r12 = _mm256_add_pd(r12, r14);
1519
1520 __m256d v15 = _mm256_broadcast_sd(matrix.data() + 15);
1521 __m256d r15 = _mm256_mul_pd(c3, v15);
1522
1523 r12 = _mm256_add_pd(r12, r15);
1524
1525 _mm256_storeu_pd(result.data() + 12, r12);
1526
1527 return result;
1528}
1529
1530template <>
1532{
1533 // the following code uses the following AVX instructions, and needs AVX1 or higher
1534
1535 // we use the same strategy as we apply for matrix-vector multiplication
1536 // further, here we interpret the right matrix as 4 vectors, and combine two vectors into one 256 bit register
1537
1538 // we load the four columns of the left matrix
1539 __m256 c0 = _mm256_broadcast_ps((const __m128*)values + 0);
1540 __m256 c1 = _mm256_broadcast_ps((const __m128*)values + 1);
1541 __m256 c2 = _mm256_broadcast_ps((const __m128*)values + 2);
1542 __m256 c3 = _mm256_broadcast_ps((const __m128*)values + 3);
1543
1544 __m256 m01 = _mm256_loadu_ps(matrix.data() + 0);
1545 __m256 m23 = _mm256_loadu_ps(matrix.data() + 8);
1546
1547
1548 // we determine the first two vectors of the resulting matrix
1549 __m256 v0_4 = _mm256_permute_ps(m01, 0x00);
1550 __m256 r0 = _mm256_mul_ps(c0, v0_4);
1551
1552#ifdef OCEAN_COMPILER_MSC
1553 ocean_assert(NumericF::isEqual(r0.m256_f32[0], values[0] * matrix[0]));
1554 ocean_assert(NumericF::isEqual(r0.m256_f32[1], values[1] * matrix[0]));
1555 ocean_assert(NumericF::isEqual(r0.m256_f32[2], values[2] * matrix[0]));
1556 ocean_assert(NumericF::isEqual(r0.m256_f32[3], values[3] * matrix[0]));
1557
1558 ocean_assert(NumericF::isEqual(r0.m256_f32[4], values[0] * matrix[4]));
1559 ocean_assert(NumericF::isEqual(r0.m256_f32[5], values[1] * matrix[4]));
1560 ocean_assert(NumericF::isEqual(r0.m256_f32[6], values[2] * matrix[4]));
1561 ocean_assert(NumericF::isEqual(r0.m256_f32[7], values[3] * matrix[4]));
1562#endif
1563
1564 __m256 v1_5 = _mm256_permute_ps(m01, 0x55);
1565 __m256 r1 = _mm256_mul_ps(c1, v1_5);
1566
1567#ifdef OCEAN_COMPILER_MSC
1568 ocean_assert(NumericF::isEqual(r1.m256_f32[0], values[ 4] * matrix[1]));
1569 ocean_assert(NumericF::isEqual(r1.m256_f32[1], values[ 5] * matrix[1]));
1570 ocean_assert(NumericF::isEqual(r1.m256_f32[2], values[ 6] * matrix[1]));
1571 ocean_assert(NumericF::isEqual(r1.m256_f32[3], values[ 7] * matrix[1]));
1572
1573 ocean_assert(NumericF::isEqual(r1.m256_f32[4], values[4] * matrix[5]));
1574 ocean_assert(NumericF::isEqual(r1.m256_f32[5], values[5] * matrix[5]));
1575 ocean_assert(NumericF::isEqual(r1.m256_f32[6], values[6] * matrix[5]));
1576 ocean_assert(NumericF::isEqual(r1.m256_f32[7], values[7] * matrix[5]));
1577#endif
1578
1579 r0 = _mm256_add_ps(r0, r1);
1580
1581
1582 __m256 v2_6 = _mm256_permute_ps(m01, 0xAA);
1583 __m256 r2 = _mm256_mul_ps(c2, v2_6);
1584
1585 r0 = _mm256_add_ps(r0, r2);
1586
1587
1588 __m256 v3_7 = _mm256_permute_ps(m01, 0xFF);
1589 __m256 r3 = _mm256_mul_ps(c3, v3_7);
1590
1591 r0 = _mm256_add_ps(r0, r3);
1592
1593 SquareMatrixT4<float> result;
1594
1595 _mm256_storeu_ps(result.data(), r0);
1596
1597
1598
1599 // we determine the last two vectors of the resulting matrix
1600 __m256 v8_12 = _mm256_permute_ps(m23, 0x00);
1601 __m256 r8 = _mm256_mul_ps(c0, v8_12);
1602
1603
1604 __m256 v9_13 = _mm256_permute_ps(m23, 0x55);
1605 __m256 r9 = _mm256_mul_ps(c1, v9_13);
1606
1607 r8 = _mm256_add_ps(r8, r9);
1608
1609
1610 __m256 v10_14 = _mm256_permute_ps(m23, 0xAA);
1611 __m256 r10 = _mm256_mul_ps(c2, v10_14);
1612
1613 r8 = _mm256_add_ps(r8, r10);
1614
1615
1616 __m256 v11_15 = _mm256_permute_ps(m23, 0xFF);
1617 __m256 r11 = _mm256_mul_ps(c3, v11_15);
1618
1619 r8 = _mm256_add_ps(r8, r11);
1620
1621 _mm256_storeu_ps(result.data() + 8, r8);
1622
1623 return result;
1624}
1625
1626#else // OCEAN_HARDWARE_AVX_VERSION < 10
1627
1628#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
1629
1630template <>
1632{
1633 // the following code uses the following SSE instructions, and needs SSE1 or higher
1634
1635 // SSE1:
1636 // _mm_load1_ps
1637 // _mm_loadu_ps
1638 // _mm_mul_ps
1639 // _mm_add_ps
1640 // _mm_storeu_ps
1641
1642 // we use the same strategy as we apply for matrix-vector multiplication
1643 // further, here we interpret the right matrix as 4 vectors
1644
1645 // we load the columns of the left matrix
1646 __m128 c0 = _mm_loadu_ps(values + 0);
1647 __m128 c1 = _mm_loadu_ps(values + 4);
1648 __m128 c2 = _mm_loadu_ps(values + 8);
1649 __m128 c3 = _mm_loadu_ps(values + 12);
1650
1651
1652 // we determine the first vector of the resulting matrix
1653 __m128 v0 = _mm_load1_ps(matrix.data() + 0);
1654 __m128 r0 = _mm_mul_ps(c0, v0);
1655
1656 __m128 v1 = _mm_load1_ps(matrix.data() + 1);
1657 __m128 r1 = _mm_mul_ps(c1, v1);
1658
1659 r0 = _mm_add_ps(r0, r1);
1660
1661 __m128 v2 = _mm_load1_ps(matrix.data() + 2);
1662 __m128 r2 = _mm_mul_ps(c2, v2);
1663
1664 r0 = _mm_add_ps(r0, r2);
1665
1666 __m128 v3 = _mm_load1_ps(matrix.data() + 3);
1667 __m128 r3 = _mm_mul_ps(c3, v3);
1668
1669 r0 = _mm_add_ps(r0, r3);
1670
1671 SquareMatrixT4<float> result;
1672
1673 _mm_storeu_ps(result.data(), r0);
1674
1675
1676 // we determine the second vector of the resulting matrix
1677 __m128 v4 = _mm_load1_ps(matrix.data() + 4);
1678 __m128 r4 = _mm_mul_ps(c0, v4);
1679
1680 __m128 v5 = _mm_load1_ps(matrix.data() + 5);
1681 __m128 r5 = _mm_mul_ps(c1, v5);
1682
1683 r4 = _mm_add_ps(r4, r5);
1684
1685 __m128 v6 = _mm_load1_ps(matrix.data() + 6);
1686 __m128 r6 = _mm_mul_ps(c2, v6);
1687
1688 r4 = _mm_add_ps(r4, r6);
1689
1690 __m128 v7 = _mm_load1_ps(matrix.data() + 7);
1691 __m128 r7 = _mm_mul_ps(c3, v7);
1692
1693 r4 = _mm_add_ps(r4, r7);
1694
1695 _mm_storeu_ps(result.data() + 4, r4);
1696
1697
1698 // we determine the third vector of the resulting matrix
1699 __m128 v8 = _mm_load1_ps(matrix.data() + 8);
1700 __m128 r8 = _mm_mul_ps(c0, v8);
1701
1702 __m128 v9 = _mm_load1_ps(matrix.data() + 9);
1703 __m128 r9 = _mm_mul_ps(c1, v9);
1704
1705 r8 = _mm_add_ps(r8, r9);
1706
1707 __m128 v10 = _mm_load1_ps(matrix.data() + 10);
1708 __m128 r10 = _mm_mul_ps(c2, v10);
1709
1710 r8 = _mm_add_ps(r8, r10);
1711
1712 __m128 v11 = _mm_load1_ps(matrix.data() + 11);
1713 __m128 r11 = _mm_mul_ps(c3, v11);
1714
1715 r8 = _mm_add_ps(r8, r11);
1716
1717 _mm_storeu_ps(result.data() + 8, r8);
1718
1719
1720 // we determine the forth vector of the resulting matrix
1721 __m128 v12 = _mm_load1_ps(matrix.data() + 12);
1722 __m128 r12 = _mm_mul_ps(c0, v12);
1723
1724 __m128 v13 = _mm_load1_ps(matrix.data() + 13);
1725 __m128 r13 = _mm_mul_ps(c1, v13);
1726
1727 r12 = _mm_add_ps(r12, r13);
1728
1729 __m128 v14 = _mm_load1_ps(matrix.data() + 14);
1730 __m128 r14 = _mm_mul_ps(c2, v14);
1731
1732 r12 = _mm_add_ps(r12, r14);
1733
1734 __m128 v15 = _mm_load1_ps(matrix.data() + 15);
1735 __m128 r15 = _mm_mul_ps(c3, v15);
1736
1737 r12 = _mm_add_ps(r12, r15);
1738
1739 _mm_storeu_ps(result.data() + 12, r12);
1740
1741 return result;
1742}
1743
1744#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
1745
1746#endif // OCEAN_HARDWARE_AVX_VERSION >= 10
1747
1748template <typename T>
1750{
1751 SquareMatrixT4<T> result;
1752
1753 result.values[0] = values[0] * matrix[0] + values[4] * matrix[1] + values[8] * matrix[2]; // + values[12] * matrix[3];
1754 result.values[1] = values[1] * matrix[0] + values[5] * matrix[1] + values[9] * matrix[2]; // + values[13] * matrix[3];
1755 result.values[2] = values[2] * matrix[0] + values[6] * matrix[1] + values[10] * matrix[2]; // + values[14] * matrix[3];
1756 result.values[3] = values[3] * matrix[0] + values[7] * matrix[1] + values[11] * matrix[2]; // + values[15] * matrix[3];
1757
1758 result.values[4] = values[0] * matrix[4] + values[4] * matrix[5] + values[8] * matrix[6]; // + values[12] * matrix[7];
1759 result.values[5] = values[1] * matrix[4] + values[5] * matrix[5] + values[9] * matrix[6]; // + values[13] * matrix[7];
1760 result.values[6] = values[2] * matrix[4] + values[6] * matrix[5] + values[10] * matrix[6]; // + values[14] * matrix[7];
1761 result.values[7] = values[3] * matrix[4] + values[7] * matrix[5] + values[11] * matrix[6]; // + values[15] * matrix[7];
1762
1763 result.values[8] = values[0] * matrix[8] + values[4] * matrix[9] + values[8] * matrix[10]; // + values[12] * matrix[11];
1764 result.values[9] = values[1] * matrix[8] + values[5] * matrix[9] + values[9] * matrix[10]; // + values[13] * matrix[11];
1765 result.values[10] = values[2] * matrix[8] + values[6] * matrix[9] + values[10] * matrix[10]; // + values[14] * matrix[11];
1766 result.values[11] = values[3] * matrix[8] + values[7] * matrix[9] + values[11] * matrix[10]; // + values[15] * matrix[11];
1767
1768 result.values[12] = values[0] * matrix[12] + values[4] * matrix[13] + values[8] * matrix[14] + values[12]; // * matrix[15];
1769 result.values[13] = values[1] * matrix[12] + values[5] * matrix[13] + values[9] * matrix[14] + values[13]; // * matrix[15];
1770 result.values[14] = values[2] * matrix[12] + values[6] * matrix[13] + values[10] * matrix[14] + values[14]; // * matrix[15];
1771 result.values[15] = values[3] * matrix[12] + values[7] * matrix[13] + values[11] * matrix[14] + values[15]; // * matrix[15];
1772
1773 return result;
1774}
1775
1776template <typename T>
1777OCEAN_FORCE_INLINE VectorT3<T> SquareMatrixT4<T>::operator*(const VectorT3<T>& vector) const
1778{
1779 const T w = values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15];
1780 ocean_assert(NumericT<T>::isNotEqualEps(w) && "Division by zero!");
1781
1782 const T factor = 1 / w;
1783
1784 return VectorT3<T>((values[0] * vector[0] + values[4] * vector[1] + values[8] * vector[2] + values[12]) * factor,
1785 (values[1] * vector[0] + values[5] * vector[1] + values[9] * vector[2] + values[13]) * factor,
1786 (values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14]) * factor);
1787}
1788
1789template <typename T>
1790OCEAN_FORCE_INLINE VectorT4<T> SquareMatrixT4<T>::operator*(const VectorT4<T>& vector) const
1791{
1792 return VectorT4<T>(values[0] * vector[0] + values[4] * vector[1] + values[8] * vector[2] + values[12] * vector[3],
1793 values[1] * vector[0] + values[5] * vector[1] + values[9] * vector[2] + values[13] * vector[3],
1794 values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14] * vector[3],
1795 values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15] * vector[3]);
1796}
1797
1798#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
1799
1800#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
1801
1802// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
1803
1804template <>
1806{
1807 // the following code uses the following AVX instructions, and needs AVX1 or higher
1808
1809 // AVX1:
1810 // _mm256_loadu_pd
1811 // _mm256_mul_pd
1812 // _mm256_shuffle_pd
1813 // _mm256_hadd_pd
1814 // _mm256_permute2f128_pd
1815 // _mm256_storeu_pd
1816
1817 // first we load the first four rows of the matrix
1818 __m256d row0 = _mm256_loadu_pd(values + 0);
1819 __m256d row1 = _mm256_loadu_pd(values + 4);
1820 __m256d row2 = _mm256_loadu_pd(values + 8);
1821 __m256d row3 = _mm256_loadu_pd(values + 12);
1822
1823 // we load the four values of the vector
1824 __m256d v = _mm256_loadu_pd(vector.data());
1825
1826 // first, we transpose the 4x4 matrix
1827
1828 // A E I M A B C D
1829 // B F J N E F G H
1830 // C G K O I J K L
1831 // D H L P M N O P
1832
1833 // A B C D, E F G H -> A E C G
1834 __m256d temp0 = _mm256_shuffle_pd(row0, row1, 0x00); // 0x00 = 0000 0000
1835 // A B C D, E F G H -> B F D H
1836 __m256d temp2 = _mm256_shuffle_pd(row0, row1, 0x0F); // 0x0F = 0000 1111
1837 // I J K L, M N O P -> I M K O
1838 __m256d temp1 = _mm256_shuffle_pd(row2, row3, 0x00);
1839 // I J K L, M N O P -> J N L P
1840 __m256d temp3 = _mm256_shuffle_pd(row2, row3, 0x0F);
1841
1842 // A E C G I M K O -> A E I M
1843 row0 = _mm256_permute2f128_pd(temp0, temp1, 0x20); // 0x20 = 0010 0000
1844 // B F D H J N L P -> B F J N
1845 row1 = _mm256_permute2f128_pd(temp2, temp3, 0x20);
1846 // A E C G I M K O -> C G K O
1847 row2 = _mm256_permute2f128_pd(temp0, temp1, 0x31); // 0x31 = 0011 0001
1848 // B F D H J N L P -> D H L P
1849 row3 = _mm256_permute2f128_pd(temp2, temp3, 0x31);
1850
1851#ifdef OCEAN_COMPILER_MSC
1852 ocean_assert(row0.m256d_f64[0] == values[0] && row0.m256d_f64[1] == values[4] && row0.m256d_f64[2] == values[ 8] && row0.m256d_f64[3] == values[12]);
1853 ocean_assert(row1.m256d_f64[0] == values[1] && row1.m256d_f64[1] == values[5] && row1.m256d_f64[2] == values[ 9] && row1.m256d_f64[3] == values[13]);
1854 ocean_assert(row2.m256d_f64[0] == values[2] && row2.m256d_f64[1] == values[6] && row2.m256d_f64[2] == values[10] && row2.m256d_f64[3] == values[14]);
1855 ocean_assert(row3.m256d_f64[0] == values[3] && row3.m256d_f64[1] == values[7] && row3.m256d_f64[2] == values[11] && row3.m256d_f64[3] == values[15]);
1856#endif
1857
1858 // unfortunately the AVX does not offer _mm256_dp_pd (the determination of the dot product) so we have to do it on our own
1859
1860 __m256d r0v = _mm256_mul_pd(row0, v);
1861 __m256d r1v = _mm256_mul_pd(row1, v);
1862 __m256d r2v = _mm256_mul_pd(row2, v);
1863 __m256d r3v = _mm256_mul_pd(row3, v);
1864
1865 // we sum both multiplication results horizontally (at least two neighboring products)
1866 __m256d sum_interleaved_r0_r1 = _mm256_hadd_pd(r0v, r1v);
1867 __m256d sum_interleaved_r2_r3 = _mm256_hadd_pd(r2v, r3v);
1868
1869 // now we reorder the interleaved sums
1870 __m256d sum_first = _mm256_permute2f128_pd(sum_interleaved_r0_r1, sum_interleaved_r2_r3, 0x20); // 0x20 = 0010 0000
1871 __m256d sum_second = _mm256_permute2f128_pd(sum_interleaved_r0_r1, sum_interleaved_r2_r3, 0x31); // 0x31 = 0011 0001
1872
1873 // we finally add both reordered sums
1874 __m256d sum = _mm256_add_pd(sum_first, sum_second);
1875
1876 VectorT4<double> result;
1877 _mm256_storeu_pd(result.data(), sum);
1878
1879 ocean_assert(NumericD::isEqual(result[0], values[0] * vector[0] + values[4] * vector[1] + values[ 8] * vector[2] + values[12] * vector[3], NumericD::eps() * 100));
1880 ocean_assert(NumericD::isEqual(result[1], values[1] * vector[0] + values[5] * vector[1] + values[ 9] * vector[2] + values[13] * vector[3], NumericD::eps() * 100));
1881 ocean_assert(NumericD::isEqual(result[2], values[2] * vector[0] + values[6] * vector[1] + values[10] * vector[2] + values[14] * vector[3], NumericD::eps() * 100));
1882 ocean_assert(NumericD::isEqual(result[3], values[3] * vector[0] + values[7] * vector[1] + values[11] * vector[2] + values[15] * vector[3], NumericD::eps() * 100));
1883
1884 return result;
1885}
1886
1887#else // OCEAN_USE_SLOWER_IMPLEMENTATION
1888
1889template <>
1891{
1892 // the following code uses the following AVX instructions, and needs AVX1 or higher
1893
1894 // AVX1:
1895 // _mm256_broadcast_sd
1896 // _mm256_loadu_pd
1897 // _mm256_mul_pd
1898 // _mm256_add_pd
1899 // _mm256_storeu_pd
1900
1901 // we use the same strategy as for the 32 bit float values
1902
1903 // first we load the first vector element in all 64bit elements of the 256 bit register, so that we receive [a, a, a, a]
1904 __m256d v0 = _mm256_broadcast_sd(vector.data() + 0);
1905
1906 // now we load the first column to receive: [A, B, C, D]
1907 __m256d c0 = _mm256_loadu_pd(values + 0);
1908
1909 // now we multiply the 256 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
1910 __m256d r0 = _mm256_mul_pd(c0, v0);
1911
1912
1913 // now we proceed with the second column
1914 __m256d v1 = _mm256_broadcast_sd(vector.data() + 1);
1915 __m256d c1 = _mm256_loadu_pd(values + 4);
1916 __m256d r1 = _mm256_mul_pd(c1, v1);
1917
1918 // and we sum the result of the first column with the result of the second column
1919 r0 = _mm256_add_pd(r0, r1);
1920
1921
1922 // now we proceed with the third column
1923 __m256d v2 = _mm256_broadcast_sd(vector.data() + 2);
1924 __m256d c2 = _mm256_loadu_pd(values + 8);
1925 __m256d r2 = _mm256_mul_pd(c2, v2);
1926
1927 // we sum the results
1928 r0 = _mm256_add_pd(r0, r2);
1929
1930
1931 // now we proceed with the fourth column
1932 __m256d v3 = _mm256_broadcast_sd(vector.data() + 3);
1933 __m256d c3 = _mm256_loadu_pd(values + 12);
1934 __m256d r3 = _mm256_mul_pd(c3, v3);
1935
1936 // we sum the results
1937 r0 = _mm256_add_pd(r0, r3);
1938
1939 // and finally we store the results back to the vector
1940 VectorT4<double> result;
1941
1942 _mm256_storeu_pd(result.data(), r0);
1943
1944 return result;
1945}
1946
1947#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
1948
1949#else // OCEAN_HARDWARE_AVX_VERSION
1950
1951#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
1952
1953template <>
1955{
1956 // the following code uses the following SSE instructions, and needs SSE2 or higher
1957
1958 // SSE2:
1959 // _mm_load1_pd
1960 // _mm_loadu_pd
1961 // _mm_add_pd
1962 // _mm_storeu_pd
1963 // _mm_mul_pd
1964
1965 // we use the following strategy:
1966 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
1967 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
1968 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
1969 //
1970 // A E I M a Aa + Eb + Ic + Md
1971 // B F J N b Ba + Fb + Jc + Nd
1972 // C G K O * c = Ca + Gb + Kc + Od
1973 // D H L P d Da + Hb + Lc + Pd
1974
1975 // first we load the first vector element in both 64 elements of the 128 bit register, so that we receive [a, a]
1976 __m128d v0 = _mm_load1_pd(vector.data() + 0);
1977
1978 // now we load the first column to receive: [A, B] and [C, D]
1979 __m128d c0a = _mm_loadu_pd(values + 0);
1980 __m128d c0b = _mm_loadu_pd(values + 2);
1981
1982 // now we multiply both 128 bit registers by: [A, B] * [a, a] = [Aa, Ba] and [C, D] * [a, a] = [Ca, Da]
1983 __m128d r0a = _mm_mul_pd(c0a, v0);
1984 __m128d r0b = _mm_mul_pd(c0b, v0);
1985
1986
1987 // now we proceed with the second column
1988 __m128d v1 = _mm_load1_pd(vector.data() + 1);
1989
1990 __m128d c1a = _mm_loadu_pd(values + 4);
1991 __m128d c1b = _mm_loadu_pd(values + 6);
1992
1993 __m128d r1a = _mm_mul_pd(c1a, v1);
1994 __m128d r1b = _mm_mul_pd(c1b, v1);
1995
1996 // and we sum the result of the first column with the result of the second column
1997 r0a = _mm_add_pd(r0a, r1a);
1998 r0b = _mm_add_pd(r0b, r1b);
1999
2000
2001 // now we proceed with the third column
2002 __m128d v2 = _mm_load1_pd(vector.data() + 2);
2003
2004 __m128d c2a = _mm_loadu_pd(values + 8);
2005 __m128d c2b = _mm_loadu_pd(values + 10);
2006
2007 __m128d r2a = _mm_mul_pd(c2a, v2);
2008 __m128d r2b = _mm_mul_pd(c2b, v2);
2009
2010 // we sum the results
2011 r0a = _mm_add_pd(r0a, r2a);
2012 r0b = _mm_add_pd(r0b, r2b);
2013
2014
2015 // now we proceed with the fourth column
2016 __m128d v3 = _mm_load1_pd(vector.data() + 3);
2017
2018 __m128d c3a = _mm_loadu_pd(values + 12);
2019 __m128d c3b = _mm_loadu_pd(values + 14);
2020
2021 __m128d r3a = _mm_mul_pd(c3a, v3);
2022 __m128d r3b = _mm_mul_pd(c3b, v3);
2023
2024 // we sum the results
2025 r0a = _mm_add_pd(r0a, r3a);
2026 r0b = _mm_add_pd(r0b, r3b);
2027
2028 // and finally we store the results back to the vector
2029 VectorT4<double> result;
2030
2031 _mm_storeu_pd(result.data() + 0, r0a);
2032 _mm_storeu_pd(result.data() + 2, r0b);
2033
2034 return result;
2035}
2036
2037#endif // OCEAN_HARDWARE_SSE_VERSION >= 20
2038
2039#endif // OCEAN_HARDWARE_AVX_VERSION >= 10
2040
2041#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
2042
2043// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
2044
2045#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2046
2047template <>
2049{
2050 // the following code uses the following SSE instructions, and needs SSE 4.1 or higher
2051
2052 // SSE:
2053 // _mm_loadu_ps
2054 // _mm_shuffle_ps
2055 // _mm_or_ps
2056 // _mm_storeu_ps
2057
2058 // SSE4.1:
2059 // _mm_dp_ps
2060
2061 // first we load the first four rows of the matrix
2062 __m128 row0 = _mm_loadu_ps(values + 0);
2063 __m128 row1 = _mm_loadu_ps(values + 4);
2064 __m128 row2 = _mm_loadu_ps(values + 8);
2065 __m128 row3 = _mm_loadu_ps(values + 12);
2066
2067 // we load the four values of the vector
2068 __m128 v = _mm_loadu_ps(vector.data());
2069
2070 // first, we transpose the 4x4 matrix
2071
2072 // A E I M A B C D
2073 // B F J N E F G H
2074 // C G K O I J K L
2075 // D H L P M N O P
2076
2077 // A B C D, E F G H -> A B E F
2078 __m128 temp0 = _mm_shuffle_ps(row0, row1, 0x44); // 0x44 = 0100 0100
2079 // A B C D, E F G H -> C D G H
2080 __m128 temp2 = _mm_shuffle_ps(row0, row1, 0xEE); // 0xEE = 1110 1110
2081 // I J K L, M N O P -> I J M N
2082 __m128 temp1 = _mm_shuffle_ps(row2, row3, 0x44);
2083 // I J K L, M N O P -> K L O P
2084 __m128 temp3 = _mm_shuffle_ps(row2, row3, 0xEE);
2085
2086 // A B E F, I J M N -> A E I M
2087 row0 = _mm_shuffle_ps(temp0, temp1, 0x88); // 0x88 = 1000 1000
2088 // A B E F, I J M N -> B F J N
2089 row1 = _mm_shuffle_ps(temp0, temp1, 0xDD); // 0xDD = 1101 1101
2090 // C D G H, K L O P -> C G K O
2091 row2 = _mm_shuffle_ps(temp2, temp3, 0x88);
2092 // C D G H, K L O P -> D H L P
2093 row3 = _mm_shuffle_ps(temp2, temp3, 0xDD);
2094
2095#ifdef OCEAN_COMPILER_MSC
2096 ocean_assert(row0.m128_f32[0] == values[0] && row0.m128_f32[1] == values[4] && row0.m128_f32[2] == values[ 8] && row0.m128_f32[3] == values[12]);
2097 ocean_assert(row1.m128_f32[0] == values[1] && row1.m128_f32[1] == values[5] && row1.m128_f32[2] == values[ 9] && row1.m128_f32[3] == values[13]);
2098 ocean_assert(row2.m128_f32[0] == values[2] && row2.m128_f32[1] == values[6] && row2.m128_f32[2] == values[10] && row2.m128_f32[3] == values[14]);
2099 ocean_assert(row3.m128_f32[0] == values[3] && row3.m128_f32[1] == values[7] && row3.m128_f32[2] == values[11] && row3.m128_f32[3] == values[15]);
2100#endif
2101
2102 // we determine the dot product between the first row and the vector and store the result in the first float bin
2103 row0 = _mm_dp_ps(row0, v, 0xF1); // 0xF1 = 1111 0001
2104
2105 // we determine the dot product between the second row and the vector and store the result in the second float bin
2106 row1 = _mm_dp_ps(row1, v, 0xF2); // 0xF2 = 1111 0010
2107 row2 = _mm_dp_ps(row2, v, 0xF4); // 0xF4 = 1111 0100
2108 row3 = _mm_dp_ps(row3, v, 0xF8); // 0xF8 = 1111 1000
2109
2110 // now we blend the results by applying the bit-wise or operator
2111 __m128 result01 = _mm_or_ps(row0, row1);
2112 __m128 result23 = _mm_or_ps(row2, row3);
2113 __m128 result03 = _mm_or_ps(result01, result23);
2114
2115 VectorT4<float> result;
2116 _mm_storeu_ps(result.data(), result03);
2117
2118 return result;
2119}
2120
2121#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
2122
2123#else // OCEAN_USE_SLOWER_IMPLEMENTATION
2124
2125#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
2126
2127template <>
2129{
2130 // the following code uses the following SSE instructions, and needs SSE1 or higher
2131
2132 // SSE1:
2133 // _mm_load1_ps
2134 // _mm_loadu_ps
2135 // _mm_mul_ps
2136 // _mm_add_ps
2137 // _mm_storeu_ps
2138
2139
2140 // we use the following strategy:
2141 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2142 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2143 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2144 //
2145 // A E I M a Aa + Eb + Ic + Md
2146 // B F J N b Ba + Fb + Jc + Nd
2147 // C G K O * c = Ca + Gb + Kc + Od
2148 // D H L P d Da + Hb + Lc + Pd
2149
2150 // first we load the first vector element in all 32bit elements of the 128 bit register, so that we receive [a, a, a, a]
2151 __m128 v0 = _mm_load1_ps(vector.data() + 0);
2152
2153 // now we load the first column to receive: [A, B, C, D]
2154 __m128 c0 = _mm_loadu_ps(values + 0);
2155
2156 // now we multiply the 128 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
2157 __m128 r0 = _mm_mul_ps(c0, v0);
2158
2159
2160 // now we proceed with the second column
2161 __m128 v1 = _mm_load1_ps(vector.data() + 1);
2162 __m128 c1 = _mm_loadu_ps(values + 4);
2163 __m128 r1 = _mm_mul_ps(c1, v1);
2164
2165 // and we sum the result of the first column with the result of the second column
2166 r0 = _mm_add_ps(r0, r1);
2167
2168
2169 // now we proceed with the third column
2170 __m128 v2 = _mm_load1_ps(vector.data() + 2);
2171 __m128 c2 = _mm_loadu_ps(values + 8);
2172 __m128 r2 = _mm_mul_ps(c2, v2);
2173
2174 // we sum the results
2175 r0 = _mm_add_ps(r0, r2);
2176
2177
2178 // now we proceed with the fourth column
2179 __m128 v3 = _mm_load1_ps(vector.data() + 3);
2180 __m128 c3 = _mm_loadu_ps(values + 12);
2181 __m128 r3 = _mm_mul_ps(c3, v3);
2182
2183 // we sum the results
2184 r0 = _mm_add_ps(r0, r3);
2185
2186 // and finally we store the results back to the vector
2187 VectorT4<float> result;
2188
2189 _mm_storeu_ps(result.data(), r0);
2190
2191 return result;
2192}
2193
2194#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
2195
2196#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
2197
2198#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2199
2200#ifdef __aarch64__
2201
2202template <>
2204{
2205 // the following NEON code is almost identical to the SSE implementation
2206
2207 // we use the following strategy:
2208 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2209 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2210 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2211 //
2212 // A E I M a Aa + Eb + Ic + Md
2213 // B F J N b Ba + Fb + Jc + Nd
2214 // C G K O * c = Ca + Gb + Kc + Od
2215 // D H L P d Da + Hb + Lc + Pd
2216
2217 // first we load the first vector element in both 64 elements of the 128 bit register, so that we receive [a, a]
2218 float64x2_t v0 = vld1q_dup_f64(vector.data() + 0);
2219
2220 // now we load the first column to receive: [A, B] and [C, D]
2221 float64x2_t c0a = vld1q_f64(values + 0);
2222 float64x2_t c0b = vld1q_f64(values + 2);
2223
2224 // now we multiply both 128 bit registers by: [A, B] * [a, a] = [Aa, Ba] and [C, D] * [a, a] = [Ca, Da]
2225 float64x2_t r0a = vmulq_f64(c0a, v0);
2226 float64x2_t r0b = vmulq_f64(c0b, v0);
2227
2228
2229 // now we proceed with the second column
2230 float64x2_t v1 = vld1q_dup_f64(vector.data() + 1);
2231
2232 float64x2_t c1a = vld1q_f64(values + 4);
2233 float64x2_t c1b = vld1q_f64(values + 6);
2234
2235 float64x2_t r1a = vmulq_f64(c1a, v1);
2236 float64x2_t r1b = vmulq_f64(c1b, v1);
2237
2238 // and we sum the result of the first column with the result of the second column
2239 r0a = vaddq_f64(r0a, r1a);
2240 r0b = vaddq_f64(r0b, r1b);
2241
2242
2243 // now we proceed with the third column
2244 float64x2_t v2 = vld1q_dup_f64(vector.data() + 2);
2245
2246 float64x2_t c2a = vld1q_f64(values + 8);
2247 float64x2_t c2b = vld1q_f64(values + 10);
2248
2249 float64x2_t r2a = vmulq_f64(c2a, v2);
2250 float64x2_t r2b = vmulq_f64(c2b, v2);
2251
2252 // we sum the results
2253 r0a = vaddq_f64(r0a, r2a);
2254 r0b = vaddq_f64(r0b, r2b);
2255
2256
2257 // now we proceed with the fourth column
2258 float64x2_t v3 = vld1q_dup_f64(vector.data() + 3);
2259
2260 float64x2_t c3a = vld1q_f64(values + 12);
2261 float64x2_t c3b = vld1q_f64(values + 14);
2262
2263 float64x2_t r3a = vmulq_f64(c3a, v3);
2264 float64x2_t r3b = vmulq_f64(c3b, v3);
2265
2266 // we sum the results
2267 r0a = vaddq_f64(r0a, r3a);
2268 r0b = vaddq_f64(r0b, r3b);
2269
2270 // and finally we store the results back to the vector
2271 VectorT4<double> result;
2272
2273 vst1q_f64(result.data() + 0, r0a);
2274 vst1q_f64(result.data() + 2, r0b);
2275
2276 return result;
2277}
2278
2279#endif // __aarch64__
2280
2281template <>
2283{
2284 // the following NEON code is almost identical to the SSE implementation
2285
2286 // we use the following strategy:
2287 // the values of the matrix are column aligned so that we normally would need to transpose the matrix before we can apply simple SIMD instructions
2288 // however, we do not transpose the matrix (we avoid the shuffle instructions) and instead multiply the matrix column-wise:
2289 // finally we sum the four columns and have the result, compared to the transpose-based approach this approach is approx. two times faster
2290 //
2291 // A E I M a Aa + Eb + Ic + Md
2292 // B F J N b Ba + Fb + Jc + Nd
2293 // C G K O * c = Ca + Gb + Kc + Od
2294 // D H L P d Da + Hb + Lc + Pd
2295
2296 // first we load the first vector element in all 32bit elements of the 128 bit register, so that we receive [a, a, a, a]
2297 float32x4_t v0 = vld1q_dup_f32(vector.data() + 0);
2298
2299 // now we load the first column to receive: [A, B, C, D]
2300 float32x4_t c0 = vld1q_f32(values + 0);
2301
2302 // now we multiply the 128 bit register [A, B, C, D] * [a, a, a, a] = [Aa, Ba, Ca, Da]
2303 float32x4_t r0 = vmulq_f32(c0, v0);
2304
2305
2306 // now we proceed with the second column
2307 float32x4_t v1 = vld1q_dup_f32(vector.data() + 1);
2308 float32x4_t c1 = vld1q_f32(values + 4);
2309 float32x4_t r1 = vmulq_f32(c1, v1);
2310
2311 // and we sum the result of the first column with the result of the second column
2312 r0 = vaddq_f32(r0, r1);
2313
2314
2315 // now we proceed with the third column
2316 float32x4_t v2 = vld1q_dup_f32(vector.data() + 2);
2317 float32x4_t c2 = vld1q_f32(values + 8);
2318 float32x4_t r2 = vmulq_f32(c2, v2);
2319
2320 // we sum the results
2321 r0 = vaddq_f32(r0, r2);
2322
2323
2324 // now we proceed with the fourth column
2325 float32x4_t v3 = vld1q_dup_f32(vector.data() + 3);
2326 float32x4_t c3 = vld1q_f32(values + 12);
2327 float32x4_t r3 = vmulq_f32(c3, v3);
2328
2329 // we sum the results
2330 r0 = vaddq_f32(r0, r3);
2331
2332 // and finally we store the results back to the vector
2333 VectorT4<float> result;
2334
2335 vst1q_f32(result.data(), r0);
2336
2337 return result;
2338}
2339
2340#endif
2341
2342template <typename T>
2344{
2345 SquareMatrixT4<T> result(*this);
2346
2347 result.values[0] *= value;
2348 result.values[1] *= value;
2349 result.values[2] *= value;
2350 result.values[3] *= value;
2351 result.values[4] *= value;
2352 result.values[5] *= value;
2353 result.values[6] *= value;
2354 result.values[7] *= value;
2355 result.values[8] *= value;
2356 result.values[9] *= value;
2357 result.values[10] *= value;
2358 result.values[11] *= value;
2359 result.values[12] *= value;
2360 result.values[13] *= value;
2361 result.values[14] *= value;
2362 result.values[15] *= value;
2363
2364 return result;
2365}
2366
2367template <typename T>
2369{
2370 values[0] *= value;
2371 values[1] *= value;
2372 values[2] *= value;
2373 values[3] *= value;
2374 values[4] *= value;
2375 values[5] *= value;
2376 values[6] *= value;
2377 values[7] *= value;
2378 values[8] *= value;
2379 values[9] *= value;
2380 values[10] *= value;
2381 values[11] *= value;
2382 values[12] *= value;
2383 values[13] *= value;
2384 values[14] *= value;
2385 values[15] *= value;
2386
2387 return *this;
2388}
2389
2390template <typename T>
2391SquareMatrixT4<T> SquareMatrixT4<T>::projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance)
2392{
2393 /*
2394 * <pre>
2395 * Creates the following frustum projection matrix.
2396 *
2397 * --------------------------------------------------
2398 * | t/a 0 0 0 |
2399 * | 0 t 0 0 |
2400 * | 0 0 (f+n)/(n-f) -2fn/(n-f) |
2401 * | 0 0 -1 0 |
2402 * --------------------------------------------------
2403 *
2404 * With: t = 1 / tan (fovY / 2), a = aspectRatio, n = nearDistance, f = farDistance
2405 * </pre>
2406 */
2407
2408 ocean_assert(fovX > 0 && fovX < NumericT<T>::pi());
2409 ocean_assert(aspectRatio > 0);
2410 ocean_assert(nearDistance > 0);
2411 ocean_assert(nearDistance < farDistance);
2412
2413 const T fovY = T(2.0) * NumericT<T>::atan(NumericT<T>::tan(T(0.5) * fovX) / aspectRatio);
2414
2415 SquareMatrixT4<T> matrix(false);
2416 ocean_assert(matrix(1, 0) == 0 && matrix(2, 0) == 0 && matrix(3, 0) == 0);
2417 ocean_assert(matrix(0, 1) == 0 && matrix(2, 1) == 0 && matrix(3, 1) == 0);
2418 ocean_assert(matrix(0, 2) == 0 && matrix(1, 2) == 0);
2419 ocean_assert(matrix(0, 3) == 0 && matrix(1, 3) == 0 && matrix(3, 3) == 0);
2420
2421 ocean_assert(NumericT<T>::isNotEqual(farDistance, nearDistance));
2422
2423 const T f = T(1.0) / NumericT<T>::tan(fovY * T(0.5));
2424 const T factor = T(1.0) / (nearDistance - farDistance);
2425
2426 matrix(0, 0) = f / aspectRatio;
2427 matrix(1, 1) = f;
2428 matrix(2, 2) = (farDistance + nearDistance) * factor;
2429 matrix(3, 2) = -T(1.0);
2430 matrix(2, 3) = (T(2.0) * farDistance * nearDistance) * factor;
2431
2432 return matrix;
2433}
2434
2435template <typename T>
2436SquareMatrixT4<T> SquareMatrixT4<T>::projectionMatrix(const AnyCameraT<T>& anyCamera, const T nearDistance, const T farDistance)
2437{
2438 /*
2439 * <pre>
2440 * Creates the following frustum projection matrix.
2441 *
2442 * --------------------------------------------------
2443 * | Fx 0 mx 0 |
2444 * | 0 Fy my 0 |
2445 * | 0 0 (f+n)/(n-f) -2fn/(n-f) |
2446 * | 0 0 -1 0 |
2447 * --------------------------------------------------
2448 *
2449 * n = nearDistance, f = farDistance
2450 * </pre>
2451 */
2452
2453 ocean_assert(anyCamera.isValid());
2454
2455 ocean_assert(nearDistance > 0);
2456 ocean_assert(nearDistance < farDistance);
2457
2458 const T fxPixel = anyCamera.focalLengthX();
2459 const T fyPixel = anyCamera.focalLengthY();
2460 ocean_assert(fxPixel > T(1) && fyPixel > T(1));
2461
2462 const T mxPixel = anyCamera.principalPointX();
2463 const T myPixel = anyCamera.principalPointY();
2464
2465 const T width_2 = T(anyCamera.width()) / T(2);
2466 const T height_2 = T(anyCamera.height()) / T(2);
2467
2468 ocean_assert(NumericT<T>::isNotEqualEps(width_2));
2469 ocean_assert(NumericT<T>::isNotEqualEps(height_2));
2470
2471 const T fx = fxPixel / width_2;
2472 const T fy = fyPixel / height_2;
2473
2474 const T mx = (mxPixel - width_2) / width_2; // principal point with range [-1, 1]
2475 const T my = (myPixel - height_2) / height_2;
2476
2477 const T factor = T(1.0) / (nearDistance - farDistance);
2478
2479 SquareMatrixT4<T> matrix(false);
2480
2481 matrix(0, 0) = fx;
2482 matrix(1, 1) = fy;
2483 matrix(0, 2) = -mx;
2484 matrix(1, 2) = my;
2485 matrix(2, 2) = (farDistance + nearDistance) * factor;
2486 matrix(3, 2) = -T(1);
2487 matrix(2, 3) = (T(2) * farDistance * nearDistance) * factor;
2488
2489 return matrix;
2490}
2491
2492template <typename T>
2493SquareMatrixT4<T> SquareMatrixT4<T>::frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance)
2494{
2495 /**
2496 * <pre>
2497 * Creates the following frustum projection matrix:
2498 *
2499 * --------------------------------------------------
2500 * | 2n/(r-l) 0 (r+l)/(r-l) 0 |
2501 * | 0 2n/(t-b) (t+b)/(t-b) 0 |
2502 * | 0 0 -(f+n)/(f-n) -2fn/(f-n) |
2503 * | 0 0 -1 0 |
2504 * --------------------------------------------------
2505 * </pre>
2506 */
2507
2508 ocean_assert(NumericT<T>::isNotEqual(left, right));
2509 ocean_assert(NumericT<T>::isNotEqual(top, bottom));
2510 ocean_assert(NumericT<T>::isNotEqual(nearDistance, farDistance));
2511
2512 SquareMatrixT4<T> matrix(false);
2513 ocean_assert(matrix(1, 0) == 0 && matrix(2, 0) == 0 && matrix(3, 0) == 0);
2514 ocean_assert(matrix(0, 1) == 0 && matrix(2, 1) == 0 && matrix(3, 1) == 0);
2515 ocean_assert(matrix(0, 3) == 0 && matrix(1, 3) == 0 && matrix(3, 3) == 0);
2516
2517 const T rightLeft = T(1.0) / (right - left);
2518 const T near2 = nearDistance * T(2.0);
2519
2520 matrix(0, 0) = near2 * rightLeft;
2521 matrix(0, 2) = (right + left) * rightLeft;
2522
2523 const T topBottom = T(1.0) / (top - bottom);
2524
2525 matrix(1, 1) = near2 * topBottom;
2526 matrix(1, 2) = (top + bottom) * topBottom;
2527
2528 const T farNear = T(1.0) / (farDistance - nearDistance);
2529
2530 matrix(2, 2) = -(farDistance + nearDistance) * farNear;
2531 matrix(2, 3) = -T(2.0) * farDistance * nearDistance * farNear;
2532
2533 matrix(3, 2) = -T(1.0);
2534
2535 return matrix;
2536}
2537
2538template <typename T>
2539SquareMatrixT4<T> SquareMatrixT4<T>::frustumMatrix(const T width, const T height, const HomogenousMatrixT4<T>& viewingMatrix, const T nearDistance, const T farDistance)
2540{
2541 ocean_assert(width > NumericT<T>::eps() && height > NumericT<T>::eps());
2542 ocean_assert(nearDistance >= NumericT<T>::eps() && farDistance > nearDistance);
2543
2544 const T planeDistance = NumericT<T>::abs(viewingMatrix.translation().z());
2545 ocean_assert(viewingMatrix.isValid() && NumericT<T>::isNotEqualEps(planeDistance));
2546
2547 const HomogenousMatrixT4<T> inversedViewingMatrix(viewingMatrix.inverted());
2548
2549 const VectorT3<T> leftTop(width * -T(0.5), height * T(0.5), 0);
2550 const VectorT3<T> rightBottom(width * T(0.5), height * -T(0.5), 0);
2551
2552 const VectorT3<T> leftTopInCamera(inversedViewingMatrix * leftTop);
2553 const VectorT3<T> rightBottomInCamera(inversedViewingMatrix * rightBottom);
2554
2555 const T factor = nearDistance / planeDistance;
2556
2557 return frustumMatrix(factor * leftTopInCamera.x(), factor * rightBottomInCamera.x(), factor * leftTopInCamera.y(), factor * rightBottomInCamera.y(), nearDistance, farDistance);
2558}
2559
2560template <typename T>
2561void SquareMatrixT4<T>::multiply(const SquareMatrixT4<T>& matrix, const VectorT4<T>* vectors, VectorT4<T>* results, const size_t number)
2562{
2563 ocean_assert((vectors && results) || number == 0);
2564
2565 for (size_t n = 0; n < number; ++n)
2566 {
2567 results[n] = matrix * vectors[n];
2568 }
2569}
2570
2571#if defined(OCEAN_HARDWARE_AVX_VERSION) && OCEAN_HARDWARE_AVX_VERSION >= 10
2572
2573template <>
2574inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2575{
2576 // the following code uses the following AVX instructions, and needs AVX1 or higher
2577
2578 // AVX1:
2579 // _mm256_broadcast_sd
2580 // _mm256_loadu_pd
2581 // _mm256_mul_pd
2582 // _mm256_add_pd
2583 // _mm256_storeu_pd
2584
2585 // we use the same strategy as for the 32 bit float values
2586
2587 __m256d c0 = _mm256_loadu_pd(matrix.values + 0);
2588 __m256d c1 = _mm256_loadu_pd(matrix.values + 4);
2589 __m256d c2 = _mm256_loadu_pd(matrix.values + 8);
2590 __m256d c3 = _mm256_loadu_pd(matrix.values + 12);
2591
2592 for (size_t n = 0; n < number; ++n)
2593 {
2594 __m256d v0 = _mm256_broadcast_sd(vectors[n].data() + 0);
2595 __m256d r0 = _mm256_mul_pd(c0, v0);
2596
2597 __m256d v1 = _mm256_broadcast_sd(vectors[n].data() + 1);
2598 __m256d r1 = _mm256_mul_pd(c1, v1);
2599
2600 r0 = _mm256_add_pd(r0, r1);
2601
2602 __m256d v2 = _mm256_broadcast_sd(vectors[n].data() + 2);
2603 __m256d r2 = _mm256_mul_pd(c2, v2);
2604
2605 r0 = _mm256_add_pd(r0, r2);
2606
2607 __m256d v3 = _mm256_broadcast_sd(vectors[n].data() + 3);
2608 __m256d r3 = _mm256_mul_pd(c3, v3);
2609
2610 r0 = _mm256_add_pd(r0, r3);
2611
2612 _mm256_storeu_pd(results[n].data(), r0);
2613 }
2614}
2615
2616#else // OCEAN_HARDWARE_AVX_VERSION
2617
2618#ifdef OCEAN_USE_SLOWER_IMPLEMENTATION
2619
2620// we keep following implementation inside 'OCEAN_USE_SLOWER_IMPLEMENTATION' showing an alternative (which is slower)
2621
2622#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2623
2624template <>
2625inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2626{
2627 // the following code uses the following SSE instructions, and needs SSE 4.1 or higher
2628
2629 // SSE:
2630 // _mm_loadu_ps
2631 // _mm_shuffle_ps
2632 // _mm_or_ps
2633 // _mm_storeu_ps
2634
2635 // SSE4.1:
2636 // _mm_dp_ps
2637
2638 // first we load the four rows of the matrix
2639 __m128 row0 = _mm_loadu_ps(matrix.values + 0);
2640 __m128 row1 = _mm_loadu_ps(matrix.values + 4);
2641 __m128 row2 = _mm_loadu_ps(matrix.values + 8);
2642 __m128 row3 = _mm_loadu_ps(matrix.values + 12);
2643
2644 // first, we transpose the 4x4 matrix
2645
2646 // A E I M A B C D
2647 // B F J N E F G H
2648 // C G K O I J K L
2649 // D H L P M N O P
2650
2651 // A B C D, E F G H -> A B E F
2652 __m128 temp0 = _mm_shuffle_ps(row0, row1, 0x44); // 0x44 = 0100 0100
2653 // A B C D, E F G H -> C D G H
2654 __m128 temp2 = _mm_shuffle_ps(row0, row1, 0xEE); // 0xEE = 1110 1110
2655 // I J K L, M N O P -> I J M N
2656 __m128 temp1 = _mm_shuffle_ps(row2, row3, 0x44);
2657 // I J K L, M N O P -> K L O P
2658 __m128 temp3 = _mm_shuffle_ps(row2, row3, 0xEE);
2659
2660 // A B E F, I J M N -> A E I M
2661 row0 = _mm_shuffle_ps(temp0, temp1, 0x88); // 0x88 = 1000 1000
2662 // A B E F, I J M N -> B F J N
2663 row1 = _mm_shuffle_ps(temp0, temp1, 0xDD); // 0xDD = 1101 1101
2664 // C D G H, K L O P -> C G K O
2665 row2 = _mm_shuffle_ps(temp2, temp3, 0x88);
2666 // C D G H, K L O P -> D H L P
2667 row3 = _mm_shuffle_ps(temp2, temp3, 0xDD);
2668
2669#ifdef OCEAN_COMPILER_MSC
2670 ocean_assert(row0.m128_f32[0] == matrix.values[0] && row0.m128_f32[1] == matrix.values[4] && row0.m128_f32[2] == matrix.values[ 8] && row0.m128_f32[3] == matrix.values[12]);
2671 ocean_assert(row1.m128_f32[0] == matrix.values[1] && row1.m128_f32[1] == matrix.values[5] && row1.m128_f32[2] == matrix.values[ 9] && row1.m128_f32[3] == matrix.values[13]);
2672 ocean_assert(row2.m128_f32[0] == matrix.values[2] && row2.m128_f32[1] == matrix.values[6] && row2.m128_f32[2] == matrix.values[10] && row2.m128_f32[3] == matrix.values[14]);
2673 ocean_assert(row3.m128_f32[0] == matrix.values[3] && row3.m128_f32[1] == matrix.values[7] && row3.m128_f32[2] == matrix.values[11] && row3.m128_f32[3] == matrix.values[15]);
2674#endif
2675
2676 for (size_t n = 0u; n < number; ++n)
2677 {
2678 // we load the four values of the vector
2679 __m128 v = _mm_loadu_ps(vectors[n].data());
2680
2681 // we determine the dot product between the first row and the vector and store the result in the first float bin
2682 __m128 dot0 = _mm_dp_ps(row0, v, 0xF1); // 0xF1 = 1111 0001
2683
2684 // we determine the dot product between the second row and the vector and store the result in the second float bin
2685 __m128 dot1 = _mm_dp_ps(row1, v, 0xF2); // 0xF2 = 1111 0010
2686 __m128 dot2 = _mm_dp_ps(row2, v, 0xF4); // 0xF4 = 1111 0100
2687 __m128 dot3 = _mm_dp_ps(row3, v, 0xF8); // 0xF8 = 1111 1000
2688
2689 // now we blend the results by applying the bit-wise or operator
2690 __m128 result01 = _mm_or_ps(dot0, dot1);
2691 __m128 result23 = _mm_or_ps(dot2, dot3);
2692 __m128 result03 = _mm_or_ps(result01, result23);
2693
2694 _mm_storeu_ps(results[n].data(), result03);
2695 }
2696}
2697
2698#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
2699
2700#else // OCEAN_USE_SLOWER_IMPLEMENTATION
2701
2702#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 20
2703
2704template <>
2705inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2706{
2707 // the following code uses the following SSE instructions, and needs SSE2 or higher
2708
2709 // SSE2:
2710 // _mm_load1_pd
2711 // _mm_loadu_pd
2712 // _mm_add_pd
2713 // _mm_storeu_pd
2714 // _mm_mul_pd
2715
2716 // now we load the four columns
2717 __m128d c0a = _mm_loadu_pd(matrix.values + 0);
2718 __m128d c0b = _mm_loadu_pd(matrix.values + 2);
2719 __m128d c1a = _mm_loadu_pd(matrix.values + 4);
2720 __m128d c1b = _mm_loadu_pd(matrix.values + 6);
2721 __m128d c2a = _mm_loadu_pd(matrix.values + 8);
2722 __m128d c2b = _mm_loadu_pd(matrix.values + 10);
2723 __m128d c3a = _mm_loadu_pd(matrix.values + 12);
2724 __m128d c3b = _mm_loadu_pd(matrix.values + 14);
2725
2726 for (size_t n = 0u; n < number; ++n)
2727 {
2728 __m128d v0 = _mm_load1_pd(vectors[n].data() + 0);
2729 __m128d v1 = _mm_load1_pd(vectors[n].data() + 1);
2730 __m128d v2 = _mm_load1_pd(vectors[n].data() + 2);
2731 __m128d v3 = _mm_load1_pd(vectors[n].data() + 3);
2732
2733 // first column
2734 __m128d r0a = _mm_mul_pd(c0a, v0);
2735 __m128d r0b = _mm_mul_pd(c0b, v0);
2736
2737 // second column
2738 __m128d r1a = _mm_mul_pd(c1a, v1);
2739 __m128d r1b = _mm_mul_pd(c1b, v1);
2740
2741 r0a = _mm_add_pd(r0a, r1a);
2742 r0b = _mm_add_pd(r0b, r1b);
2743
2744 // third column
2745 __m128d r2a = _mm_mul_pd(c2a, v2);
2746 __m128d r2b = _mm_mul_pd(c2b, v2);
2747 r0a = _mm_add_pd(r0a, r2a);
2748 r0b = _mm_add_pd(r0b, r2b);
2749
2750 // fourth column
2751 __m128d r3a = _mm_mul_pd(c3a, v3);
2752 __m128d r3b = _mm_mul_pd(c3b, v3);
2753 r0a = _mm_add_pd(r0a, r3a);
2754 r0b = _mm_add_pd(r0b, r3b);
2755
2756 _mm_storeu_pd(results[n].data() + 0, r0a);
2757 _mm_storeu_pd(results[n].data() + 2, r0b);
2758 }
2759}
2760
2761#endif // OCEAN_HARDWARE_SSE_VERSION >= 20
2762
2763#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 10
2764
2765template <>
2766inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2767{
2768 // the following code uses the following SSE instructions, and needs SSE1 or higher
2769
2770 // SSE1:
2771 // _mm_load1_ps
2772 // _mm_loadu_ps
2773 // _mm_mul_ps
2774 // _mm_add_ps
2775 // _mm_storeu_ps
2776
2777 // now we load the four columns
2778 __m128 c0 = _mm_loadu_ps(matrix.values + 0);
2779 __m128 c1 = _mm_loadu_ps(matrix.values + 4);
2780 __m128 c2 = _mm_loadu_ps(matrix.values + 8);
2781 __m128 c3 = _mm_loadu_ps(matrix.values + 12);
2782
2783 for (size_t n = 0u; n < number; ++n)
2784 {
2785 __m128 v0 = _mm_load1_ps(vectors[n].data() + 0);
2786 __m128 v1 = _mm_load1_ps(vectors[n].data() + 1);
2787 __m128 v2 = _mm_load1_ps(vectors[n].data() + 2);
2788 __m128 v3 = _mm_load1_ps(vectors[n].data() + 3);
2789
2790 // first column
2791 __m128 r0 = _mm_mul_ps(c0, v0);
2792
2793 // second column
2794 __m128 r1 = _mm_mul_ps(c1, v1);
2795
2796 r0 = _mm_add_ps(r0, r1);
2797
2798 // third column
2799 __m128 r2 = _mm_mul_ps(c2, v2);
2800 r0 = _mm_add_ps(r0, r2);
2801
2802 // fourth column
2803 __m128 r3 = _mm_mul_ps(c3, v3);
2804 r0 = _mm_add_ps(r0, r3);
2805
2806 _mm_storeu_ps(results[n].data(), r0);
2807 }
2808}
2809
2810#endif // OCEAN_HARDWARE_SSE_VERSION >= 10
2811
2812#endif // OCEAN_USE_SLOWER_IMPLEMENTATION
2813
2814#endif // OCEAN_HARDWARE_AVX_VERSION
2815
2816#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2817
2818#ifdef __aarch64__
2819
2820template <>
2821inline void SquareMatrixT4<double>::multiply(const SquareMatrixT4<double>& matrix, const VectorT4<double>* vectors, VectorT4<double>* results, const size_t number)
2822{
2823 // the following NEON code is almost identical to the SSE implementation
2824
2825 // now we load the four columns
2826 float64x2_t c0a = vld1q_f64(matrix.values + 0);
2827 float64x2_t c0b = vld1q_f64(matrix.values + 2);
2828 float64x2_t c1a = vld1q_f64(matrix.values + 4);
2829 float64x2_t c1b = vld1q_f64(matrix.values + 6);
2830 float64x2_t c2a = vld1q_f64(matrix.values + 8);
2831 float64x2_t c2b = vld1q_f64(matrix.values + 10);
2832 float64x2_t c3a = vld1q_f64(matrix.values + 12);
2833 float64x2_t c3b = vld1q_f64(matrix.values + 14);
2834
2835 for (size_t n = 0u; n < number; ++n)
2836 {
2837 float64x2_t v0 = vld1q_dup_f64(vectors[n].data() + 0);
2838 float64x2_t v1 = vld1q_dup_f64(vectors[n].data() + 1);
2839 float64x2_t v2 = vld1q_dup_f64(vectors[n].data() + 2);
2840 float64x2_t v3 = vld1q_dup_f64(vectors[n].data() + 3);
2841
2842 float64x2_t r0a = vmulq_f64(c0a, v0);
2843 float64x2_t r0b = vmulq_f64(c0b, v0);
2844
2845 float64x2_t r1a = vmulq_f64(c1a, v1);
2846 float64x2_t r1b = vmulq_f64(c1b, v1);
2847
2848 r0a = vaddq_f64(r0a, r1a);
2849 r0b = vaddq_f64(r0b, r1b);
2850
2851 float64x2_t r2a = vmulq_f64(c2a, v2);
2852 float64x2_t r2b = vmulq_f64(c2b, v2);
2853
2854 r0a = vaddq_f64(r0a, r2a);
2855 r0b = vaddq_f64(r0b, r2b);
2856
2857 float64x2_t r3a = vmulq_f64(c3a, v3);
2858 float64x2_t r3b = vmulq_f64(c3b, v3);
2859
2860 r0a = vaddq_f64(r0a, r3a);
2861 r0b = vaddq_f64(r0b, r3b);
2862
2863 vst1q_f64(results[n].data() + 0, r0a);
2864 vst1q_f64(results[n].data() + 2, r0b);
2865 }
2866}
2867
2868#endif // __aarch64__
2869
2870template <>
2871inline void SquareMatrixT4<float>::multiply(const SquareMatrixT4<float>& matrix, const VectorT4<float>* vectors, VectorT4<float>* results, const size_t number)
2872{
2873 // the following NEON code is almost identical to the SSE implementation
2874
2875 // now we load the four columns
2876 float32x4_t c0 = vld1q_f32(matrix.values + 0);
2877 float32x4_t c1 = vld1q_f32(matrix.values + 4);
2878 float32x4_t c2 = vld1q_f32(matrix.values + 8);
2879 float32x4_t c3 = vld1q_f32(matrix.values + 12);
2880
2881 for (size_t n = 0u; n < number; ++n)
2882 {
2883 float32x4_t v0 = vld1q_dup_f32(vectors[n].data() + 0);
2884 float32x4_t v1 = vld1q_dup_f32(vectors[n].data() + 1);
2885 float32x4_t v2 = vld1q_dup_f32(vectors[n].data() + 2);
2886 float32x4_t v3 = vld1q_dup_f32(vectors[n].data() + 3);
2887
2888 // first column
2889 float32x4_t r0 = vmulq_f32(c0, v0);
2890
2891 // second column
2892 float32x4_t r1 = vmulq_f32(c1, v1);
2893
2894 r0 = vaddq_f32(r0, r1);
2895
2896 // third column
2897 float32x4_t r2 = vmulq_f32(c2, v2);
2898
2899 r0 = vaddq_f32(r0, r2);
2900
2901 // fourth column
2902 float32x4_t r3 = vmulq_f32(c3, v3);
2903
2904 r0 = vaddq_f32(r0, r3);
2905
2906 vst1q_f32(results[n].data(), r0);
2907 }
2908}
2909
2910#endif
2911
2912template <typename T>
2913template <typename U>
2914inline std::vector< SquareMatrixT4<T> > SquareMatrixT4<T>::matrices2matrices(const std::vector< SquareMatrixT4<U> >& matrices)
2915{
2916 std::vector< SquareMatrixT4<T> > result;
2917 result.reserve(matrices.size());
2918
2919 for (typename std::vector< SquareMatrixT4<U> >::const_iterator i = matrices.begin(); i != matrices.end(); ++i)
2920 {
2921 result.push_back(SquareMatrixT4<T>(*i));
2922 }
2923
2924 return result;
2925}
2926
2927template <>
2928template <>
2929inline std::vector< SquareMatrixT4<float> > SquareMatrixT4<float>::matrices2matrices(const std::vector< SquareMatrixT4<float> >& matrices)
2930{
2931 return matrices;
2932}
2933
2934template <>
2935template <>
2936inline std::vector< SquareMatrixT4<double> > SquareMatrixT4<double>::matrices2matrices(const std::vector< SquareMatrixT4<double> >& matrices)
2937{
2938 return matrices;
2939}
2940
2941template <typename T>
2942template <typename U>
2943inline std::vector< SquareMatrixT4<T> > SquareMatrixT4<T>::matrices2matrices(const SquareMatrixT4<U>* matrices, const size_t size)
2944{
2945 std::vector< SquareMatrixT4<T> > result;
2946 result.reserve(size);
2947
2948 for (size_t n = 0; n < size; ++n)
2949 {
2950 result.push_back(SquareMatrixT4<T>(matrices[n]));
2951 }
2952
2953 return result;
2954}
2955
2956template <typename T>
2957void SquareMatrixT4<T>::swapRows(const unsigned int row0, const unsigned int row1)
2958{
2959 ocean_assert(row0 < 4u && row1 < 4u);
2960
2961 if (row0 == row1)
2962 {
2963 return;
2964 }
2965
2966 T* first = values + row0;
2967 T* second = values + row1;
2968
2969 T tmp = *first;
2970 *first = *second;
2971 *second = tmp;
2972
2973 first += 4;
2974 second += 4;
2975 tmp = *first;
2976 *first = *second;
2977 *second = tmp;
2978
2979 first += 4;
2980 second += 4;
2981 tmp = *first;
2982 *first = *second;
2983 *second = tmp;
2984
2985 first += 4;
2986 second += 4;
2987 tmp = *first;
2988 *first = *second;
2989 *second = tmp;
2990}
2991
2992template <typename T>
2993void SquareMatrixT4<T>::multiplyRow(const unsigned int row, const T scalar)
2994{
2995 ocean_assert(row < 4u);
2996
2997 T* element = values + row;
2998
2999 *element *= scalar;
3000 element += 4;
3001 *element *= scalar;
3002 element += 4;
3003 *element *= scalar;
3004 element += 4;
3005 *element *= scalar;
3006}
3007
3008template <typename T>
3009void SquareMatrixT4<T>::addRows(const unsigned int targetRow, unsigned int const sourceRow, const T scalar)
3010{
3011 ocean_assert(targetRow < 4u && sourceRow < 4u);
3012 ocean_assert(targetRow != sourceRow);
3013
3014 T* target = values + targetRow;
3015 T* source = values + sourceRow;
3016
3017 *target += *source * scalar;
3018
3019 target += 4;
3020 source += 4;
3021 *target += *source * scalar;
3022
3023 target += 4;
3024 source += 4;
3025 *target += *source * scalar;
3026
3027 target += 4;
3028 source += 4;
3029 *target += *source * scalar;
3030}
3031
3032template <typename T>
3033std::ostream& operator<<(std::ostream& stream, const SquareMatrixT4<T>& matrix)
3034{
3035 stream << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|" << std::endl;
3036 stream << "|" << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|" << std::endl;
3037 stream << "|" << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|" << std::endl;
3038 stream << "|" << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3039
3040 return stream;
3041}
3042
3043template <bool tActive, typename T>
3044MessageObject<tActive>& operator<<(MessageObject<tActive>& messageObject, const SquareMatrixT4<T>& matrix)
3045{
3046 return messageObject << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|\n|"
3047 << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|\n|"
3048 << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|\n|"
3049 << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3050}
3051
3052template <bool tActive, typename T>
3053MessageObject<tActive>& operator<<(MessageObject<tActive>&& messageObject, const SquareMatrixT4<T>& matrix)
3054{
3055 return messageObject << "|" << matrix(0, 0) << ", " << matrix(0, 1) << ", " << matrix(0, 2) << ", " << matrix(0, 3) << "|\n|"
3056 << matrix(1, 0) << ", " << matrix(1, 1) << ", " << matrix(1, 2) << ", " << matrix(1, 3) << "|\n|"
3057 << matrix(2, 0) << ", " << matrix(2, 1) << ", " << matrix(2, 2) << ", " << matrix(2, 3) << "|\n|"
3058 << matrix(3, 0) << ", " << matrix(3, 1) << ", " << matrix(3, 2) << ", " << matrix(3, 3) << "|";
3059}
3060
3061}
3062
3063#endif // META_OCEAN_MATH_SQUARE_MATRIX_4_H
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:130
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual T focalLengthX() const =0
Returns the horizontal focal length parameter.
virtual T focalLengthY() const =0
Returns the vertical focal length parameter.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual T principalPointY() const =0
Returns the y-value of the principal point of the camera image in the pixel domain.
virtual bool isValid() const =0
Returns whether this camera is valid.
virtual T principalPointX() const =0
Returns the x-value of the principal point of the camera image in the pixel domain.
static unsigned int solveQuartic(const T a, const T b, const T c, const T d, const T e, T *x)
Solves a quartic equation with the form: ax^4 + bx^3 + cx^2 + dx + e = 0.
Definition Equation.h:273
This class implements a 4x4 homogeneous transformation matrix using floating point values with the pr...
Definition HomogenousMatrix4.h:110
HomogenousMatrixT4< T > inverted() const noexcept
Returns the inverted of this matrix.
Definition HomogenousMatrix4.h:1575
VectorT3< T > translation() const
Returns the translation of the transformation.
Definition HomogenousMatrix4.h:1381
bool isValid() const
Returns whether this matrix is a valid homogeneous transformation.
Definition HomogenousMatrix4.h:1806
This class provides basic numeric functionalities.
Definition Numeric.h:57
static T atan(const T value)
Returns the arctangent of a given value.
Definition Numeric.h:1616
static T abs(const T value)
Returns the absolute value of a given value.
Definition Numeric.h:1220
static constexpr T eps()
Returns a small epsilon.
static bool isEqual(const T first, const T second)
Returns whether two values are equal up to a small epsilon.
Definition Numeric.h:2386
static T tan(const T value)
Returns the tangent of a given value.
Definition Numeric.h:1600
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2087
This class implements a 3x3 square matrix.
Definition SquareMatrix3.h:88
This class implements a 4x4 square matrix.
Definition SquareMatrix4.h:85
OCEAN_FORCE_INLINE SquareMatrixT4< T > & operator*=(const SquareMatrixT4< T > &matrix)
Multiplies and assigns two matrices.
Definition SquareMatrix4.h:1174
void copyElements(U *arrayValues) const
Copies the elements of this matrix to an array with floating point values of type U.
Definition SquareMatrix4.h:1137
SquareMatrixT4< T > & operator-=(const SquareMatrixT4< T > &matrix)
Subtracts and assigns two matrices.
Definition SquareMatrix4.h:1340
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix4.h:790
T operator()(const unsigned int index) const
Element operator.
Definition SquareMatrix4.h:1216
SquareMatrixT4(const U *arrayValues, const bool valuesRowAligned)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
Definition SquareMatrix4.h:686
static SquareMatrixT4< T > projectionMatrix(const T fovX, const T aspectRatio, const T nearDistance, const T farDistance)
Creates a projection matrix defined by the horizontal field of view, the aspect ratio and the near an...
Definition SquareMatrix4.h:2391
OCEAN_FORCE_INLINE VectorT3< T > operator*(const VectorT3< T > &vector) const
Multiply operator for a 3D vector.
Definition SquareMatrix4.h:1777
bool isIdentity() const
Returns whether this matrix is the identity matrix.
Definition SquareMatrix4.h:1022
SquareMatrixT4(const U *arrayValues)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements of float type U.
Definition SquareMatrix4.h:667
SquareMatrixT4(const SquareMatrixT4< U > &matrix)
Copy constructor for a matrix with difference element data type than T.
Definition SquareMatrix4.h:602
void transpose()
Transposes the matrix.
Definition SquareMatrix4.h:828
static SquareMatrixT4< T > projectionMatrix(const AnyCameraT< T > &anyCamera, const T nearDistance, const T farDistance)
Creates a projection matrix defined by a camera profile of a pinhole camera and the near and far clip...
Definition SquareMatrix4.h:2436
SquareMatrixT4< T > transposed() const
Returns the transposed of this matrix.
Definition SquareMatrix4.h:802
void swapRows(const unsigned int row0, const unsigned int row1)
Swaps two rows of this matrix.
Definition SquareMatrix4.h:2957
void copyElements(T *arrayValues) const
Copies the elements of this matrix to an array with floating point values.
Definition SquareMatrix4.h:1160
SquareMatrixT4< T > inverted() const
Returns the inverted matrix of this matrix.
Definition SquareMatrix4.h:852
bool invert(SquareMatrixT4< T > &invertedMatrix) const
Inverts the matrix and returns the result.
Definition SquareMatrix4.h:881
SquareMatrixT4< T > operator+(const SquareMatrixT4< T > &matrix) const
Adds two matrices.
Definition SquareMatrix4.h:1267
T operator[](const unsigned int index) const
Element operator.
Definition SquareMatrix4.h:1188
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition SquareMatrix4.h:1040
SquareMatrixT4()
Creates a new SquareMatrixT4 object with undefined elements.
Definition SquareMatrix4.h:595
SquareMatrixT4(const VectorT4< T > &diagonal)
Creates a new SquareMatrixT4 object by a given diagonal vector.
Definition SquareMatrix4.h:769
OCEAN_FORCE_INLINE SquareMatrixT4< T > operator*(const HomogenousMatrixT4< T > &matrix) const
Multiplies two matrices.
Definition SquareMatrix4.h:1749
friend class SquareMatrixT4
Definition SquareMatrix4.h:86
bool isSymmetric(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is symmetric.
Definition SquareMatrix4.h:1046
T values[16]
The sixteen values of the matrix.
Definition SquareMatrix4.h:591
T determinant() const
Returns the determinant of the matrix.
Definition SquareMatrix4.h:946
T * data()
Returns a pointer to the internal values.
Definition SquareMatrix4.h:796
T operator()(const unsigned int row, const unsigned int column) const
Element operator.
Definition SquareMatrix4.h:1202
bool isEqual(const SquareMatrixT4< T > &matrix, const T eps=NumericT< T >::eps()) const
Returns whether two matrices are almost identical up to a specified epsilon.
Definition SquareMatrix4.h:1055
void toNull()
Sets the matrix to a zero matrix.
Definition SquareMatrix4.h:1001
bool eigenSystem(VectorT4< T > &eigenValues, SquareMatrixT4< T > &eigenVectors)
Performs an eigen value analysis.
Definition SquareMatrix4.h:1068
bool operator==(const SquareMatrixT4< T > &matrix) const
Returns whether two matrices are identical up to a small epsilon.
Definition SquareMatrix4.h:1261
static size_t elements()
Returns the number of elements this matrix has.
Definition SquareMatrix4.h:1255
SquareMatrixT4< T > & operator+=(const SquareMatrixT4< T > &matrix)
Adds and assigns two matrices.
Definition SquareMatrix4.h:1292
const T * operator()() const
Access operator.
Definition SquareMatrix4.h:1230
SquareMatrixT4< T > & operator=(const SquareMatrixT4< T > &)=default
Default assign operator.
SquareMatrixT4(const T *arrayValues, const bool valuesRowAligned)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
Definition SquareMatrix4.h:717
static void multiply(const SquareMatrixT4< T > &matrix, const VectorT4< T > *vectors, VectorT4< T > *results, const size_t number)
Multiplies several 4D vectors with a given matrix.
Definition SquareMatrix4.h:2561
SquareMatrixT4< T > operator-() const
Returns the negative matrix of this matrix (all matrix elements are multiplied by -1).
Definition SquareMatrix4.h:1363
bool isNull() const
Returns whether this matrix is a null matrix.
Definition SquareMatrix4.h:1031
OCEAN_FORCE_INLINE SquareMatrixT4< T > & operator*=(const HomogenousMatrixT4< T > &matrix)
Multiplies and assigns two matrices.
Definition SquareMatrix4.h:1181
SquareMatrixT4(const SquareMatrixT3< T > &subMatrix)
Creates a new SquareMatrixT4 object by given 3x3 sub matrix.
Definition SquareMatrix4.h:753
SquareMatrixT4< T > operator-(const SquareMatrixT4< T > &matrix) const
Subtracts two matrices.
Definition SquareMatrix4.h:1315
void addRows(const unsigned int targetRow, const unsigned int sourceRow, const T scalar)
Multiplies elements from a specific row with a scalar and adds them to another row.
Definition SquareMatrix4.h:3009
static std::vector< SquareMatrixT4< T > > matrices2matrices(const std::vector< SquareMatrixT4< U > > &matrices)
Converts matrices with specific data type to matrices with different data type.
Definition SquareMatrix4.h:2914
static SquareMatrixT4< T > frustumMatrix(const T left, const T right, const T top, const T bottom, const T nearDistance, const T farDistance)
Creates a projection matrix defined by an asymmetric viewing frustum.
Definition SquareMatrix4.h:2493
T & operator()(const unsigned int index)
Element operator.
Definition SquareMatrix4.h:1223
SquareMatrixT4< T > operator*(const T value) const
Multiplies this matrix with a scalar value.
Definition SquareMatrix4.h:2343
bool invert()
Inverts this matrix in place.
Definition SquareMatrix4.h:866
bool operator!=(const SquareMatrixT4< T > &matrix) const
Returns whether two matrices are not identical up to a small epsilon.
Definition SquareMatrix4.h:1168
size_t operator()(const SquareMatrixT4< T > &matrix) const
Hash function.
Definition SquareMatrix4.h:1242
static std::vector< SquareMatrixT4< T > > matrices2matrices(const SquareMatrixT4< U > *matrices, const size_t size)
Converts matrices with specific data type to matrices with different data type.
Definition SquareMatrix4.h:2943
OCEAN_FORCE_INLINE SquareMatrixT4< T > operator*(const SquareMatrixT4< T > &matrix) const
Multiplies two matrices.
Definition SquareMatrix4.h:1387
T & operator()(const unsigned int row, const unsigned int column)
Element operator.
Definition SquareMatrix4.h:1209
T & operator[](const unsigned int index)
Element operator.
Definition SquareMatrix4.h:1195
void multiplyRow(const unsigned int row, const T scalar)
Multiplies a row with a scalar value.
Definition SquareMatrix4.h:2993
T trace() const
Returns the trace of the matrix which is the sum of the diagonal elements.
Definition SquareMatrix4.h:974
SquareMatrixT4(const bool setToIdentity)
Creates a new SquareMatrixT4 object.
Definition SquareMatrix4.h:623
SquareMatrixT4(const HomogenousMatrixT4< T > &transformation)
Creates a new SquareMatrixT4 object by given transformation matrix.
Definition SquareMatrix4.h:747
OCEAN_FORCE_INLINE VectorT4< T > operator*(const VectorT4< T > &vector) const
Multiply operator for a 4D vector.
Definition SquareMatrix4.h:1790
T * operator()()
Access operator.
Definition SquareMatrix4.h:1236
T Type
Definition of the used data type.
Definition SquareMatrix4.h:93
static SquareMatrixT4< T > frustumMatrix(const T width, const T height, const HomogenousMatrixT4< T > &viewingMatrix, const T nearDistance, const T farDistance)
Creates a project matrix defined by an asymmetric viewing frustum.
Definition SquareMatrix4.h:2539
SquareMatrixT4< T > & operator*=(const T value)
Multiplies and assigns this matrix with a scalar value.
Definition SquareMatrix4.h:2368
void toIdentity()
Sets the matrix to the identity matrix.
Definition SquareMatrix4.h:980
SquareMatrixT4(const SquareMatrixT4< T > &matrix)=default
Copy constructor.
SquareMatrixT4(const T *arrayValues)
Creates a new SquareMatrixT4 object by an array of at least sixteen elements.
Definition SquareMatrix4.h:678
This class implements a vector with three elements.
Definition Vector3.h:97
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
This class implements a vector with four elements.
Definition Vector4.h:97
const T * data() const noexcept
Returns an pointer to the vector elements.
Definition Vector4.h:732
std::vector< SquareMatrixT4< T > > SquareMatricesT4
Definition of a typename alias for vectors with SquareMatrixT4 objects.
Definition SquareMatrix4.h:61
SquareMatrixT4< float > SquareMatrixF4
Instantiation of the SquareMatrixT4 template class using a double precision float data type.
Definition SquareMatrix4.h:53
SquareMatrixT4< Scalar > SquareMatrix4
Definition of the SquareMatrix4 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix4.h:39
std::vector< SquareMatrix4 > SquareMatrices4
Definition of a vector holding SquareMatrix4 objects.
Definition SquareMatrix4.h:68
SquareMatrixT4< double > SquareMatrixD4
Instantiation of the SquareMatrixT4 template class using a double precision float data type.
Definition SquareMatrix4.h:46
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
std::ostream & operator<<(std::ostream &stream, const HighPerformanceStatistic &highPerformanceStatistic)
Definition HighPerformanceTimer.h:963