diff options
Diffstat (limited to 'src/Eigen/src/LU')
-rw-r--r-- | src/Eigen/src/LU/Determinant.h | 117 | ||||
-rw-r--r-- | src/Eigen/src/LU/FullPivLU.h | 877 | ||||
-rw-r--r-- | src/Eigen/src/LU/InverseImpl.h | 432 | ||||
-rw-r--r-- | src/Eigen/src/LU/PartialPivLU.h | 624 | ||||
-rw-r--r-- | src/Eigen/src/LU/PartialPivLU_LAPACKE.h | 83 | ||||
-rw-r--r-- | src/Eigen/src/LU/arch/InverseSize4.h | 351 |
6 files changed, 2484 insertions, 0 deletions
diff --git a/src/Eigen/src/LU/Determinant.h b/src/Eigen/src/LU/Determinant.h new file mode 100644 index 0000000..3a41e6f --- /dev/null +++ b/src/Eigen/src/LU/Determinant.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DETERMINANT_H +#define EIGEN_DETERMINANT_H + +namespace Eigen { + +namespace internal { + +template<typename Derived> +EIGEN_DEVICE_FUNC +inline const typename Derived::Scalar bruteforce_det3_helper +(const MatrixBase<Derived>& matrix, int a, int b, int c) +{ + return matrix.coeff(0,a) + * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b)); +} + +template<typename Derived, + int DeterminantType = Derived::RowsAtCompileTime +> struct determinant_impl +{ + static inline typename traits<Derived>::Scalar run(const Derived& m) + { + if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0) + return typename traits<Derived>::Scalar(1); + return m.partialPivLu().determinant(); + } +}; + +template<typename Derived> struct determinant_impl<Derived, 1> +{ + static inline EIGEN_DEVICE_FUNC + typename traits<Derived>::Scalar run(const Derived& m) + { + return m.coeff(0,0); + } +}; + +template<typename Derived> struct determinant_impl<Derived, 2> +{ + static inline EIGEN_DEVICE_FUNC + typename traits<Derived>::Scalar run(const Derived& m) + { + return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1); + } +}; + +template<typename Derived> struct determinant_impl<Derived, 3> +{ + static inline EIGEN_DEVICE_FUNC + typename traits<Derived>::Scalar run(const Derived& m) + { + return bruteforce_det3_helper(m,0,1,2) + - bruteforce_det3_helper(m,1,0,2) + + bruteforce_det3_helper(m,2,0,1); + } +}; + +template<typename Derived> struct determinant_impl<Derived, 4> +{ + typedef typename traits<Derived>::Scalar Scalar; + static EIGEN_DEVICE_FUNC + Scalar run(const Derived& m) + { + Scalar d2_01 = det2(m, 0, 1); + Scalar d2_02 = det2(m, 0, 2); + Scalar d2_03 = det2(m, 0, 3); + Scalar d2_12 = det2(m, 1, 2); + Scalar d2_13 = det2(m, 1, 3); + Scalar d2_23 = det2(m, 2, 3); + Scalar d3_0 = det3(m, 1,d2_23, 2,d2_13, 3,d2_12); + Scalar d3_1 = det3(m, 0,d2_23, 2,d2_03, 3,d2_02); + Scalar d3_2 = det3(m, 0,d2_13, 1,d2_03, 3,d2_01); + Scalar d3_3 = det3(m, 0,d2_12, 1,d2_02, 2,d2_01); + return internal::pmadd(-m(0,3),d3_0, m(1,3)*d3_1) + + internal::pmadd(-m(2,3),d3_2, m(3,3)*d3_3); + } +protected: + static EIGEN_DEVICE_FUNC + Scalar det2(const Derived& m, Index i0, Index i1) + { + return m(i0,0) * m(i1,1) - m(i1,0) * m(i0,1); + } + + static EIGEN_DEVICE_FUNC + Scalar det3(const Derived& m, Index i0, const Scalar& d0, Index i1, const Scalar& d1, Index i2, const Scalar& d2) + { + return internal::pmadd(m(i0,2), d0, internal::pmadd(-m(i1,2), d1, m(i2,2)*d2)); + } +}; + +} // end namespace internal + +/** \lu_module + * + * \returns the determinant of this matrix + */ +template<typename Derived> +EIGEN_DEVICE_FUNC +inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const +{ + eigen_assert(rows() == cols()); + typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested; + return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DETERMINANT_H diff --git a/src/Eigen/src/LU/FullPivLU.h b/src/Eigen/src/LU/FullPivLU.h new file mode 100644 index 0000000..ba1749f --- /dev/null +++ b/src/Eigen/src/LU/FullPivLU.h @@ -0,0 +1,877 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LU_H +#define EIGEN_LU_H + +namespace Eigen { + +namespace internal { +template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef int StorageIndex; + enum { Flags = 0 }; +}; + +} // end namespace internal + +/** \ingroup LU_Module + * + * \class FullPivLU + * + * \brief LU decomposition of a matrix with complete pivoting, and related features + * + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition + * + * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is + * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is + * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU + * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any + * zeros are at the end. + * + * This decomposition provides the generic approach to solving systems of linear equations, computing + * the rank, invertibility, inverse, kernel, and determinant. + * + * This LU decomposition is very stable and well tested with large matrices. However there are use cases where the SVD + * decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix, + * working with the SVD allows to select the smallest singular values of the matrix, something that + * the LU decomposition doesn't see. + * + * The data of the LU decomposition can be directly accessed through the methods matrixLU(), + * permutationP(), permutationQ(). + * + * As an example, here is how the original matrix can be retrieved: + * \include class_FullPivLU.cpp + * Output: \verbinclude class_FullPivLU.out + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() + */ +template<typename _MatrixType> class FullPivLU + : public SolverBase<FullPivLU<_MatrixType> > +{ + public: + typedef _MatrixType MatrixType; + typedef SolverBase<FullPivLU> Base; + friend class SolverBase<FullPivLU>; + + EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) + enum { + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType; + typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType; + typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType; + typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType; + typedef typename MatrixType::PlainObject PlainObject; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LU::compute(const MatrixType&). + */ + FullPivLU(); + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa FullPivLU() + */ + FullPivLU(Index rows, Index cols); + + /** Constructor. + * + * \param matrix the matrix of which to compute the LU decomposition. + * It is required to be nonzero. + */ + template<typename InputType> + explicit FullPivLU(const EigenBase<InputType>& matrix); + + /** \brief Constructs a LU factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa FullPivLU(const EigenBase&) + */ + template<typename InputType> + explicit FullPivLU(EigenBase<InputType>& matrix); + + /** Computes the LU decomposition of the given matrix. + * + * \param matrix the matrix of which to compute the LU decomposition. + * It is required to be nonzero. + * + * \returns a reference to *this + */ + template<typename InputType> + FullPivLU& compute(const EigenBase<InputType>& matrix) { + m_lu = matrix.derived(); + computeInPlace(); + return *this; + } + + /** \returns the LU decomposition matrix: the upper-triangular part is U, the + * unit-lower-triangular part is L (at least for square matrices; in the non-square + * case, special care is needed, see the documentation of class FullPivLU). + * + * \sa matrixL(), matrixU() + */ + inline const MatrixType& matrixLU() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return m_lu; + } + + /** \returns the number of nonzero pivots in the LU decomposition. + * Here nonzero is meant in the exact sense, not in a fuzzy sense. + * So that notion isn't really intrinsically interesting, but it is + * still useful when implementing algorithms. + * + * \sa rank() + */ + inline Index nonzeroPivots() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return m_nonzero_pivots; + } + + /** \returns the absolute value of the biggest pivot, i.e. the biggest + * diagonal coefficient of U. + */ + RealScalar maxPivot() const { return m_maxpivot; } + + /** \returns the permutation matrix P + * + * \sa permutationQ() + */ + EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return m_p; + } + + /** \returns the permutation matrix Q + * + * \sa permutationP() + */ + inline const PermutationQType& permutationQ() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return m_q; + } + + /** \returns the kernel of the matrix, also called its null-space. The columns of the returned matrix + * will form a basis of the kernel. + * + * \note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + * + * Example: \include FullPivLU_kernel.cpp + * Output: \verbinclude FullPivLU_kernel.out + * + * \sa image() + */ + inline const internal::kernel_retval<FullPivLU> kernel() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return internal::kernel_retval<FullPivLU>(*this); + } + + /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix + * will form a basis of the image (column-space). + * + * \param originalMatrix the original matrix, of which *this is the LU decomposition. + * The reason why it is needed to pass it here, is that this allows + * a large optimization, as otherwise this method would need to reconstruct it + * from the LU decomposition. + * + * \note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + * + * Example: \include FullPivLU_image.cpp + * Output: \verbinclude FullPivLU_image.out + * + * \sa kernel() + */ + inline const internal::image_retval<FullPivLU> + image(const MatrixType& originalMatrix) const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return internal::image_retval<FullPivLU>(*this, originalMatrix); + } + + #ifdef EIGEN_PARSED_BY_DOXYGEN + /** \return a solution x to the equation Ax=b, where A is the matrix of which + * *this is the LU decomposition. + * + * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix, + * the only requirement in order for the equation to make sense is that + * b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition. + * + * \returns a solution. + * + * \note_about_checking_solutions + * + * \note_about_arbitrary_choice_of_solution + * \note_about_using_kernel_to_study_multiple_solutions + * + * Example: \include FullPivLU_solve.cpp + * Output: \verbinclude FullPivLU_solve.out + * + * \sa TriangularView::solve(), kernel(), inverse() + */ + template<typename Rhs> + inline const Solve<FullPivLU, Rhs> + solve(const MatrixBase<Rhs>& b) const; + #endif + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + /** \returns the determinant of the matrix of which + * *this is the LU decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the LU decomposition has already been computed. + * + * \note This is only for square matrices. + * + * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers + * optimized paths. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * + * \sa MatrixBase::determinant() + */ + typename internal::traits<MatrixType>::Scalar determinant() const; + + /** Allows to prescribe a threshold to be used by certain methods, such as rank(), + * who need to determine when pivots are to be considered nonzero. This is not used for the + * LU decomposition itself. + * + * When it needs to get the threshold value, Eigen calls threshold(). By default, this + * uses a formula to automatically determine a reasonable threshold. + * Once you have called the present method setThreshold(const RealScalar&), + * your value is used instead. + * + * \param threshold The new value to use as the threshold. + * + * A pivot will be considered nonzero if its absolute value is strictly greater than + * \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$ + * where maxpivot is the biggest pivot. + * + * If you want to come back to the default behavior, call setThreshold(Default_t) + */ + FullPivLU& setThreshold(const RealScalar& threshold) + { + m_usePrescribedThreshold = true; + m_prescribedThreshold = threshold; + return *this; + } + + /** Allows to come back to the default behavior, letting Eigen use its default formula for + * determining the threshold. + * + * You should pass the special object Eigen::Default as parameter here. + * \code lu.setThreshold(Eigen::Default); \endcode + * + * See the documentation of setThreshold(const RealScalar&). + */ + FullPivLU& setThreshold(Default_t) + { + m_usePrescribedThreshold = false; + return *this; + } + + /** Returns the threshold that will be used by certain methods such as rank(). + * + * See the documentation of setThreshold(const RealScalar&). + */ + RealScalar threshold() const + { + eigen_assert(m_isInitialized || m_usePrescribedThreshold); + return m_usePrescribedThreshold ? m_prescribedThreshold + // this formula comes from experimenting (see "LU precision tuning" thread on the list) + // and turns out to be identical to Higham's formula used already in LDLt. + : NumTraits<Scalar>::epsilon() * RealScalar(m_lu.diagonalSize()); + } + + /** \returns the rank of the matrix of which *this is the LU decomposition. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + */ + inline Index rank() const + { + using std::abs; + eigen_assert(m_isInitialized && "LU is not initialized."); + RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); + Index result = 0; + for(Index i = 0; i < m_nonzero_pivots; ++i) + result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold); + return result; + } + + /** \returns the dimension of the kernel of the matrix of which *this is the LU decomposition. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + */ + inline Index dimensionOfKernel() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return cols() - rank(); + } + + /** \returns true if the matrix of which *this is the LU decomposition represents an injective + * linear map, i.e. has trivial kernel; false otherwise. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + */ + inline bool isInjective() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return rank() == cols(); + } + + /** \returns true if the matrix of which *this is the LU decomposition represents a surjective + * linear map; false otherwise. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + */ + inline bool isSurjective() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return rank() == rows(); + } + + /** \returns true if the matrix of which *this is the LU decomposition is invertible. + * + * \note This method has to determine which pivots should be considered nonzero. + * For that, it uses the threshold value that you can control by calling + * setThreshold(const RealScalar&). + */ + inline bool isInvertible() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + return isInjective() && (m_lu.rows() == m_lu.cols()); + } + + /** \returns the inverse of the matrix of which *this is the LU decomposition. + * + * \note If this matrix is not invertible, the returned matrix has undefined coefficients. + * Use isInvertible() to first determine whether this matrix is invertible. + * + * \sa MatrixBase::inverse() + */ + inline const Inverse<FullPivLU> inverse() const + { + eigen_assert(m_isInitialized && "LU is not initialized."); + eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!"); + return Inverse<FullPivLU>(*this); + } + + MatrixType reconstructedMatrix() const; + + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + void _solve_impl(const RhsType &rhs, DstType &dst) const; + + template<bool Conjugate, typename RhsType, typename DstType> + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + void computeInPlace(); + + MatrixType m_lu; + PermutationPType m_p; + PermutationQType m_q; + IntColVectorType m_rowsTranspositions; + IntRowVectorType m_colsTranspositions; + Index m_nonzero_pivots; + RealScalar m_l1_norm; + RealScalar m_maxpivot, m_prescribedThreshold; + signed char m_det_pq; + bool m_isInitialized, m_usePrescribedThreshold; +}; + +template<typename MatrixType> +FullPivLU<MatrixType>::FullPivLU() + : m_isInitialized(false), m_usePrescribedThreshold(false) +{ +} + +template<typename MatrixType> +FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols) + : m_lu(rows, cols), + m_p(rows), + m_q(cols), + m_rowsTranspositions(rows), + m_colsTranspositions(cols), + m_isInitialized(false), + m_usePrescribedThreshold(false) +{ +} + +template<typename MatrixType> +template<typename InputType> +FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix) + : m_lu(matrix.rows(), matrix.cols()), + m_p(matrix.rows()), + m_q(matrix.cols()), + m_rowsTranspositions(matrix.rows()), + m_colsTranspositions(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) +{ + compute(matrix.derived()); +} + +template<typename MatrixType> +template<typename InputType> +FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_q(matrix.cols()), + m_rowsTranspositions(matrix.rows()), + m_colsTranspositions(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) +{ + computeInPlace(); +} + +template<typename MatrixType> +void FullPivLU<MatrixType>::computeInPlace() +{ + check_template_parameters(); + + // the permutations are stored as int indices, so just to be sure: + eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); + + const Index size = m_lu.diagonalSize(); + const Index rows = m_lu.rows(); + const Index cols = m_lu.cols(); + + // will store the transpositions, before we accumulate them at the end. + // can't accumulate on-the-fly because that will be done in reverse order for the rows. + m_rowsTranspositions.resize(m_lu.rows()); + m_colsTranspositions.resize(m_lu.cols()); + Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i + + m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) + m_maxpivot = RealScalar(0); + + for(Index k = 0; k < size; ++k) + { + // First, we need to find the pivot. + + // biggest coefficient in the remaining bottom-right corner (starting at row k, col k) + Index row_of_biggest_in_corner, col_of_biggest_in_corner; + typedef internal::scalar_score_coeff_op<Scalar> Scoring; + typedef typename Scoring::result_type Score; + Score biggest_in_corner; + biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k) + .unaryExpr(Scoring()) + .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); + row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, + col_of_biggest_in_corner += k; // need to add k to them. + + if(biggest_in_corner==Score(0)) + { + // before exiting, make sure to initialize the still uninitialized transpositions + // in a sane state without destroying what we already have. + m_nonzero_pivots = k; + for(Index i = k; i < size; ++i) + { + m_rowsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i); + m_colsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i); + } + break; + } + + RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner); + if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot; + + // Now that we've found the pivot, we need to apply the row/col swaps to + // bring it to the location (k,k). + + m_rowsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(row_of_biggest_in_corner); + m_colsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(col_of_biggest_in_corner); + if(k != row_of_biggest_in_corner) { + m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner)); + ++number_of_transpositions; + } + if(k != col_of_biggest_in_corner) { + m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner)); + ++number_of_transpositions; + } + + // Now that the pivot is at the right location, we update the remaining + // bottom-right corner by Gaussian elimination. + + if(k<rows-1) + m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k); + if(k<size-1) + m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1); + } + + // the main loop is over, we still have to accumulate the transpositions to find the + // permutations P and Q + + m_p.setIdentity(rows); + for(Index k = size-1; k >= 0; --k) + m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k)); + + m_q.setIdentity(cols); + for(Index k = 0; k < size; ++k) + m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k)); + + m_det_pq = (number_of_transpositions%2) ? -1 : 1; + + m_isInitialized = true; +} + +template<typename MatrixType> +typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const +{ + eigen_assert(m_isInitialized && "LU is not initialized."); + eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the determinant of a non-square matrix!"); + return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod()); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$. + * This function is provided for debug purposes. */ +template<typename MatrixType> +MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LU is not initialized."); + const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols()); + // LU + MatrixType res(m_lu.rows(),m_lu.cols()); + // FIXME the .toDenseMatrix() should not be needed... + res = m_lu.leftCols(smalldim) + .template triangularView<UnitLower>().toDenseMatrix() + * m_lu.topRows(smalldim) + .template triangularView<Upper>().toDenseMatrix(); + + // P^{-1}(LU) + res = m_p.inverse() * res; + + // (P^{-1}LU)Q^{-1} + res = res * m_q.inverse(); + + return res; +} + +/********* Implementation of kernel() **************************************************/ + +namespace internal { +template<typename _MatrixType> +struct kernel_retval<FullPivLU<_MatrixType> > + : kernel_retval_base<FullPivLU<_MatrixType> > +{ + EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>) + + enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( + MatrixType::MaxColsAtCompileTime, + MatrixType::MaxRowsAtCompileTime) + }; + + template<typename Dest> void evalTo(Dest& dst) const + { + using std::abs; + const Index cols = dec().matrixLU().cols(), dimker = cols - rank(); + if(dimker == 0) + { + // The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's + // avoid crashing/asserting as that depends on floating point calculations. Let's + // just return a single column vector filled with zeros. + dst.setZero(); + return; + } + + /* Let us use the following lemma: + * + * Lemma: If the matrix A has the LU decomposition PAQ = LU, + * then Ker A = Q(Ker U). + * + * Proof: trivial: just keep in mind that P, Q, L are invertible. + */ + + /* Thus, all we need to do is to compute Ker U, and then apply Q. + * + * U is upper triangular, with eigenvalues sorted so that any zeros appear at the end. + * Thus, the diagonal of U ends with exactly + * dimKer zero's. Let us use that to construct dimKer linearly + * independent vectors in Ker U. + */ + + Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank()); + RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold(); + Index p = 0; + for(Index i = 0; i < dec().nonzeroPivots(); ++i) + if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold) + pivots.coeffRef(p++) = i; + eigen_internal_assert(p == rank()); + + // we construct a temporaty trapezoid matrix m, by taking the U matrix and + // permuting the rows and cols to bring the nonnegligible pivots to the top of + // the main diagonal. We need that to be able to apply our triangular solvers. + // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified + Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options, + MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime> + m(dec().matrixLU().block(0, 0, rank(), cols)); + for(Index i = 0; i < rank(); ++i) + { + if(i) m.row(i).head(i).setZero(); + m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i); + } + m.block(0, 0, rank(), rank()); + m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero(); + for(Index i = 0; i < rank(); ++i) + m.col(i).swap(m.col(pivots.coeff(i))); + + // ok, we have our trapezoid matrix, we can apply the triangular solver. + // notice that the math behind this suggests that we should apply this to the + // negative of the RHS, but for performance we just put the negative sign elsewhere, see below. + m.topLeftCorner(rank(), rank()) + .template triangularView<Upper>().solveInPlace( + m.topRightCorner(rank(), dimker) + ); + + // now we must undo the column permutation that we had applied! + for(Index i = rank()-1; i >= 0; --i) + m.col(i).swap(m.col(pivots.coeff(i))); + + // see the negative sign in the next line, that's what we were talking about above. + for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker); + for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero(); + for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1); + } +}; + +/***** Implementation of image() *****************************************************/ + +template<typename _MatrixType> +struct image_retval<FullPivLU<_MatrixType> > + : image_retval_base<FullPivLU<_MatrixType> > +{ + EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>) + + enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED( + MatrixType::MaxColsAtCompileTime, + MatrixType::MaxRowsAtCompileTime) + }; + + template<typename Dest> void evalTo(Dest& dst) const + { + using std::abs; + if(rank() == 0) + { + // The Image is just {0}, so it doesn't have a basis properly speaking, but let's + // avoid crashing/asserting as that depends on floating point calculations. Let's + // just return a single column vector filled with zeros. + dst.setZero(); + return; + } + + Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank()); + RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold(); + Index p = 0; + for(Index i = 0; i < dec().nonzeroPivots(); ++i) + if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold) + pivots.coeffRef(p++) = i; + eigen_internal_assert(p == rank()); + + for(Index i = 0; i < rank(); ++i) + dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i))); + } +}; + +/***** Implementation of solve() *****************************************************/ + +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename DstType> +void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. + * So we proceed as follows: + * Step 1: compute c = P * rhs. + * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. + * Step 3: replace c by the solution x to Ux = c. May or may not exist. + * Step 4: result = Q * c; + */ + + const Index rows = this->rows(), + cols = this->cols(), + nonzero_pivots = this->rank(); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationP() * rhs; + + // Step 2 + m_lu.topLeftCorner(smalldim,smalldim) + .template triangularView<UnitLower>() + .solveInPlace(c.topRows(smalldim)); + if(rows>cols) + c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols); + + // Step 3 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView<Upper>() + .solveInPlace(c.topRows(nonzero_pivots)); + + // Step 4 + for(Index i = 0; i < nonzero_pivots; ++i) + dst.row(permutationQ().indices().coeff(i)) = c.row(i); + for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) + dst.row(permutationQ().indices().coeff(i)).setZero(); +} + +template<typename _MatrixType> +template<bool Conjugate, typename RhsType, typename DstType> +void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, + * and since permutations are real and unitary, we can write this + * as A^T = Q U^T L^T P, + * So we proceed as follows: + * Step 1: compute c = Q^T rhs. + * Step 2: replace c by the solution x to U^T x = c. May or may not exist. + * Step 3: replace c by the solution x to L^T x = c. + * Step 4: result = P^T c. + * If Conjugate is true, replace "^T" by "^*" above. + */ + + const Index rows = this->rows(), cols = this->cols(), + nonzero_pivots = this->rank(); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationQ().inverse() * rhs; + + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView<Upper>() + .transpose() + .template conjugateIf<Conjugate>() + .solveInPlace(c.topRows(nonzero_pivots)); + + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView<UnitLower>() + .transpose() + .template conjugateIf<Conjugate>() + .solveInPlace(c.topRows(smalldim)); + + // Step 4 + PermutationPType invp = permutationP().inverse().eval(); + for(Index i = 0; i < smalldim; ++i) + dst.row(invp.indices().coeff(i)) = c.row(i); + for(Index i = smalldim; i < rows; ++i) + dst.row(invp.indices().coeff(i)).setZero(); +} + +#endif + +namespace internal { + + +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType> +struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense> +{ + typedef FullPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); + } +}; +} // end namespace internal + +/******* MatrixBase methods *****************************************************************/ + +/** \lu_module + * + * \return the full-pivoting LU decomposition of \c *this. + * + * \sa class FullPivLU + */ +template<typename Derived> +inline const FullPivLU<typename MatrixBase<Derived>::PlainObject> +MatrixBase<Derived>::fullPivLu() const +{ + return FullPivLU<PlainObject>(eval()); +} + +} // end namespace Eigen + +#endif // EIGEN_LU_H diff --git a/src/Eigen/src/LU/InverseImpl.h b/src/Eigen/src/LU/InverseImpl.h new file mode 100644 index 0000000..a40cefa --- /dev/null +++ b/src/Eigen/src/LU/InverseImpl.h @@ -0,0 +1,432 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_IMPL_H +#define EIGEN_INVERSE_IMPL_H + +namespace Eigen { + +namespace internal { + +/********************************** +*** General case implementation *** +**********************************/ + +template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime> +struct compute_inverse +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + result = matrix.partialPivLu().inverse(); + } +}; + +template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime> +struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ }; + +/**************************** +*** Size 1 implementation *** +****************************/ + +template<typename MatrixType, typename ResultType> +struct compute_inverse<MatrixType, ResultType, 1> +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename MatrixType::Scalar Scalar; + internal::evaluator<MatrixType> matrixEval(matrix); + result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0); + } +}; + +template<typename MatrixType, typename ResultType> +struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1> +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& result, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + determinant = matrix.coeff(0,0); + invertible = abs(determinant) > absDeterminantThreshold; + if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant; + } +}; + +/**************************** +*** Size 2 implementation *** +****************************/ + +template<typename MatrixType, typename ResultType> +EIGEN_DEVICE_FUNC +inline void compute_inverse_size2_helper( + const MatrixType& matrix, const typename ResultType::Scalar& invdet, + ResultType& result) +{ + typename ResultType::Scalar temp = matrix.coeff(0,0); + result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; + result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; + result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; + result.coeffRef(1,1) = temp * invdet; +} + +template<typename MatrixType, typename ResultType> +struct compute_inverse<MatrixType, ResultType, 2> +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename ResultType::Scalar Scalar; + const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant(); + compute_inverse_size2_helper(matrix, invdet, result); + } +}; + +template<typename MatrixType, typename ResultType> +struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2> +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + typedef typename ResultType::Scalar Scalar; + determinant = matrix.determinant(); + invertible = abs(determinant) > absDeterminantThreshold; + if(!invertible) return; + const Scalar invdet = Scalar(1) / determinant; + compute_inverse_size2_helper(matrix, invdet, inverse); + } +}; + +/**************************** +*** Size 3 implementation *** +****************************/ + +template<typename MatrixType, int i, int j> +EIGEN_DEVICE_FUNC +inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) +{ + enum { + i1 = (i+1) % 3, + i2 = (i+2) % 3, + j1 = (j+1) % 3, + j2 = (j+2) % 3 + }; + return m.coeff(i1, j1) * m.coeff(i2, j2) + - m.coeff(i1, j2) * m.coeff(i2, j1); +} + +template<typename MatrixType, typename ResultType> +EIGEN_DEVICE_FUNC +inline void compute_inverse_size3_helper( + const MatrixType& matrix, + const typename ResultType::Scalar& invdet, + const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0, + ResultType& result) +{ + // Compute cofactors in a way that avoids aliasing issues. + typedef typename ResultType::Scalar Scalar; + const Scalar c01 = cofactor_3x3<MatrixType,0,1>(matrix) * invdet; + const Scalar c11 = cofactor_3x3<MatrixType,1,1>(matrix) * invdet; + const Scalar c02 = cofactor_3x3<MatrixType,0,2>(matrix) * invdet; + result.coeffRef(1,2) = cofactor_3x3<MatrixType,2,1>(matrix) * invdet; + result.coeffRef(2,1) = cofactor_3x3<MatrixType,1,2>(matrix) * invdet; + result.coeffRef(2,2) = cofactor_3x3<MatrixType,2,2>(matrix) * invdet; + result.coeffRef(1,0) = c01; + result.coeffRef(1,1) = c11; + result.coeffRef(2,0) = c02; + result.row(0) = cofactors_col0 * invdet; +} + +template<typename MatrixType, typename ResultType> +struct compute_inverse<MatrixType, ResultType, 3> +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename ResultType::Scalar Scalar; + Matrix<typename MatrixType::Scalar,3,1> cofactors_col0; + cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix); + cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix); + cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix); + const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); + const Scalar invdet = Scalar(1) / det; + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); + } +}; + +template<typename MatrixType, typename ResultType> +struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3> +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + typedef typename ResultType::Scalar Scalar; + Matrix<Scalar,3,1> cofactors_col0; + cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix); + cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix); + cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix); + determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); + invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold; + if(!invertible) return; + const Scalar invdet = Scalar(1) / determinant; + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); + } +}; + +/**************************** +*** Size 4 implementation *** +****************************/ + +template<typename Derived> +EIGEN_DEVICE_FUNC +inline const typename Derived::Scalar general_det3_helper +(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3) +{ + return matrix.coeff(i1,j1) + * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2)); +} + +template<typename MatrixType, int i, int j> +EIGEN_DEVICE_FUNC +inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) +{ + enum { + i1 = (i+1) % 4, + i2 = (i+2) % 4, + i3 = (i+3) % 4, + j1 = (j+1) % 4, + j2 = (j+2) % 4, + j3 = (j+3) % 4 + }; + return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) + + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) + + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); +} + +template<int Arch, typename Scalar, typename MatrixType, typename ResultType> +struct compute_inverse_size4 +{ + EIGEN_DEVICE_FUNC + static void run(const MatrixType& matrix, ResultType& result) + { + result.coeffRef(0,0) = cofactor_4x4<MatrixType,0,0>(matrix); + result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix); + result.coeffRef(2,0) = cofactor_4x4<MatrixType,0,2>(matrix); + result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix); + result.coeffRef(0,2) = cofactor_4x4<MatrixType,2,0>(matrix); + result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix); + result.coeffRef(2,2) = cofactor_4x4<MatrixType,2,2>(matrix); + result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix); + result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix); + result.coeffRef(1,1) = cofactor_4x4<MatrixType,1,1>(matrix); + result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix); + result.coeffRef(3,1) = cofactor_4x4<MatrixType,1,3>(matrix); + result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix); + result.coeffRef(1,3) = cofactor_4x4<MatrixType,3,1>(matrix); + result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix); + result.coeffRef(3,3) = cofactor_4x4<MatrixType,3,3>(matrix); + result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum(); + } +}; + +template<typename MatrixType, typename ResultType> +struct compute_inverse<MatrixType, ResultType, 4> + : compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar, + MatrixType, ResultType> +{ +}; + +template<typename MatrixType, typename ResultType> +struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4> +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + determinant = matrix.determinant(); + invertible = abs(determinant) > absDeterminantThreshold; + if(invertible && extract_data(matrix) != extract_data(inverse)) { + compute_inverse<MatrixType, ResultType>::run(matrix, inverse); + } + else if(invertible) { + MatrixType matrix_t = matrix; + compute_inverse<MatrixType, ResultType>::run(matrix_t, inverse); + } + } +}; + +/************************* +*** MatrixBase methods *** +*************************/ + +} // end namespace internal + +namespace internal { + +// Specialization for "dense = dense_xpr.inverse()" +template<typename DstXprType, typename XprType> +struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense> +{ + typedef Inverse<XprType> SrcXprType; + EIGEN_DEVICE_FUNC + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(Size); + eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst))) + && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); + + typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType; + typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded; + + ActualXprType actual_xpr(src.nestedExpression()); + + compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst); + } +}; + + +} // end namespace internal + +/** \lu_module + * + * \returns the matrix inverse of this matrix. + * + * For small fixed sizes up to 4x4, this method uses cofactors. + * In the general case, this method uses class PartialPivLU. + * + * \note This matrix must be invertible, otherwise the result is undefined. If you need an + * invertibility check, do the following: + * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck(). + * \li for the general case, use class FullPivLU. + * + * Example: \include MatrixBase_inverse.cpp + * Output: \verbinclude MatrixBase_inverse.out + * + * \sa computeInverseAndDetWithCheck() + */ +template<typename Derived> +EIGEN_DEVICE_FUNC +inline const Inverse<Derived> MatrixBase<Derived>::inverse() const +{ + EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) + eigen_assert(rows() == cols()); + return Inverse<Derived>(derived()); +} + +/** \lu_module + * + * Computation of matrix inverse and determinant, with invertibility check. + * + * This is only for fixed-size square matrices of size up to 4x4. + * + * Notice that it will trigger a copy of input matrix when trying to do the inverse in place. + * + * \param inverse Reference to the matrix in which to store the inverse. + * \param determinant Reference to the variable in which to store the determinant. + * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. + * \param absDeterminantThreshold Optional parameter controlling the invertibility check. + * The matrix will be declared invertible if the absolute value of its + * determinant is greater than this threshold. + * + * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp + * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out + * + * \sa inverse(), computeInverseWithCheck() + */ +template<typename Derived> +template<typename ResultType> +inline void MatrixBase<Derived>::computeInverseAndDetWithCheck( + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible, + const RealScalar& absDeterminantThreshold + ) const +{ + // i'd love to put some static assertions there, but SFINAE means that they have no effect... + eigen_assert(rows() == cols()); + // for 2x2, it's worth giving a chance to avoid evaluating. + // for larger sizes, evaluating has negligible cost and limits code size. + typedef typename internal::conditional< + RowsAtCompileTime == 2, + typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type, + PlainObject + >::type MatrixType; + internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run + (derived(), absDeterminantThreshold, inverse, determinant, invertible); +} + +/** \lu_module + * + * Computation of matrix inverse, with invertibility check. + * + * This is only for fixed-size square matrices of size up to 4x4. + * + * Notice that it will trigger a copy of input matrix when trying to do the inverse in place. + * + * \param inverse Reference to the matrix in which to store the inverse. + * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. + * \param absDeterminantThreshold Optional parameter controlling the invertibility check. + * The matrix will be declared invertible if the absolute value of its + * determinant is greater than this threshold. + * + * Example: \include MatrixBase_computeInverseWithCheck.cpp + * Output: \verbinclude MatrixBase_computeInverseWithCheck.out + * + * \sa inverse(), computeInverseAndDetWithCheck() + */ +template<typename Derived> +template<typename ResultType> +inline void MatrixBase<Derived>::computeInverseWithCheck( + ResultType& inverse, + bool& invertible, + const RealScalar& absDeterminantThreshold + ) const +{ + Scalar determinant; + // i'd love to put some static assertions there, but SFINAE means that they have no effect... + eigen_assert(rows() == cols()); + computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); +} + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_IMPL_H diff --git a/src/Eigen/src/LU/PartialPivLU.h b/src/Eigen/src/LU/PartialPivLU.h new file mode 100644 index 0000000..34aed72 --- /dev/null +++ b/src/Eigen/src/LU/PartialPivLU.h @@ -0,0 +1,624 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARTIALLU_H +#define EIGEN_PARTIALLU_H + +namespace Eigen { + +namespace internal { +template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef int StorageIndex; + typedef traits<_MatrixType> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit, + CoeffReadCost = Dynamic + }; +}; + +template<typename T,typename Derived> +struct enable_if_ref; +// { +// typedef Derived type; +// }; + +template<typename T,typename Derived> +struct enable_if_ref<Ref<T>,Derived> { + typedef Derived type; +}; + +} // end namespace internal + +/** \ingroup LU_Module + * + * \class PartialPivLU + * + * \brief LU decomposition of a matrix with partial pivoting, and related features + * + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition + * + * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A + * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P + * is a permutation matrix. + * + * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible + * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class + * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the + * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices. + * + * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided + * by class FullPivLU. + * + * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class, + * such as rank computation. If you need these features, use class FullPivLU. + * + * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses + * in the general case. + * On the other hand, it is \b not suitable to determine whether a given matrix is invertible. + * + * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU + */ +template<typename _MatrixType> class PartialPivLU + : public SolverBase<PartialPivLU<_MatrixType> > +{ + public: + + typedef _MatrixType MatrixType; + typedef SolverBase<PartialPivLU> Base; + friend class SolverBase<PartialPivLU>; + + EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) + enum { + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; + typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; + typedef typename MatrixType::PlainObject PlainObject; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via PartialPivLU::compute(const MatrixType&). + */ + PartialPivLU(); + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa PartialPivLU() + */ + explicit PartialPivLU(Index size); + + /** Constructor. + * + * \param matrix the matrix of which to compute the LU decomposition. + * + * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). + * If you need to deal with non-full rank, use class FullPivLU instead. + */ + template<typename InputType> + explicit PartialPivLU(const EigenBase<InputType>& matrix); + + /** Constructor for \link InplaceDecomposition inplace decomposition \endlink + * + * \param matrix the matrix of which to compute the LU decomposition. + * + * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). + * If you need to deal with non-full rank, use class FullPivLU instead. + */ + template<typename InputType> + explicit PartialPivLU(EigenBase<InputType>& matrix); + + template<typename InputType> + PartialPivLU& compute(const EigenBase<InputType>& matrix) { + m_lu = matrix.derived(); + compute(); + return *this; + } + + /** \returns the LU decomposition matrix: the upper-triangular part is U, the + * unit-lower-triangular part is L (at least for square matrices; in the non-square + * case, special care is needed, see the documentation of class FullPivLU). + * + * \sa matrixL(), matrixU() + */ + inline const MatrixType& matrixLU() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return m_lu; + } + + /** \returns the permutation matrix P. + */ + inline const PermutationType& permutationP() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return m_p; + } + + #ifdef EIGEN_PARSED_BY_DOXYGEN + /** This method returns the solution x to the equation Ax=b, where A is the matrix of which + * *this is the LU decomposition. + * + * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix, + * the only requirement in order for the equation to make sense is that + * b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition. + * + * \returns the solution. + * + * Example: \include PartialPivLU_solve.cpp + * Output: \verbinclude PartialPivLU_solve.out + * + * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution + * theoretically exists and is unique regardless of b. + * + * \sa TriangularView::solve(), inverse(), computeInverse() + */ + template<typename Rhs> + inline const Solve<PartialPivLU, Rhs> + solve(const MatrixBase<Rhs>& b) const; + #endif + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + /** \returns the inverse of the matrix of which *this is the LU decomposition. + * + * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for + * invertibility, use class FullPivLU instead. + * + * \sa MatrixBase::inverse(), LU::inverse() + */ + inline const Inverse<PartialPivLU> inverse() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return Inverse<PartialPivLU>(*this); + } + + /** \returns the determinant of the matrix of which + * *this is the LU decomposition. It has only linear complexity + * (that is, O(n) where n is the dimension of the square matrix) + * as the LU decomposition has already been computed. + * + * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers + * optimized paths. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * + * \sa MatrixBase::determinant() + */ + Scalar determinant() const; + + MatrixType reconstructedMatrix() const; + + EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); } + EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + // Step 1 + dst = permutationP() * rhs; + + // Step 2 + m_lu.template triangularView<UnitLower>().solveInPlace(dst); + + // Step 3 + m_lu.template triangularView<Upper>().solveInPlace(dst); + } + + template<bool Conjugate, typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A^T = U^T L^T P. + * So we proceed as follows: + * Step 1: compute c as the solution to L^T c = b + * Step 2: replace c by the solution x to U^T x = c. + * Step 3: update c = P^-1 c. + */ + + eigen_assert(rhs.rows() == m_lu.cols()); + + // Step 1 + dst = m_lu.template triangularView<Upper>().transpose() + .template conjugateIf<Conjugate>().solve(rhs); + // Step 2 + m_lu.template triangularView<UnitLower>().transpose() + .template conjugateIf<Conjugate>().solveInPlace(dst); + // Step 3 + dst = permutationP().transpose() * dst; + } + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + void compute(); + + MatrixType m_lu; + PermutationType m_p; + TranspositionType m_rowsTranspositions; + RealScalar m_l1_norm; + signed char m_det_p; + bool m_isInitialized; +}; + +template<typename MatrixType> +PartialPivLU<MatrixType>::PartialPivLU() + : m_lu(), + m_p(), + m_rowsTranspositions(), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ +} + +template<typename MatrixType> +PartialPivLU<MatrixType>::PartialPivLU(Index size) + : m_lu(size, size), + m_p(size), + m_rowsTranspositions(size), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ +} + +template<typename MatrixType> +template<typename InputType> +PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix) + : m_lu(matrix.rows(),matrix.cols()), + m_p(matrix.rows()), + m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ + compute(matrix.derived()); +} + +template<typename MatrixType> +template<typename InputType> +PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ + compute(); +} + +namespace internal { + +/** \internal This is the blocked version of fullpivlu_unblocked() */ +template<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic> +struct partial_lu_impl +{ + static const int UnBlockedBound = 16; + static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound; + static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic; + // Remaining rows and columns at compile-time: + static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic; + static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic; + typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType; + typedef Ref<MatrixType> MatrixTypeRef; + typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType; + typedef typename MatrixType::RealScalar RealScalar; + + /** \internal performs the LU decomposition in-place of the matrix \a lu + * using an unblocked algorithm. + * + * In addition, this function returns the row transpositions in the + * vector \a row_transpositions which must have a size equal to the number + * of columns of the matrix \a lu, and an integer \a nb_transpositions + * which returns the actual number of transpositions. + * + * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise. + */ + static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) + { + typedef scalar_score_coeff_op<Scalar> Scoring; + typedef typename Scoring::result_type Score; + const Index rows = lu.rows(); + const Index cols = lu.cols(); + const Index size = (std::min)(rows,cols); + // For small compile-time matrices it is worth processing the last row separately: + // speedup: +100% for 2x2, +10% for others. + const Index endk = UnBlockedAtCompileTime ? size-1 : size; + nb_transpositions = 0; + Index first_zero_pivot = -1; + for(Index k = 0; k < endk; ++k) + { + int rrows = internal::convert_index<int>(rows-k-1); + int rcols = internal::convert_index<int>(cols-k-1); + + Index row_of_biggest_in_col; + Score biggest_in_corner + = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); + row_of_biggest_in_col += k; + + row_transpositions[k] = PivIndex(row_of_biggest_in_col); + + if(biggest_in_corner != Score(0)) + { + if(k != row_of_biggest_in_col) + { + lu.row(k).swap(lu.row(row_of_biggest_in_col)); + ++nb_transpositions; + } + + lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k); + } + else if(first_zero_pivot==-1) + { + // the pivot is exactly zero, we record the index of the first pivot which is exactly 0, + // and continue the factorization such we still have A = PLU + first_zero_pivot = k; + } + + if(k<rows-1) + lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols)); + } + + // special handling of the last entry + if(UnBlockedAtCompileTime) + { + Index k = endk; + row_transpositions[k] = PivIndex(k); + if (Scoring()(lu(k, k)) == Score(0) && first_zero_pivot == -1) + first_zero_pivot = k; + } + + return first_zero_pivot; + } + + /** \internal performs the LU decomposition in-place of the matrix represented + * by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a + * recursive, blocked algorithm. + * + * In addition, this function returns the row transpositions in the + * vector \a row_transpositions which must have a size equal to the number + * of columns of the matrix \a lu, and an integer \a nb_transpositions + * which returns the actual number of transpositions. + * + * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise. + * + * \note This very low level interface using pointers, etc. is to: + * 1 - reduce the number of instantiations to the strict minimum + * 2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > > + */ + static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256) + { + MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride)); + + const Index size = (std::min)(rows,cols); + + // if the matrix is too small, no blocking: + if(UnBlockedAtCompileTime || size<=UnBlockedBound) + { + return unblocked_lu(lu, row_transpositions, nb_transpositions); + } + + // automatically adjust the number of subdivisions to the size + // of the matrix so that there is enough sub blocks: + Index blockSize; + { + blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize); + } + + nb_transpositions = 0; + Index first_zero_pivot = -1; + for(Index k = 0; k < size; k+=blockSize) + { + Index bs = (std::min)(size-k,blockSize); // actual size of the block + Index trows = rows - k - bs; // trailing rows + Index tsize = size - k - bs; // trailing size + + // partition the matrix: + // A00 | A01 | A02 + // lu = A_0 | A_1 | A_2 = A10 | A11 | A12 + // A20 | A21 | A22 + BlockType A_0 = lu.block(0,0,rows,k); + BlockType A_2 = lu.block(0,k+bs,rows,tsize); + BlockType A11 = lu.block(k,k,bs,bs); + BlockType A12 = lu.block(k,k+bs,bs,tsize); + BlockType A21 = lu.block(k+bs,k,trows,bs); + BlockType A22 = lu.block(k+bs,k+bs,trows,tsize); + + PivIndex nb_transpositions_in_panel; + // recursively call the blocked LU algorithm on [A11^T A21^T]^T + // with a very small blocking size: + Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride, + row_transpositions+k, nb_transpositions_in_panel, 16); + if(ret>=0 && first_zero_pivot==-1) + first_zero_pivot = k+ret; + + nb_transpositions += nb_transpositions_in_panel; + // update permutations and apply them to A_0 + for(Index i=k; i<k+bs; ++i) + { + Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k)); + A_0.row(i).swap(A_0.row(piv)); + } + + if(trows) + { + // apply permutations to A_2 + for(Index i=k;i<k+bs; ++i) + A_2.row(i).swap(A_2.row(row_transpositions[i])); + + // A12 = A11^-1 A12 + A11.template triangularView<UnitLower>().solveInPlace(A12); + + A22.noalias() -= A21 * A12; + } + } + return first_zero_pivot; + } +}; + +/** \internal performs the LU decomposition with partial pivoting in-place. + */ +template<typename MatrixType, typename TranspositionType> +void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions) +{ + // Special-case of zero matrix. + if (lu.rows() == 0 || lu.cols() == 0) { + nb_transpositions = 0; + return; + } + eigen_assert(lu.cols() == row_transpositions.size()); + eigen_assert(row_transpositions.size() < 2 || (&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1); + + partial_lu_impl + < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, + typename TranspositionType::StorageIndex, + EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)> + ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); +} + +} // end namespace internal + +template<typename MatrixType> +void PartialPivLU<MatrixType>::compute() +{ + check_template_parameters(); + + // the row permutation is stored as int indices, so just to be sure: + eigen_assert(m_lu.rows()<NumTraits<int>::highest()); + + if(m_lu.cols()>0) + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); + else + m_l1_norm = RealScalar(0); + + eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); + const Index size = m_lu.rows(); + + m_rowsTranspositions.resize(size); + + typename TranspositionType::StorageIndex nb_transpositions; + internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions); + m_det_p = (nb_transpositions%2) ? -1 : 1; + + m_p = m_rowsTranspositions; + + m_isInitialized = true; +} + +template<typename MatrixType> +typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const +{ + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return Scalar(m_det_p) * m_lu.diagonal().prod(); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^{-1} L U. + * This function is provided for debug purpose. */ +template<typename MatrixType> +MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LU is not initialized."); + // LU + MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix() + * m_lu.template triangularView<Upper>(); + + // P^{-1}(LU) + res = m_p.inverse() * res; + + return res; +} + +/***** Implementation details *****************************************************/ + +namespace internal { + +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType> +struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense> +{ + typedef PartialPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); + } +}; +} // end namespace internal + +/******** MatrixBase methods *******/ + +/** \lu_module + * + * \return the partial-pivoting LU decomposition of \c *this. + * + * \sa class PartialPivLU + */ +template<typename Derived> +inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject> +MatrixBase<Derived>::partialPivLu() const +{ + return PartialPivLU<PlainObject>(eval()); +} + +/** \lu_module + * + * Synonym of partialPivLu(). + * + * \return the partial-pivoting LU decomposition of \c *this. + * + * \sa class PartialPivLU + */ +template<typename Derived> +inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject> +MatrixBase<Derived>::lu() const +{ + return PartialPivLU<PlainObject>(eval()); +} + +} // end namespace Eigen + +#endif // EIGEN_PARTIALLU_H diff --git a/src/Eigen/src/LU/PartialPivLU_LAPACKE.h b/src/Eigen/src/LU/PartialPivLU_LAPACKE.h new file mode 100644 index 0000000..755168a --- /dev/null +++ b/src/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -0,0 +1,83 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * LU decomposition with partial pivoting based on LAPACKE_?getrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_PARTIALLU_LAPACK_H +#define EIGEN_PARTIALLU_LAPACK_H + +namespace Eigen { + +namespace internal { + +/** \internal Specialization for the data types supported by LAPACKe */ + +#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ +template<int StorageOrder> \ +struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ +{ \ + /* \internal performs the LU decomposition in-place of the matrix represented */ \ + static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ + { \ + EIGEN_UNUSED_VARIABLE(maxBlockSize);\ + lapack_int matrix_order, first_zero_pivot; \ + lapack_int m, n, lda, *ipiv, info; \ + EIGTYPE* a; \ +/* Set up parameters for ?getrf */ \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + lda = convert_index<lapack_int>(luStride); \ + a = lu_data; \ + ipiv = row_transpositions; \ + m = convert_index<lapack_int>(rows); \ + n = convert_index<lapack_int>(cols); \ + nb_transpositions = 0; \ +\ + info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ +\ + for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \ +\ + eigen_assert(info >= 0); \ +/* something should be done with nb_transpositions */ \ +\ + first_zero_pivot = info; \ + return first_zero_pivot; \ + } \ +}; + +EIGEN_LAPACKE_LU_PARTPIV(double, double, d) +EIGEN_LAPACKE_LU_PARTPIV(float, float, s) +EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/src/Eigen/src/LU/arch/InverseSize4.h b/src/Eigen/src/LU/arch/InverseSize4.h new file mode 100644 index 0000000..a232ffc --- /dev/null +++ b/src/Eigen/src/LU/arch/InverseSize4.h @@ -0,0 +1,351 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2001 Intel Corporation +// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +// The algorithm below is a reimplementation of former \src\LU\Inverse_SSE.h using PacketMath. +// inv(M) = M#/|M|, where inv(M), M# and |M| denote the inverse of M, +// adjugate of M and determinant of M respectively. M# is computed block-wise +// using specific formulae. For proof, see: +// https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html +// Variable names are adopted from \src\LU\Inverse_SSE.h. +// +// The SSE code for the 4x4 float and double matrix inverse in former (deprecated) \src\LU\Inverse_SSE.h +// comes from the following Intel's library: +// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/ +// +// Here is the respective copyright and license statement: +// +// Copyright (c) 2001 Intel Corporation. +// +// Permition is granted to use, copy, distribute and prepare derivative works +// of this library for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. +// Intel makes no representations about the suitability of this software for +// any purpose, and specifically disclaims all warranties. +// See LEGAL.TXT for all the legal information. +// +// TODO: Unify implementations of different data types (i.e. float and double). +#ifndef EIGEN_INVERSE_SIZE_4_H +#define EIGEN_INVERSE_SIZE_4_H + +namespace Eigen +{ +namespace internal +{ +template <typename MatrixType, typename ResultType> +struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType> +{ + enum + { + MatrixAlignment = traits<MatrixType>::Alignment, + ResultAlignment = traits<ResultType>::Alignment, + StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit) + }; + typedef typename conditional<(MatrixType::Flags & LinearAccessBit), MatrixType const &, typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType &mat, ResultType &result) + { + ActualMatrixType matrix(mat); + + const float* data = matrix.data(); + const Index stride = matrix.innerStride(); + Packet4f _L1 = ploadt<Packet4f,MatrixAlignment>(data); + Packet4f _L2 = ploadt<Packet4f,MatrixAlignment>(data + stride*4); + Packet4f _L3 = ploadt<Packet4f,MatrixAlignment>(data + stride*8); + Packet4f _L4 = ploadt<Packet4f,MatrixAlignment>(data + stride*12); + + // Four 2x2 sub-matrices of the input matrix + // input = [[A, B], + // [C, D]] + Packet4f A, B, C, D; + + if (!StorageOrdersMatch) + { + A = vec4f_unpacklo(_L1, _L2); + B = vec4f_unpacklo(_L3, _L4); + C = vec4f_unpackhi(_L1, _L2); + D = vec4f_unpackhi(_L3, _L4); + } + else + { + A = vec4f_movelh(_L1, _L2); + B = vec4f_movehl(_L2, _L1); + C = vec4f_movelh(_L3, _L4); + D = vec4f_movehl(_L4, _L3); + } + + Packet4f AB, DC; + + // AB = A# * B, where A# denotes the adjugate of A, and * denotes matrix product. + AB = pmul(vec4f_swizzle2(A, A, 3, 3, 0, 0), B); + AB = psub(AB, pmul(vec4f_swizzle2(A, A, 1, 1, 2, 2), vec4f_swizzle2(B, B, 2, 3, 0, 1))); + + // DC = D#*C + DC = pmul(vec4f_swizzle2(D, D, 3, 3, 0, 0), C); + DC = psub(DC, pmul(vec4f_swizzle2(D, D, 1, 1, 2, 2), vec4f_swizzle2(C, C, 2, 3, 0, 1))); + + // determinants of the sub-matrices + Packet4f dA, dB, dC, dD; + + dA = pmul(vec4f_swizzle2(A, A, 3, 3, 1, 1), A); + dA = psub(dA, vec4f_movehl(dA, dA)); + + dB = pmul(vec4f_swizzle2(B, B, 3, 3, 1, 1), B); + dB = psub(dB, vec4f_movehl(dB, dB)); + + dC = pmul(vec4f_swizzle2(C, C, 3, 3, 1, 1), C); + dC = psub(dC, vec4f_movehl(dC, dC)); + + dD = pmul(vec4f_swizzle2(D, D, 3, 3, 1, 1), D); + dD = psub(dD, vec4f_movehl(dD, dD)); + + Packet4f d, d1, d2; + + d = pmul(vec4f_swizzle2(DC, DC, 0, 2, 1, 3), AB); + d = padd(d, vec4f_movehl(d, d)); + d = padd(d, vec4f_swizzle2(d, d, 1, 0, 0, 0)); + d1 = pmul(dA, dD); + d2 = pmul(dB, dC); + + // determinant of the input matrix, det = |A||D| + |B||C| - trace(A#*B*D#*C) + Packet4f det = vec4f_duplane(psub(padd(d1, d2), d), 0); + + // reciprocal of the determinant of the input matrix, rd = 1/det + Packet4f rd = pdiv(pset1<Packet4f>(1.0f), det); + + // Four sub-matrices of the inverse + Packet4f iA, iB, iC, iD; + + // iD = D*|A| - C*A#*B + iD = pmul(vec4f_swizzle2(C, C, 0, 0, 2, 2), vec4f_movelh(AB, AB)); + iD = padd(iD, pmul(vec4f_swizzle2(C, C, 1, 1, 3, 3), vec4f_movehl(AB, AB))); + iD = psub(pmul(D, vec4f_duplane(dA, 0)), iD); + + // iA = A*|D| - B*D#*C + iA = pmul(vec4f_swizzle2(B, B, 0, 0, 2, 2), vec4f_movelh(DC, DC)); + iA = padd(iA, pmul(vec4f_swizzle2(B, B, 1, 1, 3, 3), vec4f_movehl(DC, DC))); + iA = psub(pmul(A, vec4f_duplane(dD, 0)), iA); + + // iB = C*|B| - D * (A#B)# = C*|B| - D*B#*A + iB = pmul(D, vec4f_swizzle2(AB, AB, 3, 0, 3, 0)); + iB = psub(iB, pmul(vec4f_swizzle2(D, D, 1, 0, 3, 2), vec4f_swizzle2(AB, AB, 2, 1, 2, 1))); + iB = psub(pmul(C, vec4f_duplane(dB, 0)), iB); + + // iC = B*|C| - A * (D#C)# = B*|C| - A*C#*D + iC = pmul(A, vec4f_swizzle2(DC, DC, 3, 0, 3, 0)); + iC = psub(iC, pmul(vec4f_swizzle2(A, A, 1, 0, 3, 2), vec4f_swizzle2(DC, DC, 2, 1, 2, 1))); + iC = psub(pmul(B, vec4f_duplane(dC, 0)), iC); + + const float sign_mask[4] = {0.0f, numext::bit_cast<float>(0x80000000u), numext::bit_cast<float>(0x80000000u), 0.0f}; + const Packet4f p4f_sign_PNNP = ploadu<Packet4f>(sign_mask); + rd = pxor(rd, p4f_sign_PNNP); + iA = pmul(iA, rd); + iB = pmul(iB, rd); + iC = pmul(iC, rd); + iD = pmul(iD, rd); + + Index res_stride = result.outerStride(); + float *res = result.data(); + + pstoret<float, Packet4f, ResultAlignment>(res + 0, vec4f_swizzle2(iA, iB, 3, 1, 3, 1)); + pstoret<float, Packet4f, ResultAlignment>(res + res_stride, vec4f_swizzle2(iA, iB, 2, 0, 2, 0)); + pstoret<float, Packet4f, ResultAlignment>(res + 2 * res_stride, vec4f_swizzle2(iC, iD, 3, 1, 3, 1)); + pstoret<float, Packet4f, ResultAlignment>(res + 3 * res_stride, vec4f_swizzle2(iC, iD, 2, 0, 2, 0)); + } +}; + +#if !(defined EIGEN_VECTORIZE_NEON && !(EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG)) +// same algorithm as above, except that each operand is split into +// halves for two registers to hold. +template <typename MatrixType, typename ResultType> +struct compute_inverse_size4<Architecture::Target, double, MatrixType, ResultType> +{ + enum + { + MatrixAlignment = traits<MatrixType>::Alignment, + ResultAlignment = traits<ResultType>::Alignment, + StorageOrdersMatch = (MatrixType::Flags & RowMajorBit) == (ResultType::Flags & RowMajorBit) + }; + typedef typename conditional<(MatrixType::Flags & LinearAccessBit), + MatrixType const &, + typename MatrixType::PlainObject>::type + ActualMatrixType; + + static void run(const MatrixType &mat, ResultType &result) + { + ActualMatrixType matrix(mat); + + // Four 2x2 sub-matrices of the input matrix, each is further divided into upper and lower + // row e.g. A1, upper row of A, A2, lower row of A + // input = [[A, B], = [[[A1, [B1, + // [C, D]] A2], B2]], + // [[C1, [D1, + // C2], D2]]] + + Packet2d A1, A2, B1, B2, C1, C2, D1, D2; + + const double* data = matrix.data(); + const Index stride = matrix.innerStride(); + if (StorageOrdersMatch) + { + A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0); + B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2); + A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4); + B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6); + C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8); + D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10); + C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12); + D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14); + } + else + { + Packet2d temp; + A1 = ploadt<Packet2d,MatrixAlignment>(data + stride*0); + C1 = ploadt<Packet2d,MatrixAlignment>(data + stride*2); + A2 = ploadt<Packet2d,MatrixAlignment>(data + stride*4); + C2 = ploadt<Packet2d,MatrixAlignment>(data + stride*6); + temp = A1; + A1 = vec2d_unpacklo(A1, A2); + A2 = vec2d_unpackhi(temp, A2); + + temp = C1; + C1 = vec2d_unpacklo(C1, C2); + C2 = vec2d_unpackhi(temp, C2); + + B1 = ploadt<Packet2d,MatrixAlignment>(data + stride*8); + D1 = ploadt<Packet2d,MatrixAlignment>(data + stride*10); + B2 = ploadt<Packet2d,MatrixAlignment>(data + stride*12); + D2 = ploadt<Packet2d,MatrixAlignment>(data + stride*14); + + temp = B1; + B1 = vec2d_unpacklo(B1, B2); + B2 = vec2d_unpackhi(temp, B2); + + temp = D1; + D1 = vec2d_unpacklo(D1, D2); + D2 = vec2d_unpackhi(temp, D2); + } + + // determinants of the sub-matrices + Packet2d dA, dB, dC, dD; + + dA = vec2d_swizzle2(A2, A2, 1); + dA = pmul(A1, dA); + dA = psub(dA, vec2d_duplane(dA, 1)); + + dB = vec2d_swizzle2(B2, B2, 1); + dB = pmul(B1, dB); + dB = psub(dB, vec2d_duplane(dB, 1)); + + dC = vec2d_swizzle2(C2, C2, 1); + dC = pmul(C1, dC); + dC = psub(dC, vec2d_duplane(dC, 1)); + + dD = vec2d_swizzle2(D2, D2, 1); + dD = pmul(D1, dD); + dD = psub(dD, vec2d_duplane(dD, 1)); + + Packet2d DC1, DC2, AB1, AB2; + + // AB = A# * B, where A# denotes the adjugate of A, and * denotes matrix product. + AB1 = pmul(B1, vec2d_duplane(A2, 1)); + AB2 = pmul(B2, vec2d_duplane(A1, 0)); + AB1 = psub(AB1, pmul(B2, vec2d_duplane(A1, 1))); + AB2 = psub(AB2, pmul(B1, vec2d_duplane(A2, 0))); + + // DC = D#*C + DC1 = pmul(C1, vec2d_duplane(D2, 1)); + DC2 = pmul(C2, vec2d_duplane(D1, 0)); + DC1 = psub(DC1, pmul(C2, vec2d_duplane(D1, 1))); + DC2 = psub(DC2, pmul(C1, vec2d_duplane(D2, 0))); + + Packet2d d1, d2; + + // determinant of the input matrix, det = |A||D| + |B||C| - trace(A#*B*D#*C) + Packet2d det; + + // reciprocal of the determinant of the input matrix, rd = 1/det + Packet2d rd; + + d1 = pmul(AB1, vec2d_swizzle2(DC1, DC2, 0)); + d2 = pmul(AB2, vec2d_swizzle2(DC1, DC2, 3)); + rd = padd(d1, d2); + rd = padd(rd, vec2d_duplane(rd, 1)); + + d1 = pmul(dA, dD); + d2 = pmul(dB, dC); + + det = padd(d1, d2); + det = psub(det, rd); + det = vec2d_duplane(det, 0); + rd = pdiv(pset1<Packet2d>(1.0), det); + + // rows of four sub-matrices of the inverse + Packet2d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2; + + // iD = D*|A| - C*A#*B + iD1 = pmul(AB1, vec2d_duplane(C1, 0)); + iD2 = pmul(AB1, vec2d_duplane(C2, 0)); + iD1 = padd(iD1, pmul(AB2, vec2d_duplane(C1, 1))); + iD2 = padd(iD2, pmul(AB2, vec2d_duplane(C2, 1))); + dA = vec2d_duplane(dA, 0); + iD1 = psub(pmul(D1, dA), iD1); + iD2 = psub(pmul(D2, dA), iD2); + + // iA = A*|D| - B*D#*C + iA1 = pmul(DC1, vec2d_duplane(B1, 0)); + iA2 = pmul(DC1, vec2d_duplane(B2, 0)); + iA1 = padd(iA1, pmul(DC2, vec2d_duplane(B1, 1))); + iA2 = padd(iA2, pmul(DC2, vec2d_duplane(B2, 1))); + dD = vec2d_duplane(dD, 0); + iA1 = psub(pmul(A1, dD), iA1); + iA2 = psub(pmul(A2, dD), iA2); + + // iB = C*|B| - D * (A#B)# = C*|B| - D*B#*A + iB1 = pmul(D1, vec2d_swizzle2(AB2, AB1, 1)); + iB2 = pmul(D2, vec2d_swizzle2(AB2, AB1, 1)); + iB1 = psub(iB1, pmul(vec2d_swizzle2(D1, D1, 1), vec2d_swizzle2(AB2, AB1, 2))); + iB2 = psub(iB2, pmul(vec2d_swizzle2(D2, D2, 1), vec2d_swizzle2(AB2, AB1, 2))); + dB = vec2d_duplane(dB, 0); + iB1 = psub(pmul(C1, dB), iB1); + iB2 = psub(pmul(C2, dB), iB2); + + // iC = B*|C| - A * (D#C)# = B*|C| - A*C#*D + iC1 = pmul(A1, vec2d_swizzle2(DC2, DC1, 1)); + iC2 = pmul(A2, vec2d_swizzle2(DC2, DC1, 1)); + iC1 = psub(iC1, pmul(vec2d_swizzle2(A1, A1, 1), vec2d_swizzle2(DC2, DC1, 2))); + iC2 = psub(iC2, pmul(vec2d_swizzle2(A2, A2, 1), vec2d_swizzle2(DC2, DC1, 2))); + dC = vec2d_duplane(dC, 0); + iC1 = psub(pmul(B1, dC), iC1); + iC2 = psub(pmul(B2, dC), iC2); + + const double sign_mask1[2] = {0.0, numext::bit_cast<double>(0x8000000000000000ull)}; + const double sign_mask2[2] = {numext::bit_cast<double>(0x8000000000000000ull), 0.0}; + const Packet2d sign_PN = ploadu<Packet2d>(sign_mask1); + const Packet2d sign_NP = ploadu<Packet2d>(sign_mask2); + d1 = pxor(rd, sign_PN); + d2 = pxor(rd, sign_NP); + + Index res_stride = result.outerStride(); + double *res = result.data(); + pstoret<double, Packet2d, ResultAlignment>(res + 0, pmul(vec2d_swizzle2(iA2, iA1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res + res_stride, pmul(vec2d_swizzle2(iA2, iA1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res + 2, pmul(vec2d_swizzle2(iB2, iB1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res + res_stride + 2, pmul(vec2d_swizzle2(iB2, iB1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res + 2 * res_stride, pmul(vec2d_swizzle2(iC2, iC1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res + 3 * res_stride, pmul(vec2d_swizzle2(iC2, iC1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res + 2 * res_stride + 2, pmul(vec2d_swizzle2(iD2, iD1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res + 3 * res_stride + 2, pmul(vec2d_swizzle2(iD2, iD1, 0), d2)); + } +}; +#endif +} // namespace internal +} // namespace Eigen +#endif |