// SPDX-License-Identifier: Apache-2.0 // // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) // Copyright 2008-2016 National ICT Australia (NICTA) // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ------------------------------------------------------------------------ //! \addtogroup auxlib //! @{ template inline bool auxlib::inv(Mat& A) { arma_extra_debug_sigprint(); if(A.is_empty()) { return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); blas_int n = blas_int(A.n_rows); blas_int lda = blas_int(A.n_rows); blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n); blas_int info = 0; podarray ipiv(A.n_rows); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info); if(info != 0) { return false; } if(n > 16) { eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::getri()"); lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); lwork = (std::max)(lwork_proposed, lwork); } podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::getri()"); lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info); return (info == 0); } #else { arma_ignore(A); arma_stop_logic_error("inv(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv(Mat& out, const Mat& X) { arma_extra_debug_sigprint(); out = X; return auxlib::inv(out); } template inline bool auxlib::inv_rcond(Mat& A, typename get_pod_type::result& out_rcond) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; out_rcond = T(0); if(A.is_empty()) { return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char norm_id = '1'; blas_int n = blas_int(A.n_rows); blas_int lda = blas_int(A.n_rows); blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n); blas_int info = 0; T norm_val = T(0); podarray junk(1); podarray ipiv(A.n_rows); arma_extra_debug_print("lapack::lange()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr()); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info); if(info != 0) { return false; } out_rcond = auxlib::lu_rcond(A, norm_val); if(n > 16) { eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::getri()"); lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); lwork = (std::max)(lwork_proposed, lwork); } podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::getri()"); lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info); return (info == 0); } #else { arma_ignore(A); arma_stop_logic_error("inv_rcond(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv_tr(Mat& A, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { return true; } arma_debug_assert_blas_size(A); char uplo = (layout == 0) ? 'U' : 'L'; char diag = 'N'; blas_int n = blas_int(A.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::trtri()"); lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info); if(info != 0) { return false; } return true; } #else { arma_ignore(A); arma_ignore(layout); arma_stop_logic_error("inv(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv_tr_rcond(Mat& A, typename get_pod_type::result& out_rcond, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename get_pod_type::result T; if(A.is_empty()) { return true; } out_rcond = auxlib::rcond_trimat(A, layout); arma_debug_assert_blas_size(A); char uplo = (layout == 0) ? 'U' : 'L'; char diag = 'N'; blas_int n = blas_int(A.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::trtri()"); lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info); if(info != 0) { out_rcond = T(0); return false; } return true; } #else { arma_ignore(A); arma_ignore(out_rcond); arma_ignore(layout); arma_stop_logic_error("inv(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv_sympd(Mat& A, bool& out_sympd_state) { arma_extra_debug_sigprint(); out_sympd_state = false; if(A.is_empty()) { return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char uplo = 'L'; blas_int n = blas_int(A.n_rows); blas_int info = 0; // NOTE: for complex matrices, zpotrf() assumes the matrix is hermitian (not simply symmetric) arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } out_sympd_state = true; arma_extra_debug_print("lapack::potri()"); lapack::potri(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } A = symmatl(A); return true; } #else { arma_ignore(A); arma_ignore(out_sympd_state); arma_stop_logic_error("inv_sympd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv_sympd(Mat& out, const Mat& X) { arma_extra_debug_sigprint(); out = X; bool sympd_state_junk = false; return auxlib::inv_sympd(out, sympd_state_junk); } template inline bool auxlib::inv_sympd_rcond(Mat& A, bool& out_sympd_state, eT& out_rcond) { arma_extra_debug_sigprint(); out_sympd_state = false; if(A.is_empty()) { return true; } #if defined(ARMA_USE_LAPACK) { typedef typename get_pod_type::result T; arma_debug_assert_blas_size(A); char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); blas_int info = 0; T norm_val = T(0); podarray work(A.n_rows); arma_extra_debug_print("lapack::lansy()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { out_rcond = eT(0); return false; } out_sympd_state = true; out_rcond = auxlib::lu_rcond_sympd(A, norm_val); if(arma_isnan(out_rcond)) { return false; } arma_extra_debug_print("lapack::potri()"); lapack::potri(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } A = symmatl(A); return true; } #else { arma_ignore(A); arma_ignore(out_sympd_state); arma_ignore(out_rcond); arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::inv_sympd_rcond(Mat< std::complex >& A, bool& out_sympd_state, T& out_rcond) { arma_extra_debug_sigprint(); out_sympd_state = false; if(A.is_empty()) { return true; } #if defined(ARMA_CRIPPLED_LAPACK) { arma_ignore(A); arma_ignore(out_sympd_state); arma_ignore(out_rcond); return false; } #elif defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); blas_int info = 0; T norm_val = T(0); podarray work(A.n_rows); arma_extra_debug_print("lapack::lanhe()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { out_rcond = T(0); return false; } out_sympd_state = true; out_rcond = auxlib::lu_rcond_sympd(A, norm_val); if(arma_isnan(out_rcond)) { return false; } arma_extra_debug_print("lapack::potri()"); lapack::potri(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } A = symmatl(A); return true; } #else { arma_ignore(A); arma_ignore(out_sympd_state); arma_ignore(out_rcond); arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled"); return false; } #endif } //! determinant of a matrix template inline bool auxlib::det(eT& out_val, Mat& A) { arma_extra_debug_sigprint(); if(A.is_empty()) { out_val = eT(1); return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); podarray ipiv(A.n_rows); blas_int info = 0; blas_int n_rows = blas_int(A.n_rows); blas_int n_cols = blas_int(A.n_cols); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info); if(info < 0) { return false; } // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero eT val = A.at(0,0); for(uword i=1; i < A.n_rows; ++i) { val *= A.at(i,i); } blas_int sign = +1; for(uword i=0; i < A.n_rows; ++i) { // NOTE: adjustment of -1 is required as Fortran counts from 1 if( blas_int(i) != (ipiv.mem[i] - 1) ) { sign *= -1; } } out_val = (sign < 0) ? eT(-val) : eT(val); return true; } #else { arma_ignore(out_val); arma_ignore(A); arma_stop_logic_error("det(): use of LAPACK must be enabled"); return false; } #endif } //! log determinant of a matrix template inline bool auxlib::log_det(eT& out_val, typename get_pod_type::result& out_sign, Mat& A) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; if(A.is_empty()) { out_val = eT(0); out_sign = T(1); return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); podarray ipiv(A.n_rows); blas_int info = 0; blas_int n_rows = blas_int(A.n_rows); blas_int n_cols = blas_int(A.n_cols); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info); if(info < 0) { return false; } // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero sword sign = (is_cx::no) ? ( (access::tmp_real( A.at(0,0) ) < T(0)) ? -1 : +1 ) : +1; eT val = (is_cx::no) ? std::log( (access::tmp_real( A.at(0,0) ) < T(0)) ? A.at(0,0)*T(-1) : A.at(0,0) ) : std::log( A.at(0,0) ); for(uword i=1; i < A.n_rows; ++i) { const eT x = A.at(i,i); sign *= (is_cx::no) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1; val += (is_cx::no) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x); } for(uword i=0; i < A.n_rows; ++i) { if( blas_int(i) != (ipiv.mem[i] - 1) ) // NOTE: adjustment of -1 is required as Fortran counts from 1 { sign *= -1; } } out_val = val; out_sign = T(sign); return true; } #else { arma_ignore(A); arma_ignore(out_val); arma_ignore(out_sign); arma_stop_logic_error("log_det(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::log_det_sympd(typename get_pod_type::result& out_val, Mat& A) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; if(A.is_empty()) { out_val = T(0); return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char uplo = 'L'; blas_int n = blas_int(A.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } T val = T(0); for(uword i=0; i < A.n_rows; ++i) { val += std::log( access::tmp_real(A.at(i,i)) ); } out_val = T(2) * val; return true; } #else { arma_ignore(out_val); arma_ignore(A); arma_stop_logic_error("log_det_sympd(): use of LAPACK must be enabled"); return false; } #endif } //! LU decomposition of a matrix template inline bool auxlib::lu(Mat& L, Mat& U, podarray& ipiv, const Base& X) { arma_extra_debug_sigprint(); U = X.get_ref(); const uword U_n_rows = U.n_rows; const uword U_n_cols = U.n_cols; if(U.is_empty()) { L.set_size(U_n_rows, 0); U.set_size(0, U_n_cols); ipiv.reset(); return true; } #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(U); ipiv.set_size( (std::min)(U_n_rows, U_n_cols) ); blas_int info = 0; blas_int n_rows = blas_int(U_n_rows); blas_int n_cols = blas_int(U_n_cols); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info); if(info < 0) { return false; } // take into account that Fortran counts from 1 arrayops::inplace_minus(ipiv.memptr(), blas_int(1), ipiv.n_elem); L.copy_size(U); for(uword col=0; col < U_n_cols; ++col) { for(uword row=0; (row < col) && (row < U_n_rows); ++row) { L.at(row,col) = eT(0); } if( L.in_range(col,col) ) { L.at(col,col) = eT(1); } for(uword row = (col+1); row < U_n_rows; ++row) { L.at(row,col) = U.at(row,col); U.at(row,col) = eT(0); } } return true; } #else { arma_stop_logic_error("lu(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::lu(Mat& L, Mat& U, Mat& P, const Base& X) { arma_extra_debug_sigprint(); podarray ipiv1; const bool status = auxlib::lu(L, U, ipiv1, X); if(status == false) { return false; } if(U.is_empty()) { // L and U have been already set to the correct empty matrices P.eye(L.n_rows, L.n_rows); return true; } const uword n = ipiv1.n_elem; const uword P_rows = U.n_rows; podarray ipiv2(P_rows); const blas_int* ipiv1_mem = ipiv1.memptr(); blas_int* ipiv2_mem = ipiv2.memptr(); for(uword i=0; i(ipiv1_mem[i]); if( ipiv2_mem[i] != ipiv2_mem[k] ) { std::swap( ipiv2_mem[i], ipiv2_mem[k] ); } } P.zeros(P_rows, P_rows); for(uword row=0; row(ipiv2_mem[row])) = eT(1); } if(L.n_cols > U.n_rows) { L.shed_cols(U.n_rows, L.n_cols-1); } if(U.n_rows > L.n_cols) { U.shed_rows(L.n_cols, U.n_rows-1); } return true; } template inline bool auxlib::lu(Mat& L, Mat& U, const Base& X) { arma_extra_debug_sigprint(); podarray ipiv1; const bool status = auxlib::lu(L, U, ipiv1, X); if(status == false) { return false; } if(U.is_empty()) { // L and U have been already set to the correct empty matrices return true; } const uword n = ipiv1.n_elem; const uword P_rows = U.n_rows; podarray ipiv2(P_rows); const blas_int* ipiv1_mem = ipiv1.memptr(); blas_int* ipiv2_mem = ipiv2.memptr(); for(uword i=0; i(ipiv1_mem[i]); if( ipiv2_mem[i] != ipiv2_mem[k] ) { std::swap( ipiv2_mem[i], ipiv2_mem[k] ); L.swap_rows( static_cast(ipiv2_mem[i]), static_cast(ipiv2_mem[k]) ); } } if(L.n_cols > U.n_rows) { L.shed_cols(U.n_rows, L.n_cols-1); } if(U.n_rows > L.n_cols) { U.shed_rows(L.n_cols, U.n_rows-1); } return true; } //! eigen decomposition of general square matrix (real) template inline bool auxlib::eig_gen ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); Mat tmp(1, 1, arma_nozeros_indicator()); if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); tmp.set_size(X.n_rows, X.n_rows); } podarray junk(1); char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; blas_int N = blas_int(X.n_rows); T* vl = junk.memptr(); T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 4*N) : (std::max)(blas_int(1), 3*N) blas_int info = 0; podarray work( static_cast(lwork) ); podarray vals_real(X.n_rows); podarray vals_imag(X.n_rows); arma_extra_debug_print("lapack::geev() -- START"); lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info); arma_extra_debug_print("lapack::geev() -- END"); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); std::complex* vals_mem = vals.memptr(); for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex(vals_real[i], vals_imag[i]); } if(vecs_on) { for(uword j=0; j < X.n_rows; ++j) { if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) { for(uword i=0; i < X.n_rows; ++i) { vecs.at(i,j) = std::complex( tmp.at(i,j), tmp.at(i,j+1) ); vecs.at(i,j+1) = std::complex( tmp.at(i,j), -tmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(tmp.at(i,j), T(0)); } } } } return true; } #else { arma_ignore(vals); arma_ignore(vecs); arma_ignore(vecs_on); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! eigen decomposition of general square matrix (complex) template inline bool auxlib::eig_gen ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base< std::complex, T1 >& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); } podarray junk(1); char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; blas_int N = blas_int(X.n_rows); eT* vl = junk.memptr(); eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N) blas_int info = 0; podarray work( static_cast(lwork) ); podarray< T> rwork( static_cast(2*N) ); arma_extra_debug_print("lapack::cx_geev() -- START"); lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); arma_extra_debug_print("lapack::cx_geev() -- END"); return (info == 0); } #else { arma_ignore(vals); arma_ignore(vecs); arma_ignore(vecs_on); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! eigen decomposition of general square matrix (real, balance given matrix) template inline bool auxlib::eig_gen_balance ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); Mat tmp(1, 1, arma_nozeros_indicator()); if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); tmp.set_size(X.n_rows, X.n_rows); } podarray junk(1); char bal = 'B'; char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; char sense = 'N'; blas_int N = blas_int(X.n_rows); T* vl = junk.memptr(); T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); blas_int ilo = blas_int(0); blas_int ihi = blas_int(0); T abnrm = T(0); blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 2*N) : (std::max)(blas_int(1), 3*N) blas_int info = blas_int(0); podarray scale(X.n_rows); podarray rconde(X.n_rows); podarray rcondv(X.n_rows); podarray work( static_cast(lwork) ); podarray iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N' podarray vals_real(X.n_rows); podarray vals_imag(X.n_rows); arma_extra_debug_print("lapack::geevx() -- START"); lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info); arma_extra_debug_print("lapack::geevx() -- END"); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); std::complex* vals_mem = vals.memptr(); for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex(vals_real[i], vals_imag[i]); } if(vecs_on) { for(uword j=0; j < X.n_rows; ++j) { if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) { for(uword i=0; i < X.n_rows; ++i) { vecs.at(i,j) = std::complex( tmp.at(i,j), tmp.at(i,j+1) ); vecs.at(i,j+1) = std::complex( tmp.at(i,j), -tmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(tmp.at(i,j), T(0)); } } } } return true; } #else { arma_ignore(vals); arma_ignore(vecs); arma_ignore(vecs_on); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! eigen decomposition of general square matrix (complex, balance given matrix) template inline bool auxlib::eig_gen_balance ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base< std::complex, T1 >& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::eig_gen_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK"); return auxlib::eig_gen(vals, vecs, vecs_on, expr); } #elif defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); } podarray junk(1); char bal = 'B'; char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; char sense = 'N'; blas_int N = blas_int(X.n_rows); eT* vl = junk.memptr(); eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); blas_int ilo = blas_int(0); blas_int ihi = blas_int(0); T abnrm = T(0); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N)) blas_int info = blas_int(0); podarray scale(X.n_rows); podarray rconde(X.n_rows); podarray rcondv(X.n_rows); podarray work( static_cast(lwork) ); podarray< T> rwork( static_cast(2*N) ); arma_extra_debug_print("lapack::cx_geevx() -- START"); lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); arma_extra_debug_print("lapack::cx_geevx() -- END"); return (info == 0); } #else { arma_ignore(vals); arma_ignore(vecs); arma_ignore(vecs_on); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! two-sided eigen decomposition of general square matrix (real) template inline bool auxlib::eig_gen_twosided ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); lvecs.set_size(X.n_rows, X.n_rows); rvecs.set_size(X.n_rows, X.n_rows); Mat ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); Mat rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); char jobvl = 'V'; char jobvr = 'V'; blas_int N = blas_int(X.n_rows); blas_int ldvl = blas_int(ltmp.n_rows); blas_int ldvr = blas_int(rtmp.n_rows); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 4*N) blas_int info = 0; podarray work( static_cast(lwork) ); podarray vals_real(X.n_rows); podarray vals_imag(X.n_rows); arma_extra_debug_print("lapack::geev() -- START"); lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info); arma_extra_debug_print("lapack::geev() -- END"); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); std::complex* vals_mem = vals.memptr(); for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex(vals_real[i], vals_imag[i]); } for(uword j=0; j < X.n_rows; ++j) { if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) { for(uword i=0; i < X.n_rows; ++i) { lvecs.at(i,j) = std::complex( ltmp.at(i,j), ltmp.at(i,j+1) ); lvecs.at(i,j+1) = std::complex( ltmp.at(i,j), -ltmp.at(i,j+1) ); rvecs.at(i,j) = std::complex( rtmp.at(i,j), rtmp.at(i,j+1) ); rvecs.at(i,j+1) = std::complex( rtmp.at(i,j), -rtmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(ltmp.at(i,j), T(0)); rvecs.at(i,j) = std::complex(rtmp.at(i,j), T(0)); } } } return true; } #else { arma_ignore(vals); arma_ignore(lvecs); arma_ignore(rvecs); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! two-sided eigen decomposition of general square matrix (complex) template inline bool auxlib::eig_gen_twosided ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base< std::complex, T1 >& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); lvecs.set_size(X.n_rows, X.n_rows); rvecs.set_size(X.n_rows, X.n_rows); char jobvl = 'V'; char jobvr = 'V'; blas_int N = blas_int(X.n_rows); blas_int ldvl = blas_int(lvecs.n_rows); blas_int ldvr = blas_int(rvecs.n_rows); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N) blas_int info = 0; podarray work( static_cast(lwork) ); podarray< T> rwork( static_cast(2*N) ); arma_extra_debug_print("lapack::cx_geev() -- START"); lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); arma_extra_debug_print("lapack::cx_geev() -- END"); return (info == 0); } #else { arma_ignore(vals); arma_ignore(lvecs); arma_ignore(rvecs); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! two-sided eigen decomposition of general square matrix (real, balance given matrix) template inline bool auxlib::eig_gen_twosided_balance ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); lvecs.set_size(X.n_rows, X.n_rows); rvecs.set_size(X.n_rows, X.n_rows); Mat ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); Mat rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); char bal = 'B'; char jobvl = 'V'; char jobvr = 'V'; char sense = 'N'; blas_int N = blas_int(X.n_rows); blas_int ldvl = blas_int(ltmp.n_rows); blas_int ldvr = blas_int(rtmp.n_rows); blas_int ilo = blas_int(0); blas_int ihi = blas_int(0); T abnrm = T(0); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(3*N)) blas_int info = blas_int(0); podarray scale(X.n_rows); podarray rconde(X.n_rows); podarray rcondv(X.n_rows); podarray work( static_cast(lwork) ); podarray iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N' podarray vals_real(X.n_rows); podarray vals_imag(X.n_rows); arma_extra_debug_print("lapack::geevx() -- START"); lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info); arma_extra_debug_print("lapack::geevx() -- END"); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); std::complex* vals_mem = vals.memptr(); for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex(vals_real[i], vals_imag[i]); } for(uword j=0; j < X.n_rows; ++j) { if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) { for(uword i=0; i < X.n_rows; ++i) { lvecs.at(i,j) = std::complex( ltmp.at(i,j), ltmp.at(i,j+1) ); lvecs.at(i,j+1) = std::complex( ltmp.at(i,j), -ltmp.at(i,j+1) ); rvecs.at(i,j) = std::complex( rtmp.at(i,j), rtmp.at(i,j+1) ); rvecs.at(i,j+1) = std::complex( rtmp.at(i,j), -rtmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(ltmp.at(i,j), T(0)); rvecs.at(i,j) = std::complex(rtmp.at(i,j), T(0)); } } } return true; } #else { arma_ignore(vals); arma_ignore(lvecs); arma_ignore(rvecs); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! two-sided eigen decomposition of general square matrix (complex, balance given matrix) template inline bool auxlib::eig_gen_twosided_balance ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base< std::complex, T1 >& expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::eig_gen_twosided_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK"); return auxlib::eig_gen(vals, lvecs, rvecs, expr); } #elif defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat X = expr.get_ref(); arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); arma_debug_assert_blas_size(X); if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } vals.set_size(X.n_rows, 1); lvecs.set_size(X.n_rows, X.n_rows); rvecs.set_size(X.n_rows, X.n_rows); char bal = 'B'; char jobvl = 'V'; char jobvr = 'V'; char sense = 'N'; blas_int N = blas_int(X.n_rows); blas_int ldvl = blas_int(lvecs.n_rows); blas_int ldvr = blas_int(rvecs.n_rows); blas_int ilo = blas_int(0); blas_int ihi = blas_int(0); T abnrm = T(0); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N)) blas_int info = blas_int(0); podarray scale(X.n_rows); podarray rconde(X.n_rows); podarray rcondv(X.n_rows); podarray work( static_cast(lwork) ); podarray< T> rwork( static_cast(2*N) ); arma_extra_debug_print("lapack::cx_geevx() -- START"); lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); arma_extra_debug_print("lapack::cx_geevx() -- END"); return (info == 0); } #else { arma_ignore(vals); arma_ignore(lvecs); arma_ignore(rvecs); arma_ignore(expr); arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); return false; } #endif } //! eigendecomposition of general square matrix pair (real) template inline bool auxlib::eig_pair ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base& A_expr, const Base& B_expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef std::complex eT; Mat A(A_expr.get_ref()); Mat B(B_expr.get_ref()); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); arma_debug_assert_blas_size(A); if(A.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } vals.set_size(A.n_rows, 1); Mat tmp(1, 1, arma_nozeros_indicator()); if(vecs_on) { vecs.set_size(A.n_rows, A.n_rows); tmp.set_size(A.n_rows, A.n_rows); } podarray junk(1); char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; blas_int N = blas_int(A.n_rows); T* vl = junk.memptr(); T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N) blas_int info = 0; podarray alphar(A.n_rows); podarray alphai(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::ggev()"); lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); eT* vals_mem = vals.memptr(); const T* alphar_mem = alphar.memptr(); const T* alphai_mem = alphai.memptr(); const T* beta_mem = beta.memptr(); bool beta_has_zero = false; for(uword j=0; j(re, im); if( (alphai_val > T(0)) && (j < (A.n_rows-1)) ) { ++j; vals_mem[j] = std::complex(re,-im); // force exact conjugate } } if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } if(vecs_on) { for(uword j=0; j( tmp.at(i,j), tmp.at(i,j+1) ); vecs.at(i,j+1) = std::complex( tmp.at(i,j), -tmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(tmp.at(i,j), T(0)); } } } } return true; } #else { arma_ignore(vals); arma_ignore(vecs); arma_ignore(vecs_on); arma_ignore(A_expr); arma_ignore(B_expr); arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); return false; } #endif } //! eigendecomposition of general square matrix pair (complex) template inline bool auxlib::eig_pair ( Mat< std::complex >& vals, Mat< std::complex >& vecs, const bool vecs_on, const Base< std::complex, T1 >& A_expr, const Base< std::complex, T2 >& B_expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat A(A_expr.get_ref()); Mat B(B_expr.get_ref()); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); arma_debug_assert_blas_size(A); if(A.is_empty()) { vals.reset(); vecs.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } vals.set_size(A.n_rows, 1); if(vecs_on) { vecs.set_size(A.n_rows, A.n_rows); } podarray junk(1); char jobvl = 'N'; char jobvr = (vecs_on) ? 'V' : 'N'; blas_int N = blas_int(A.n_rows); eT* vl = junk.memptr(); eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); blas_int ldvl = blas_int(1); blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) blas_int info = 0; podarray alpha(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); podarray rwork( static_cast(8*N) ); arma_extra_debug_print("lapack::cx_ggev()"); lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); if(info != 0) { return false; } eT* vals_mem = vals.memptr(); const eT* alpha_mem = alpha.memptr(); const eT* beta_mem = beta.memptr(); const std::complex zero(T(0), T(0)); bool beta_has_zero = false; for(uword i=0; i inline bool auxlib::eig_pair_twosided ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base& A_expr, const Base& B_expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef std::complex eT; Mat A(A_expr.get_ref()); Mat B(B_expr.get_ref()); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); arma_debug_assert_blas_size(A); if(A.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } vals.set_size(A.n_rows, 1); lvecs.set_size(A.n_rows, A.n_rows); rvecs.set_size(A.n_rows, A.n_rows); Mat ltmp(A.n_rows, A.n_rows, arma_nozeros_indicator()); Mat rtmp(A.n_rows, A.n_rows, arma_nozeros_indicator()); char jobvl = 'V'; char jobvr = 'V'; blas_int N = blas_int(A.n_rows); blas_int ldvl = blas_int(ltmp.n_rows); blas_int ldvr = blas_int(rtmp.n_rows); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N) blas_int info = 0; podarray alphar(A.n_rows); podarray alphai(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::ggev()"); lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info); if(info != 0) { return false; } arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); eT* vals_mem = vals.memptr(); const T* alphar_mem = alphar.memptr(); const T* alphai_mem = alphai.memptr(); const T* beta_mem = beta.memptr(); bool beta_has_zero = false; for(uword j=0; j(re, im); if( (alphai_val > T(0)) && (j < (A.n_rows-1)) ) { ++j; vals_mem[j] = std::complex(re,-im); // force exact conjugate } } if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } for(uword j=0; j < A.n_rows; ++j) { if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) { for(uword i=0; i < A.n_rows; ++i) { lvecs.at(i,j) = std::complex( ltmp.at(i,j), ltmp.at(i,j+1) ); lvecs.at(i,j+1) = std::complex( ltmp.at(i,j), -ltmp.at(i,j+1) ); rvecs.at(i,j) = std::complex( rtmp.at(i,j), rtmp.at(i,j+1) ); rvecs.at(i,j+1) = std::complex( rtmp.at(i,j), -rtmp.at(i,j+1) ); } ++j; } else { for(uword i=0; i(ltmp.at(i,j), T(0)); rvecs.at(i,j) = std::complex(rtmp.at(i,j), T(0)); } } } return true; } #else { arma_ignore(vals); arma_ignore(lvecs); arma_ignore(rvecs); arma_ignore(A_expr); arma_ignore(B_expr); arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); return false; } #endif } //! two-sided eigendecomposition of general square matrix pair (complex) template inline bool auxlib::eig_pair_twosided ( Mat< std::complex >& vals, Mat< std::complex >& lvecs, Mat< std::complex >& rvecs, const Base< std::complex, T1 >& A_expr, const Base< std::complex, T2 >& B_expr ) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat A(A_expr.get_ref()); Mat B(B_expr.get_ref()); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); arma_debug_assert_blas_size(A); if(A.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } vals.set_size(A.n_rows, 1); lvecs.set_size(A.n_rows, A.n_rows); rvecs.set_size(A.n_rows, A.n_rows); char jobvl = 'V'; char jobvr = 'V'; blas_int N = blas_int(A.n_rows); blas_int ldvl = blas_int(lvecs.n_rows); blas_int ldvr = blas_int(rvecs.n_rows); blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) blas_int info = 0; podarray alpha(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); podarray rwork( static_cast(8*N) ); arma_extra_debug_print("lapack::cx_ggev()"); lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); if(info != 0) { return false; } eT* vals_mem = vals.memptr(); const eT* alpha_mem = alpha.memptr(); const eT* beta_mem = beta.memptr(); const std::complex zero(T(0), T(0)); bool beta_has_zero = false; for(uword i=0; i inline bool auxlib::eig_sym(Col& eigval, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(A.is_empty()) { eigval.reset(); return true; } if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false)) { arma_debug_warn_level(1, "eig_sym(): given matrix is not symmetric"); } if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A)) { return false; } arma_debug_assert_blas_size(A); eigval.set_size(A.n_rows); char jobz = 'N'; char uplo = 'U'; blas_int N = blas_int(A.n_rows); blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1) blas_int info = 0; podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::syev()"); lapack::syev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(A); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } //! eigenvalues of a hermitian complex matrix template inline bool auxlib::eig_sym(Col& eigval, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(A.is_empty()) { eigval.reset(); return true; } if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false)) { arma_debug_warn_level(1, "eig_sym(): given matrix is not hermitian"); } if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A)) { return false; } arma_debug_assert_blas_size(A); eigval.set_size(A.n_rows); char jobz = 'N'; char uplo = 'U'; blas_int N = blas_int(A.n_rows); blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1) blas_int info = 0; podarray work( static_cast(lwork) ); podarray rwork( static_cast( (std::max)(blas_int(1), 3*N) ) ); arma_extra_debug_print("lapack::heev()"); lapack::heev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(A); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } //! eigenvalues and eigenvectors of a symmetric real matrix template inline bool auxlib::eig_sym(Col& eigval, Mat& eigvec, const Mat& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } eigvec = X; if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } arma_debug_assert_blas_size(eigvec); eigval.set_size(eigvec.n_rows); char jobz = 'V'; char uplo = 'U'; blas_int N = blas_int(eigvec.n_rows); blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1) blas_int info = 0; podarray work( static_cast(lwork) ); arma_extra_debug_print("lapack::syev()"); lapack::syev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(eigvec); arma_ignore(X); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } //! eigenvalues and eigenvectors of a hermitian complex matrix template inline bool auxlib::eig_sym(Col& eigval, Mat< std::complex >& eigvec, const Mat< std::complex >& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } eigvec = X; if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } arma_debug_assert_blas_size(eigvec); eigval.set_size(eigvec.n_rows); char jobz = 'V'; char uplo = 'U'; blas_int N = blas_int(eigvec.n_rows); blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1) blas_int info = 0; podarray work( static_cast(lwork) ); podarray rwork( static_cast((std::max)(blas_int(1), 3*N)) ); arma_extra_debug_print("lapack::heev()"); lapack::heev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(eigvec); arma_ignore(X); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } //! eigenvalues and eigenvectors of a symmetric real matrix (divide and conquer algorithm) template inline bool auxlib::eig_sym_dc(Col& eigval, Mat& eigvec, const Mat& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } eigvec = X; if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } arma_debug_assert_blas_size(eigvec); eigval.set_size(eigvec.n_rows); char jobz = 'V'; char uplo = 'U'; blas_int N = blas_int(eigvec.n_rows); blas_int lwork_min = 1 + 6*N + 2*(N*N); blas_int liwork_min = 3 + 5*N; blas_int info = 0; blas_int lwork_proposed = 0; blas_int liwork_proposed = 0; if(N >= 32) { eT work_query[2] = {}; blas_int iwork_query[2] = {}; blas_int lwork_query = -1; blas_int liwork_query = -1; arma_extra_debug_print("lapack::syevd()"); lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &iwork_query[0], &liwork_query, &info); if(info != 0) { return false; } lwork_proposed = static_cast( work_query[0] ); liwork_proposed = iwork_query[0]; } blas_int lwork_final = (std::max)( lwork_proposed, lwork_min); blas_int liwork_final = (std::max)(liwork_proposed, liwork_min); podarray work( static_cast( lwork_final) ); podarray iwork( static_cast(liwork_final) ); arma_extra_debug_print("lapack::syevd()"); lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, iwork.memptr(), &liwork_final, &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(eigvec); arma_ignore(X); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } //! eigenvalues and eigenvectors of a hermitian complex matrix (divide and conquer algorithm) template inline bool auxlib::eig_sym_dc(Col& eigval, Mat< std::complex >& eigvec, const Mat< std::complex >& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } eigvec = X; if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } arma_debug_assert_blas_size(eigvec); eigval.set_size(eigvec.n_rows); char jobz = 'V'; char uplo = 'U'; blas_int N = blas_int(eigvec.n_rows); blas_int lwork_min = 2*N + N*N; blas_int lrwork_min = 1 + 5*N + 2*(N*N); blas_int liwork_min = 3 + 5*N; blas_int info = 0; blas_int lwork_proposed = 0; blas_int lrwork_proposed = 0; blas_int liwork_proposed = 0; if(N >= 32) { eT work_query[2] = {}; T rwork_query[2] = {}; blas_int iwork_query[2] = {}; blas_int lwork_query = -1; blas_int lrwork_query = -1; blas_int liwork_query = -1; arma_extra_debug_print("lapack::heevd()"); lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &rwork_query[0], &lrwork_query, &iwork_query[0], &liwork_query, &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); lrwork_proposed = static_cast( rwork_query[0] ); liwork_proposed = iwork_query[0]; } blas_int lwork_final = (std::max)( lwork_proposed, lwork_min); blas_int lrwork_final = (std::max)(lrwork_proposed, lrwork_min); blas_int liwork_final = (std::max)(liwork_proposed, liwork_min); podarray work( static_cast( lwork_final) ); podarray< T> rwork( static_cast(lrwork_final) ); podarray iwork( static_cast(liwork_final) ); arma_extra_debug_print("lapack::heevd()"); lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &lrwork_final, iwork.memptr(), &liwork_final, &info); return (info == 0); } #else { arma_ignore(eigval); arma_ignore(eigvec); arma_ignore(X); arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::chol_simple(Mat& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(X); char uplo = 'U'; blas_int n = blas_int(X.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, X.memptr(), &n, &info); return (info == 0); } #else { arma_ignore(X); arma_stop_logic_error("chol(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::chol(Mat& X, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(X); char uplo = (layout == 0) ? 'U' : 'L'; blas_int n = blas_int(X.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, X.memptr(), &n, &info); if(info != 0) { return false; } X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type return true; } #else { arma_ignore(X); arma_ignore(layout); arma_stop_logic_error("chol(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::chol_band(Mat& X, const uword KD, const uword layout) { arma_extra_debug_sigprint(); return auxlib::chol_band_common(X, KD, layout); } template inline bool auxlib::chol_band(Mat< std::complex >& X, const uword KD, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::chol_band(): redirecting to auxlib::chol() due to crippled LAPACK"); arma_ignore(KD); return auxlib::chol(X, layout); } #else { return auxlib::chol_band_common(X, KD, layout); } #endif } template inline bool auxlib::chol_band_common(Mat& X, const uword KD, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { const uword N = X.n_rows; const uword KL = (layout == 0) ? uword(0) : KD; const uword KU = (layout == 0) ? KD : uword(0); Mat AB; band_helper::compress(AB, X, KL, KU, false); arma_debug_assert_blas_size(AB); char uplo = (layout == 0) ? 'U' : 'L'; blas_int n = blas_int(N); blas_int kd = blas_int(KD); blas_int ldab = blas_int(AB.n_rows); blas_int info = 0; arma_extra_debug_print("lapack::pbtrf()"); lapack::pbtrf(&uplo, &n, &kd, AB.memptr(), &ldab, &info); if(info != 0) { return false; } band_helper::uncompress(X, AB, KL, KU, false); return true; } #else { arma_ignore(X); arma_ignore(KD); arma_ignore(layout); arma_stop_logic_error("chol(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::chol_pivot(Mat& X, Mat& P, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename get_pod_type::result T; arma_debug_assert_blas_size(X); char uplo = (layout == 0) ? 'U' : 'L'; blas_int n = blas_int(X.n_rows); blas_int rank = 0; T tol = T(-1); blas_int info = 0; podarray ipiv( X.n_rows); podarray work(2*X.n_rows); ipiv.zeros(); arma_extra_debug_print("lapack::pstrf()"); lapack::pstrf(&uplo, &n, X.memptr(), &n, ipiv.memptr(), &rank, &tol, work.memptr(), &info); if(info != 0) { return false; } X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type P.set_size(X.n_rows, 1); for(uword i=0; i < X.n_rows; ++i) { P[i] = uword(ipiv[i] - 1); // take into account that Fortran counts from 1 } return true; } #else { arma_ignore(X); arma_ignore(P); arma_ignore(layout); arma_stop_logic_error("chol(): use of LAPACK must be enabled"); return false; } #endif } // // hessenberg decomposition template inline bool auxlib::hess(Mat& H, const Base& X, Col& tao) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { H = X.get_ref(); arma_debug_check( (H.is_square() == false), "hess(): given matrix must be square sized" ); if(H.is_empty()) { return true; } arma_debug_assert_blas_size(H); if(H.n_rows > 2) { tao.set_size(H.n_rows-1); blas_int n = blas_int(H.n_rows); blas_int ilo = 1; blas_int ihi = blas_int(H.n_rows); blas_int lda = blas_int(H.n_rows); blas_int lwork = blas_int(H.n_rows) * 64; blas_int info = 0; podarray work(static_cast(lwork)); arma_extra_debug_print("lapack::gehrd()"); lapack::gehrd(&n, &ilo, &ihi, H.memptr(), &lda, tao.memptr(), work.memptr(), &lwork, &info); return (info == 0); } return true; } #else { arma_ignore(H); arma_ignore(X); arma_ignore(tao); arma_stop_logic_error("hess(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::qr(Mat& Q, Mat& R, const Base& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { R = X.get_ref(); const uword R_n_rows = R.n_rows; const uword R_n_cols = R.n_cols; if(R.is_empty()) { Q.eye(R_n_rows, R_n_rows); return true; } arma_debug_assert_blas_size(R); blas_int m = static_cast(R_n_rows); blas_int n = static_cast(R_n_cols); blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() blas_int k = (std::min)(m,n); blas_int info = 0; podarray tau( static_cast(k) ); eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::geqrf()"); lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::geqrf()"); lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); if(info != 0) { return false; } Q.set_size(R_n_rows, R_n_rows); arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); // // construct R for(uword col=0; col < R_n_cols; ++col) { for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); } } if( (is_float::value) || (is_double::value) ) { arma_extra_debug_print("lapack::orgqr()"); lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); } else if( (is_cx_float::value) || (is_cx_double::value) ) { arma_extra_debug_print("lapack::ungqr()"); lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); } return (info == 0); } #else { arma_ignore(Q); arma_ignore(R); arma_ignore(X); arma_stop_logic_error("qr(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::qr_econ(Mat& Q, Mat& R, const Base& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(is_Mat::value) { const unwrap tmp(X.get_ref()); const Mat& M = tmp.M; if(M.n_rows < M.n_cols) { return auxlib::qr(Q, R, X); } } Q = X.get_ref(); const uword Q_n_rows = Q.n_rows; const uword Q_n_cols = Q.n_cols; if( Q_n_rows <= Q_n_cols ) { return auxlib::qr(Q, R, Q); } if(Q.is_empty()) { Q.set_size(Q_n_rows, 0); R.set_size(0, Q_n_cols); return true; } arma_debug_assert_blas_size(Q); blas_int m = static_cast(Q_n_rows); blas_int n = static_cast(Q_n_cols); blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() blas_int k = (std::min)(m,n); blas_int info = 0; podarray tau( static_cast(k) ); eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::geqrf()"); lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::geqrf()"); lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); if(info != 0) { return false; } R.zeros(Q_n_cols, Q_n_cols); // // construct R for(uword col=0; col < Q_n_cols; ++col) { for(uword row=0; row <= col; ++row) { R.at(row,col) = Q.at(row,col); } } if( (is_float::value) || (is_double::value) ) { arma_extra_debug_print("lapack::orgqr()"); lapack::orgqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); } else if( (is_cx_float::value) || (is_cx_double::value) ) { arma_extra_debug_print("lapack::ungqr()"); lapack::ungqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); } return (info == 0); } #else { arma_ignore(Q); arma_ignore(R); arma_ignore(X); arma_stop_logic_error("qr_econ(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::qr_pivot(Mat& Q, Mat& R, Mat& P, const Base& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { R = X.get_ref(); const uword R_n_rows = R.n_rows; const uword R_n_cols = R.n_cols; if(R.is_empty()) { Q.eye(R_n_rows, R_n_rows); P.set_size(R_n_cols, 1); for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; } return true; } arma_debug_assert_blas_size(R); blas_int m = static_cast(R_n_rows); blas_int n = static_cast(R_n_cols); blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and orgqr() blas_int k = (std::min)(m,n); blas_int info = 0; podarray tau( static_cast(k) ); podarray jpvt( R_n_cols ); jpvt.zeros(); eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::geqp3()"); lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::geqp3()"); lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, &info); if(info != 0) { return false; } Q.set_size(R_n_rows, R_n_rows); arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); // // construct R and P P.set_size(R_n_cols, 1); for(uword col=0; col < R_n_cols; ++col) { for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); } P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1 } arma_extra_debug_print("lapack::orgqr()"); lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); return (info == 0); } #else { arma_ignore(Q); arma_ignore(R); arma_ignore(P); arma_ignore(X); arma_stop_logic_error("qr(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::qr_pivot(Mat< std::complex >& Q, Mat< std::complex >& R, Mat& P, const Base,T1>& X) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; R = X.get_ref(); const uword R_n_rows = R.n_rows; const uword R_n_cols = R.n_cols; if(R.is_empty()) { Q.eye(R_n_rows, R_n_rows); P.set_size(R_n_cols, 1); for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; } return true; } arma_debug_assert_blas_size(R); blas_int m = static_cast(R_n_rows); blas_int n = static_cast(R_n_cols); blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and ungqr() blas_int k = (std::min)(m,n); blas_int info = 0; podarray tau( static_cast(k) ); podarray< T> rwork( 2*R_n_cols ); podarray jpvt( R_n_cols ); jpvt.zeros(); eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::geqp3()"); lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, rwork.memptr(), &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::geqp3()"); lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &info); if(info != 0) { return false; } Q.set_size(R_n_rows, R_n_rows); arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); // // construct R and P P.set_size(R_n_cols, 1); for(uword col=0; col < R_n_cols; ++col) { for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); } P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1 } arma_extra_debug_print("lapack::ungqr()"); lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); return (info == 0); } #else { arma_ignore(Q); arma_ignore(R); arma_ignore(P); arma_ignore(X); arma_stop_logic_error("qr(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd(Col& S, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { S.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); Mat U(1, 1, arma_nozeros_indicator()); Mat V(1, A.n_cols, arma_nozeros_indicator()); char jobu = 'N'; char jobvt = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); blas_int info = 0; S.set_size( static_cast(min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); if(info != 0) { return false; } lwork_proposed = static_cast( work_query[0] ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); return (info == 0); } #else { arma_ignore(S); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd(Col& S, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(A.is_empty()) { S.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); Mat U(1, 1, arma_nozeros_indicator()); Mat V(1, A.n_cols, arma_nozeros_indicator()); char jobu = 'N'; char jobvt = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn+(std::max)(m,n) ); blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray rwork( static_cast(5*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = -1; // query to find optimum size of workspace arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); return (info == 0); } #else { arma_ignore(S); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd(Mat& U, Col& S, Mat& V, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); U.set_size(A.n_rows, A.n_rows); V.set_size(A.n_cols, A.n_cols); char jobu = 'A'; char jobvt = 'A'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); blas_int info = 0; S.set_size( static_cast(min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { // query to find optimum size of workspace eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); if(info != 0) { return false; } lwork_proposed = static_cast( work_query[0] ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); if(info != 0) { return false; } op_strans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd(Mat< std::complex >& U, Col& S, Mat< std::complex >& V, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); U.set_size(A.n_rows, A.n_rows); V.set_size(A.n_cols, A.n_cols); char jobu = 'A'; char jobvt = 'A'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn + (std::max)(m,n) ); blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray rwork( static_cast(5*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = -1; // query to find optimum size of workspace arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); if(info != 0) { return false; } op_htrans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_econ(Mat& U, Col& S, Mat& V, Mat& A, const char mode) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { U.eye(); S.reset(); V.eye(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); S.set_size( static_cast(min_mn) ); blas_int ldu = 0; blas_int ldvt = 0; char jobu = char(0); char jobvt = char(0); if(mode == 'l') { jobu = 'S'; jobvt = 'N'; ldu = m; ldvt = 1; U.set_size( static_cast(ldu), static_cast(min_mn) ); V.reset(); } if(mode == 'r') { jobu = 'N'; jobvt = 'S'; ldu = 1; ldvt = (std::min)(m,n); U.reset(); V.set_size( static_cast(ldvt), static_cast(n) ); } if(mode == 'b') { jobu = 'S'; jobvt = 'S'; ldu = m; ldvt = (std::min)(m,n); U.set_size( static_cast(ldu), static_cast(min_mn) ); V.set_size( static_cast(ldvt), static_cast(n ) ); } blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); blas_int info = 0; blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { eT work_query[2] = {}; blas_int lwork_query = -1; // query to find optimum size of workspace arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); if(info != 0) { return false; } lwork_proposed = static_cast(work_query[0]); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesvd()"); lapack::gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); if(info != 0) { return false; } op_strans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_ignore(mode); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_econ(Mat< std::complex >& U, Col& S, Mat< std::complex >& V, Mat< std::complex >& A, const char mode) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(A.is_empty()) { U.eye(); S.reset(); V.eye(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lda = blas_int(A.n_rows); S.set_size( static_cast(min_mn) ); blas_int ldu = 0; blas_int ldvt = 0; char jobu = char(0); char jobvt = char(0); if(mode == 'l') { jobu = 'S'; jobvt = 'N'; ldu = m; ldvt = 1; U.set_size( static_cast(ldu), static_cast(min_mn) ); V.reset(); } if(mode == 'r') { jobu = 'N'; jobvt = 'S'; ldu = 1; ldvt = (std::min)(m,n); U.reset(); V.set_size( static_cast(ldvt), static_cast(n) ); } if(mode == 'b') { jobu = 'S'; jobvt = 'S'; ldu = m; ldvt = (std::min)(m,n); U.set_size( static_cast(ldu), static_cast(min_mn) ); V.set_size( static_cast(ldvt), static_cast(n) ); } blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); blas_int info = 0; podarray rwork( static_cast(5*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = -1; // query to find optimum size of workspace arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesvd()"); lapack::cx_gesvd(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); if(info != 0) { return false; } op_htrans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_ignore(mode); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc(Col& S, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { S.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); Mat U(1, 1, arma_nozeros_indicator()); Mat V(1, 1, arma_nozeros_indicator()); char jobz = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = 3*min_mn + (std::max)( max_mn, 7*min_mn ); blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( work_query[0] ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); return (info == 0); } #else { arma_ignore(S); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc(Col& S, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(A.is_empty()) { S.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); Mat U(1, 1, arma_nozeros_indicator()); Mat V(1, 1, arma_nozeros_indicator()); char jobz = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = 2*min_mn + max_mn; blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray rwork( static_cast(7*min_mn) ); // from LAPACK 3.8 docs: LAPACK <= v3.6 needs 7*mn podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); return (info == 0); } #else { arma_ignore(S); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc(Mat& U, Col& S, Mat& V, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); U.set_size(A.n_rows, A.n_rows); V.set_size(A.n_cols, A.n_cols); char jobz = 'A'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork1 = 3*min_mn*min_mn + (std::max)(max_mn, 4*min_mn*min_mn + 4*min_mn); // as per LAPACK 3.2 docs blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.8 docs; consistent with LAPACK 3.4 docs blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.8 blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast(work_query[0]); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); if(info != 0) { return false; } op_strans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc(Mat< std::complex >& U, Col& S, Mat< std::complex >& V, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); U.set_size(A.n_rows, A.n_rows); V.set_size(A.n_cols, A.n_cols); char jobz = 'A'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = blas_int(U.n_rows); blas_int ldvt = blas_int(V.n_rows); blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2, 3.4, 3.8 docs blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // as per LAPACK 3.4 docs; LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7 blas_int info = 0; S.set_size( static_cast(min_mn) ); podarray rwork( static_cast(lrwork ) ); podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } op_htrans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc_econ(Mat& U, Col& S, Mat& V, Mat& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); char jobz = 'S'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = m; blas_int ldvt = min_mn; blas_int lwork1 = 3*min_mn*min_mn + (std::max)( max_mn, 4*min_mn*min_mn + 4*min_mn ); // as per LAPACK 3.2 docs blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.4 docs; LAPACK 3.8 requires 4*min_mn*min_mn + 7*min_mn blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.4 blas_int info = 0; if(A.is_empty()) { U.eye(); S.reset(); V.eye( static_cast(n), static_cast(min_mn) ); return true; } S.set_size( static_cast(min_mn) ); U.set_size( static_cast(m), static_cast(min_mn) ); V.set_size( static_cast(min_mn), static_cast(n) ); podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 1024) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast(work_query[0]); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gesdd()"); lapack::gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); if(info != 0) { return false; } op_strans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::svd_dc_econ(Mat< std::complex >& U, Col& S, Mat< std::complex >& V, Mat< std::complex >& A) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); char jobz = 'S'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m,n); blas_int max_mn = (std::max)(m,n); blas_int lda = blas_int(A.n_rows); blas_int ldu = m; blas_int ldvt = min_mn; blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2 docs blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7 blas_int info = 0; if(A.is_empty()) { U.eye(); S.reset(); V.eye( static_cast(n), static_cast(min_mn) ); return true; } S.set_size( static_cast(min_mn) ); U.set_size( static_cast(m), static_cast(min_mn) ); V.set_size( static_cast(min_mn), static_cast(n) ); podarray rwork( static_cast(lrwork ) ); podarray iwork( static_cast(8*min_mn) ); blas_int lwork_proposed = 0; if(A.n_elem >= 256) { eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gesdd()"); lapack::cx_gesdd(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } op_htrans::apply_mat_inplace(V); return true; } #else { arma_ignore(U); arma_ignore(S); arma_ignore(V); arma_ignore(A); arma_stop_logic_error("svd(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition template inline bool auxlib::solve_square_fast(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; arma_debug_assert_blas_size(A); blas_int n = blas_int(A.n_rows); // assuming A is square blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(B_n_rows); blas_int nrhs = blas_int(B_n_cols); blas_int info = blas_int(0); podarray ipiv(A.n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory arma_extra_debug_print("lapack::gesv()"); lapack::gesv(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); return (info == 0); } #else { arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition with rcond estimate template inline bool auxlib::solve_square_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; typedef typename T1::pod_type T; out_rcond = T(0); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A); char norm_id = '1'; char trans = 'N'; blas_int n = blas_int(A.n_rows); // assuming A is square blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(B_n_rows); blas_int nrhs = blas_int(B_n_cols); blas_int info = blas_int(0); T norm_val = T(0); podarray junk(1); podarray ipiv(A.n_rows + 2); // +2 for paranoia arma_extra_debug_print("lapack::lange()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr()); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&n, &n, A.memptr(), &n, ipiv.memptr(), &info); if(info != blas_int(0)) { return false; } arma_extra_debug_print("lapack::getrs()"); lapack::getrs(&trans, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); if(info != blas_int(0)) { return false; } out_rcond = auxlib::lu_rcond(A, norm_val); return true; } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition with refinement (real matrices) template inline bool auxlib::solve_square_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type eT; // Mat B = B_expr.get_ref(); // B is overwritten by lapack::gesvx() if equilibrate is enabled quasi_unwrap UB(B_expr.get_ref()); // deliberately not declaring as const const Mat& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); Mat B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); out.set_size(A.n_rows, B.n_cols); char fact = (equilibrate) ? 'E' : 'N'; char trans = 'N'; char equed = char(0); blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldaf = blas_int(A.n_rows); blas_int ldb = blas_int(A.n_rows); blas_int ldx = blas_int(A.n_rows); blas_int info = blas_int(0); eT rcond = eT(0); Mat AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); podarray IPIV( A.n_rows); podarray R( A.n_rows); podarray C( A.n_rows); podarray FERR( B.n_cols); podarray BERR( B.n_cols); podarray WORK(4*A.n_rows); podarray IWORK( A.n_rows); arma_extra_debug_print("lapack::gesvx()"); lapack::gesvx ( &fact, &trans, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, IPIV.memptr(), &equed, R.memptr(), C.memptr(), const_cast(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info ); // NOTE: using const_cast(B.memptr()) to allow B to be overwritten for equilibration; // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition with refinement (complex matrices) template inline bool auxlib::solve_square_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; // Mat B = B_expr.get_ref(); // B is overwritten by lapack::cx_gesvx() if equilibrate is enabled quasi_unwrap UB(B_expr.get_ref()); // deliberately not declaring as const const Mat& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); Mat B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); out.set_size(A.n_rows, B.n_cols); char fact = (equilibrate) ? 'E' : 'N'; char trans = 'N'; char equed = char(0); blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldaf = blas_int(A.n_rows); blas_int ldb = blas_int(A.n_rows); blas_int ldx = blas_int(A.n_rows); blas_int info = blas_int(0); T rcond = T(0); Mat AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); podarray IPIV( A.n_rows); podarray< T> R( A.n_rows); podarray< T> C( A.n_rows); podarray< T> FERR( B.n_cols); podarray< T> BERR( B.n_cols); podarray WORK(2*A.n_rows); podarray< T> RWORK(2*A.n_rows); arma_extra_debug_print("lapack::cx_gesvx()"); lapack::cx_gesvx ( &fact, &trans, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, IPIV.memptr(), &equed, R.memptr(), C.memptr(), const_cast(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info ); // NOTE: using const_cast(B.memptr()) to allow B to be overwritten for equilibration; // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::solve_sympd_fast(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_sympd_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); return auxlib::solve_square_fast(out, A, B_expr); } #else { return auxlib::solve_sympd_fast_common(out, A, B_expr); } #endif } template inline bool auxlib::solve_sympd_fast_common(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; arma_debug_assert_blas_size(A, out); char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming A is square blas_int nrhs = blas_int(B_n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(B_n_rows); blas_int info = blas_int(0); arma_extra_debug_print("lapack::posv()"); lapack::posv(&uplo, &n, &nrhs, A.memptr(), &lda, out.memptr(), &ldb, &info); return (info == 0); } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via Cholesky decomposition with rcond estimate (real matrices) template inline bool auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; typedef typename T1::pod_type T; out_sympd_state = false; out_rcond = T(0); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A, out); char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming A is square blas_int nrhs = blas_int(B_n_cols); blas_int info = blas_int(0); T norm_val = T(0); podarray work(A.n_rows); arma_extra_debug_print("lapack::lansy()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } out_sympd_state = true; arma_extra_debug_print("lapack::potrs()"); lapack::potrs(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); if(info != 0) { return false; } out_rcond = auxlib::lu_rcond_sympd(A, norm_val); return true; } #else { arma_ignore(out); arma_ignore(out_sympd_state); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via Cholesky decomposition with rcond estimate (complex matrices) template inline bool auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_sympd_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK"); out_sympd_state = false; return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); } #elif defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; out_sympd_state = false; out_rcond = T(0); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A, out); char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming A is square blas_int nrhs = blas_int(B_n_cols); blas_int info = blas_int(0); T norm_val = T(0); podarray work(A.n_rows); arma_extra_debug_print("lapack::lanhe()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &n, &info); if(info != 0) { return false; } out_sympd_state = true; arma_extra_debug_print("lapack::potrs()"); lapack::potrs(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); if(info != 0) { return false; } out_rcond = auxlib::lu_rcond_sympd(A, norm_val); return true; } #else { arma_ignore(out); arma_ignore(out_sympd_state); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via Cholesky decomposition with refinement (real matrices) template inline bool auxlib::solve_sympd_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type eT; // Mat B = B_expr.get_ref(); // B is overwritten by lapack::posvx() if equilibrate is enabled quasi_unwrap UB(B_expr.get_ref()); // deliberately not declaring as const const Mat& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); Mat B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); out.set_size(A.n_rows, B.n_cols); char fact = (equilibrate) ? 'E' : 'N'; char uplo = 'L'; char equed = char(0); blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldaf = blas_int(A.n_rows); blas_int ldb = blas_int(A.n_rows); blas_int ldx = blas_int(A.n_rows); blas_int info = blas_int(0); eT rcond = eT(0); Mat AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); podarray S( A.n_rows); podarray FERR( B.n_cols); podarray BERR( B.n_cols); podarray WORK(3*A.n_rows); podarray IWORK( A.n_rows); arma_extra_debug_print("lapack::posvx()"); lapack::posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info); // NOTE: using const_cast(B.memptr()) to allow B to be overwritten for equilibration; // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr // NOTE: lapack::posvx() sets rcond to zero if A is not sympd out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via Cholesky decomposition with refinement (complex matrices) template inline bool auxlib::solve_sympd_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_sympd_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK"); return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); } #elif defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; // Mat B = B_expr.get_ref(); // B is overwritten by lapack::cx_posvx() if equilibrate is enabled quasi_unwrap UB(B_expr.get_ref()); // deliberately not declaring as const const Mat& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); Mat B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); out.set_size(A.n_rows, B.n_cols); char fact = (equilibrate) ? 'E' : 'N'; char uplo = 'L'; char equed = char(0); blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldaf = blas_int(A.n_rows); blas_int ldb = blas_int(A.n_rows); blas_int ldx = blas_int(A.n_rows); blas_int info = blas_int(0); T rcond = T(0); Mat AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); podarray< T> S( A.n_rows); podarray< T> FERR( B.n_cols); podarray< T> BERR( B.n_cols); podarray WORK(2*A.n_rows); podarray< T> RWORK( A.n_rows); arma_extra_debug_print("lapack::cx_posvx()"); lapack::cx_posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info); // NOTE: using const_cast(B.memptr()) to allow B to be overwritten for equilibration; // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr // NOTE: lapack::cx_posvx() sets rcond to zero if A is not sympd out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a non-square full-rank system via QR or LQ decomposition template inline bool auxlib::solve_rect_fast(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; const unwrap U(B_expr.get_ref()); const Mat& B = U.M; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); Mat tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); if(arma::size(tmp) == arma::size(B)) { tmp = B; } else { tmp.zeros(); tmp(0,0, arma::size(B)) = B; } char trans = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(tmp.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs)); blas_int info = 0; blas_int lwork_proposed = 0; if(A.n_elem >= ((is_cx::yes) ? uword(256) : uword(1024))) { eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::gels()"); lapack::gels( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info ); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gels()"); lapack::gels( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info ); if(info != 0) { return false; } if(tmp.n_rows == A.n_cols) { out.steal_mem(tmp); } else { out = tmp.head_rows(A.n_cols); } return true; } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a non-square full-rank system via QR or LQ decomposition with rcond estimate (experimental) template inline bool auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; typedef typename T1::pod_type T; out_rcond = T(0); const unwrap U(B_expr.get_ref()); const Mat& B = U.M; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } arma_debug_assert_blas_size(A,B); Mat tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); if(arma::size(tmp) == arma::size(B)) { tmp = B; } else { tmp.zeros(); tmp(0,0, arma::size(B)) = B; } char trans = 'N'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(tmp.n_rows); blas_int nrhs = blas_int(B.n_cols); blas_int min_mn = (std::min)(m,n); blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs)); blas_int info = 0; blas_int lwork_proposed = 0; if(A.n_elem >= ((is_cx::yes) ? uword(256) : uword(1024))) { eT work_query[2] = {}; blas_int lwork_query = -1; arma_extra_debug_print("lapack::gels()"); lapack::gels( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info ); if(info != 0) { return false; } lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); } blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gels()"); lapack::gels( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info ); if(info != 0) { return false; } if(A.n_rows >= A.n_cols) { arma_extra_debug_print("estimating rcond via R"); // xGELS docs: for M >= N, A contains details of its QR decomposition as returned by xGEQRF // xGEQRF docs: elements on and above the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R Mat R(A.n_cols, A.n_cols, arma_zeros_indicator()); for(uword col=0; col < A.n_cols; ++col) { for(uword row=0; row <= col; ++row) { R.at(row,col) = A.at(row,col); } } // determine quality of solution out_rcond = auxlib::rcond_trimat(R, 0); // 0: upper triangular; 1: lower triangular } else if(A.n_rows < A.n_cols) { arma_extra_debug_print("estimating rcond via L"); // xGELS docs: for M < N, A contains details of its LQ decomposition as returned by xGELQF // xGELQF docs: elements on and below the diagonal contain the M-by-min(M,N) lower trapezoidal matrix L Mat L(A.n_rows, A.n_rows, arma_zeros_indicator()); for(uword col=0; col < A.n_rows; ++col) { for(uword row=col; row < A.n_rows; ++row) { L.at(row,col) = A.at(row,col); } } // determine quality of solution out_rcond = auxlib::rcond_trimat(L, 1); // 0: upper triangular; 1: lower triangular } if(tmp.n_rows == A.n_cols) { out.steal_mem(tmp); } else { out = tmp.head_rows(A.n_cols); } return true; } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::solve_approx_svd(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type eT; const unwrap U(B_expr.get_ref()); const Mat& B = U.M; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A,B); Mat tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); if(arma::size(tmp) == arma::size(B)) { tmp = B; } else { tmp.zeros(); tmp(0,0, arma::size(B)) = B; } blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m, n); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(tmp.n_rows); //eT rcond = eT(-1); // -1 means "use machine precision" eT rcond = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits::epsilon(); blas_int rank = blas_int(0); blas_int info = blas_int(0); podarray S( static_cast(min_mn) ); // NOTE: with LAPACK 3.8, can use the workspace query to also obtain liwork, // NOTE: which makes the call to lapack::laenv() redundant blas_int ispec = blas_int(9); const char* const_name = (is_float::value) ? "SGELSD" : "DGELSD"; const char* const_opts = " "; char* name = const_cast(const_name); char* opts = const_cast(const_opts); blas_int n1 = m; blas_int n2 = n; blas_int n3 = nrhs; blas_int n4 = lda; blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0); blas_int smlsiz = (std::max)( blas_int(25), laenv_result ); blas_int smlsiz_p1 = blas_int(1) + smlsiz; blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) ); blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*min_mn*nlvl + blas_int(11)*min_mn) ); podarray iwork( static_cast(liwork) ); blas_int lwork_min = blas_int(12)*min_mn + blas_int(2)*min_mn*smlsiz + blas_int(8)*min_mn*nlvl + min_mn*nrhs + smlsiz_p1*smlsiz_p1; eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::gelsd()"); lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, iwork.memptr(), &info); if(info != 0) { return false; } // NOTE: in LAPACK 3.8, iwork[0] returns the minimum liwork blas_int lwork_proposed = static_cast( access::tmp_real(work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::gelsd()"); lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, iwork.memptr(), &info); if(info != 0) { return false; } if(tmp.n_rows == A.n_cols) { out.steal_mem(tmp); } else { out = tmp.head_rows(A.n_cols); } return true; } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::solve_approx_svd(Mat< std::complex >& out, Mat< std::complex >& A, const Base,T1>& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; const unwrap U(B_expr.get_ref()); const Mat& B = U.M; arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A,B); Mat tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); if(arma::size(tmp) == arma::size(B)) { tmp = B; } else { tmp.zeros(); tmp(0,0, arma::size(B)) = B; } blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_cols); blas_int min_mn = (std::min)(m, n); blas_int nrhs = blas_int(B.n_cols); blas_int lda = blas_int(A.n_rows); blas_int ldb = blas_int(tmp.n_rows); //T rcond = T(-1); // -1 means "use machine precision" T rcond = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits::epsilon(); blas_int rank = blas_int(0); blas_int info = blas_int(0); podarray S( static_cast(min_mn) ); blas_int ispec = blas_int(9); const char* const_name = (is_float::value) ? "CGELSD" : "ZGELSD"; const char* const_opts = " "; char* name = const_cast(const_name); char* opts = const_cast(const_opts); blas_int n1 = m; blas_int n2 = n; blas_int n3 = nrhs; blas_int n4 = lda; blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0); blas_int smlsiz = (std::max)( blas_int(25), laenv_result ); blas_int smlsiz_p1 = blas_int(1) + smlsiz; blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) ); blas_int lrwork = (m >= n) ? blas_int(10)*n + blas_int(2)*n*smlsiz + blas_int(8)*n*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs ) : blas_int(10)*m + blas_int(2)*m*smlsiz + blas_int(8)*m*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs ); blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*blas_int(min_mn)*nlvl + blas_int(11)*blas_int(min_mn)) ); podarray rwork( static_cast(lrwork) ); podarray iwork( static_cast(liwork) ); blas_int lwork_min = 2*min_mn + min_mn*nrhs; eT work_query[2] = {}; blas_int lwork_query = blas_int(-1); arma_extra_debug_print("lapack::cx_gelsd()"); lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } blas_int lwork_proposed = static_cast( access::tmp_real( work_query[0]) ); blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); podarray work( static_cast(lwork_final) ); arma_extra_debug_print("lapack::cx_gelsd()"); lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); if(info != 0) { return false; } if(tmp.n_rows == A.n_cols) { out.steal_mem(tmp); } else { out = tmp.head_rows(A.n_cols); } return true; } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::solve_trimat_fast(Mat& out, const Mat& A, const Base& B_expr, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A,out); char uplo = (layout == 0) ? 'U' : 'L'; char trans = 'N'; char diag = 'N'; blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B_n_cols); blas_int info = 0; arma_extra_debug_print("lapack::trtrs()"); lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); return (info == 0); } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_ignore(layout); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const Base& B_expr, const uword layout) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; out_rcond = T(0); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A,out); char uplo = (layout == 0) ? 'U' : 'L'; char trans = 'N'; char diag = 'N'; blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B_n_cols); blas_int info = 0; arma_extra_debug_print("lapack::trtrs()"); lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); if(info != 0) { return false; } // determine quality of solution out_rcond = auxlib::rcond_trimat(A, layout); return true; } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); arma_ignore(layout); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition (real band matrix) template inline bool auxlib::solve_band_fast(Mat& out, Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr); } //! solve a system of linear equations via LU decomposition (complex band matrix) template inline bool auxlib::solve_band_fast(Mat< std::complex >& out, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_band_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); arma_ignore(KL); arma_ignore(KU); return auxlib::solve_square_fast(out, A, B_expr); } #else { return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr); } #endif } //! solve a system of linear equations via LU decomposition (band matrix) template inline bool auxlib::solve_band_fast_common(Mat& out, const Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } // for gbsv, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1) Mat AB; band_helper::compress(AB, A, KL, KU, true); const uword N = AB.n_cols; // order of the original square matrix A arma_debug_assert_blas_size(AB,out); blas_int n = blas_int(N); blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int nrhs = blas_int(B_n_cols); blas_int ldab = blas_int(AB.n_rows); blas_int ldb = blas_int(B_n_rows); blas_int info = blas_int(0); podarray ipiv(N + 2); // +2 for paranoia // NOTE: AB is overwritten arma_extra_debug_print("lapack::gbsv()"); lapack::gbsv(&n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info); return (info == 0); } #else { arma_ignore(out); arma_ignore(A); arma_ignore(KL); arma_ignore(KU); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition (real band matrix) template inline bool auxlib::solve_band_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); } //! solve a system of linear equations via LU decomposition (complex band matrix) template inline bool auxlib::solve_band_rcond(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_band_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK"); arma_ignore(KL); arma_ignore(KU); return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); } #else { return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); } #endif } //! solve a system of linear equations via LU decomposition (band matrix) template inline bool auxlib::solve_band_rcond_common(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; typedef typename T1::pod_type T; out_rcond = T(0); out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } // for gbtrf, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1) Mat AB; band_helper::compress(AB, A, KL, KU, true); const uword N = AB.n_cols; // order of the original square matrix A arma_debug_assert_blas_size(AB,out); //char norm_id = '1'; char trans = 'N'; blas_int n = blas_int(N); // assuming square matrix blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int nrhs = blas_int(B_n_cols); blas_int ldab = blas_int(AB.n_rows); blas_int ldb = blas_int(B_n_rows); blas_int info = blas_int(0); T norm_val = T(0); //podarray junk(1); podarray ipiv(N + 2); // +2 for paranoia // // NOTE: lapack::langb() and lapack::gbtrf() use incompatible storage formats for the band matrix // arma_extra_debug_print("lapack::langb()"); // norm_val = lapack::langb(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr()); norm_val = auxlib::norm1_band(A,KL,KU); arma_extra_debug_print("lapack::gbtrf()"); lapack::gbtrf(&n, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &info); if(info != 0) { return false; } arma_extra_debug_print("lapack::gbtrs()"); lapack::gbtrs(&trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info); if(info != 0) { return false; } out_rcond = auxlib::lu_rcond_band(AB, KL, KU, ipiv, norm_val); return true; } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(KL); arma_ignore(KU); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition with refinement (real band matrices) template inline bool auxlib::solve_band_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type eT; Mat B = B_expr.get_ref(); // B is overwritten arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1) Mat AB; band_helper::compress(AB, A, KL, KU, false); const uword N = AB.n_cols; arma_debug_assert_blas_size(AB,B); out.set_size(N, B.n_cols); Mat AFB(2*KL+KU+1, N, arma_nozeros_indicator()); char fact = (equilibrate) ? 'E' : 'N'; char trans = 'N'; char equed = char(0); blas_int n = blas_int(N); blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int nrhs = blas_int(B.n_cols); blas_int ldab = blas_int(AB.n_rows); blas_int ldafb = blas_int(AFB.n_rows); blas_int ldb = blas_int(B.n_rows); blas_int ldx = blas_int(N); blas_int info = blas_int(0); eT rcond = eT(0); podarray IPIV( N); podarray R( N); podarray C( N); podarray FERR( B.n_cols); podarray BERR( B.n_cols); podarray WORK(3*N); podarray IWORK( N); arma_extra_debug_print("lapack::gbsvx()"); lapack::gbsvx ( &fact, &trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, AFB.memptr(), &ldafb, IPIV.memptr(), &equed, R.memptr(), C.memptr(), B.memptr(), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info ); out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(KL); arma_ignore(KU); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via LU decomposition with refinement (complex band matrices) template inline bool auxlib::solve_band_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_band_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK"); arma_ignore(KL); arma_ignore(KU); return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); } #elif defined(ARMA_USE_LAPACK) { typedef typename T1::pod_type T; typedef typename std::complex eT; Mat B = B_expr.get_ref(); // B is overwritten arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1) Mat AB; band_helper::compress(AB, A, KL, KU, false); const uword N = AB.n_cols; arma_debug_assert_blas_size(AB,B); out.set_size(N, B.n_cols); Mat AFB(2*KL+KU+1, N, arma_nozeros_indicator()); char fact = (equilibrate) ? 'E' : 'N'; char trans = 'N'; char equed = char(0); blas_int n = blas_int(N); blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int nrhs = blas_int(B.n_cols); blas_int ldab = blas_int(AB.n_rows); blas_int ldafb = blas_int(AFB.n_rows); blas_int ldb = blas_int(B.n_rows); blas_int ldx = blas_int(N); blas_int info = blas_int(0); T rcond = T(0); podarray IPIV( N); podarray< T> R( N); podarray< T> C( N); podarray< T> FERR( B.n_cols); podarray< T> BERR( B.n_cols); podarray WORK(2*N); podarray< T> RWORK( N); // NOTE: according to lapack 3.6.1 docs, the size of RWORK in zgbsvx is different to RWORK in dgesvx arma_extra_debug_print("lapack::cx_gbsvx()"); lapack::cx_gbsvx ( &fact, &trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, AFB.memptr(), &ldafb, IPIV.memptr(), &equed, R.memptr(), C.memptr(), B.memptr(), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info ); out_rcond = rcond; return ((info == 0) || (info == (n+1))); } #else { arma_ignore(out); arma_ignore(out_rcond); arma_ignore(A); arma_ignore(KL); arma_ignore(KU); arma_ignore(B_expr); arma_ignore(equilibrate); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } //! solve a system of linear equations via Gaussian elimination with partial pivoting (real tridiagonal band matrix) template inline bool auxlib::solve_tridiag_fast(Mat& out, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); return auxlib::solve_tridiag_fast_common(out, A, B_expr); } //! solve a system of linear equations via Gaussian elimination with partial pivoting (complex tridiagonal band matrix) template inline bool auxlib::solve_tridiag_fast(Mat< std::complex >& out, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::solve_tridiag_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); return auxlib::solve_square_fast(out, A, B_expr); } #else { return auxlib::solve_tridiag_fast_common(out, A, B_expr); } #endif } //! solve a system of linear equations via Gaussian elimination with partial pivoting (tridiagonal band matrix) template inline bool auxlib::solve_tridiag_fast_common(Mat& out, const Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename T1::elem_type eT; out = B_expr.get_ref(); const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } Mat tridiag; band_helper::extract_tridiag(tridiag, A); arma_debug_assert_blas_size(tridiag, out); blas_int n = blas_int(A.n_rows); blas_int nrhs = blas_int(B_n_cols); blas_int ldb = blas_int(B_n_rows); blas_int info = blas_int(0); arma_extra_debug_print("lapack::gtsv()"); lapack::gtsv(&n, &nrhs, tridiag.colptr(0), tridiag.colptr(1), tridiag.colptr(2), out.memptr(), &ldb, &info); return (info == 0); } #else { arma_ignore(out); arma_ignore(A); arma_ignore(B_expr); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } #endif } // // Schur decomposition template inline bool auxlib::schur(Mat& U, Mat& S, const Base& X, const bool calc_U) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { S = X.get_ref(); arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" ); if(S.is_empty()) { U.reset(); S.reset(); return true; } arma_debug_assert_blas_size(S); const uword S_n_rows = S.n_rows; if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); } char jobvs = calc_U ? 'V' : 'N'; char sort = 'N'; void* select = 0; blas_int n = blas_int(S_n_rows); blas_int sdim = 0; blas_int ldvs = calc_U ? n : blas_int(1); blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 3*n) blas_int info = 0; podarray wr(S_n_rows); podarray wi(S_n_rows); podarray work( static_cast(lwork) ); podarray bwork(S_n_rows); arma_extra_debug_print("lapack::gees()"); lapack::gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, wr.memptr(), wi.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, bwork.memptr(), &info); return (info == 0); } #else { arma_ignore(U); arma_ignore(S); arma_ignore(X); arma_ignore(calc_U); arma_stop_logic_error("schur(): use of LAPACK must be enabled"); return false; } #endif } template inline bool auxlib::schur(Mat< std::complex >& U, Mat< std::complex >& S, const Base,T1>& X, const bool calc_U) { arma_extra_debug_sigprint(); S = X.get_ref(); arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" ); return auxlib::schur(U,S,calc_U); } template inline bool auxlib::schur(Mat< std::complex >& U, Mat< std::complex >& S, const bool calc_U) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef std::complex eT; if(S.is_empty()) { U.reset(); S.reset(); return true; } arma_debug_assert_blas_size(S); const uword S_n_rows = S.n_rows; if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); } char jobvs = calc_U ? 'V' : 'N'; char sort = 'N'; void* select = 0; blas_int n = blas_int(S_n_rows); blas_int sdim = 0; blas_int ldvs = calc_U ? n : blas_int(1); blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 2*n) blas_int info = 0; podarray w(S_n_rows); podarray work( static_cast(lwork) ); podarray< T> rwork(S_n_rows); podarray bwork(S_n_rows); arma_extra_debug_print("lapack::cx_gees()"); lapack::cx_gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, w.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info); return (info == 0); } #else { arma_ignore(U); arma_ignore(S); arma_ignore(calc_U); arma_stop_logic_error("schur(): use of LAPACK must be enabled"); return false; } #endif } // // solve the Sylvester equation AX + XB = C template inline bool auxlib::syl(Mat& X, const Mat& A, const Mat& B, const Mat& C) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { arma_debug_check( (A.is_square() == false) || (B.is_square() == false), "syl(): given matrices must be square sized" ); arma_debug_check( (C.n_rows != A.n_rows) || (C.n_cols != B.n_cols), "syl(): matrices are not conformant" ); if(A.is_empty() || B.is_empty() || C.is_empty()) { X.reset(); return true; } Mat Z1, Z2, T1, T2; const bool status_sd1 = auxlib::schur(Z1, T1, A); const bool status_sd2 = auxlib::schur(Z2, T2, B); if( (status_sd1 == false) || (status_sd2 == false) ) { return false; } char trana = 'N'; char tranb = 'N'; blas_int isgn = +1; blas_int m = blas_int(T1.n_rows); blas_int n = blas_int(T2.n_cols); eT scale = eT(0); blas_int info = 0; Mat Y = trans(Z1) * C * Z2; arma_extra_debug_print("lapack::trsyl()"); lapack::trsyl(&trana, &tranb, &isgn, &m, &n, T1.memptr(), &m, T2.memptr(), &n, Y.memptr(), &m, &scale, &info); if(info < 0) { return false; } //Y /= scale; Y /= (-scale); X = Z1 * Y * trans(Z2); return true; } #else { arma_ignore(X); arma_ignore(A); arma_ignore(B); arma_ignore(C); arma_stop_logic_error("syl(): use of LAPACK must be enabled"); return false; } #endif } // // QZ decomposition of general square real matrix pair template inline bool auxlib::qz(Mat& A, Mat& B, Mat& vsl, Mat& vsr, const Base& X_expr, const Base& Y_expr, const char mode) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { A = X_expr.get_ref(); B = Y_expr.get_ref(); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); if(A.is_empty()) { A.reset(); B.reset(); vsl.reset(); vsr.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); vsl.set_size(A.n_rows, A.n_rows); vsr.set_size(A.n_rows, A.n_rows); char jobvsl = 'V'; char jobvsr = 'V'; char eigsort = 'N'; void* selctg = 0; blas_int N = blas_int(A.n_rows); blas_int sdim = 0; blas_int lwork = 64*N+16; // lwork_min = (std::max)(blas_int(1),8*N+16) blas_int info = 0; if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_lhp)); } else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_rhp)); } else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_iuc)); } else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_ouc)); } podarray alphar(A.n_rows); podarray alphai(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); podarray bwork( static_cast(N) ); arma_extra_debug_print("lapack::gges()"); lapack::gges ( &jobvsl, &jobvsr, &eigsort, selctg, &N, A.memptr(), &N, B.memptr(), &N, &sdim, alphar.memptr(), alphai.memptr(), beta.memptr(), vsl.memptr(), &N, vsr.memptr(), &N, work.memptr(), &lwork, bwork.memptr(), &info ); if(info != 0) { return false; } op_strans::apply_mat_inplace(vsl); return true; } #else { arma_ignore(A); arma_ignore(B); arma_ignore(vsl); arma_ignore(vsr); arma_ignore(X_expr); arma_ignore(Y_expr); arma_ignore(mode); arma_stop_logic_error("qz(): use of LAPACK must be enabled"); return false; } #endif } // // QZ decomposition of general square complex matrix pair template inline bool auxlib::qz(Mat< std::complex >& A, Mat< std::complex >& B, Mat< std::complex >& vsl, Mat< std::complex >& vsr, const Base< std::complex, T1 >& X_expr, const Base< std::complex, T2 >& Y_expr, const char mode) { arma_extra_debug_sigprint(); #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; A = X_expr.get_ref(); B = Y_expr.get_ref(); arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); if(A.is_empty()) { A.reset(); B.reset(); vsl.reset(); vsr.reset(); return true; } if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } arma_debug_assert_blas_size(A); vsl.set_size(A.n_rows, A.n_rows); vsr.set_size(A.n_rows, A.n_rows); char jobvsl = 'V'; char jobvsr = 'V'; char eigsort = 'N'; void* selctg = 0; blas_int N = blas_int(A.n_rows); blas_int sdim = 0; blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) blas_int info = 0; if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_lhp)); } else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_rhp)); } else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_iuc)); } else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_ouc)); } podarray alpha(A.n_rows); podarray beta(A.n_rows); podarray work( static_cast(lwork) ); podarray< T> rwork( static_cast(8*N) ); podarray bwork( static_cast(N) ); arma_extra_debug_print("lapack::cx_gges()"); lapack::cx_gges ( &jobvsl, &jobvsr, &eigsort, selctg, &N, A.memptr(), &N, B.memptr(), &N, &sdim, alpha.memptr(), beta.memptr(), vsl.memptr(), &N, vsr.memptr(), &N, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info ); if(info != 0) { return false; } op_htrans::apply_mat_inplace(vsl); return true; } #else { arma_ignore(A); arma_ignore(B); arma_ignore(vsl); arma_ignore(vsr); arma_ignore(X_expr); arma_ignore(Y_expr); arma_ignore(mode); arma_stop_logic_error("qz(): use of LAPACK must be enabled"); return false; } #endif } template inline eT auxlib::rcond(Mat& A) { #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char norm_id = '1'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); eT norm_val = eT(0); eT rcond = eT(0); blas_int info = blas_int(0); podarray work(4*A.n_rows); podarray iwork( A.n_rows); podarray ipiv( (std::min)(A.n_rows, A.n_cols) ); arma_extra_debug_print("lapack::lange()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr()); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info); if(info != blas_int(0)) { return eT(0); } arma_extra_debug_print("lapack::gecon()"); lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } return rcond; } #else { arma_ignore(A); arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return eT(0); } #endif } template inline T auxlib::rcond(Mat< std::complex >& A) { #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_assert_blas_size(A); char norm_id = '1'; blas_int m = blas_int(A.n_rows); blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); T norm_val = T(0); T rcond = T(0); blas_int info = blas_int(0); podarray< T> junk(1); podarray work(2*A.n_rows); podarray< T> rwork(2*A.n_rows); podarray ipiv( (std::min)(A.n_rows, A.n_cols) ); arma_extra_debug_print("lapack::lange()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr()); arma_extra_debug_print("lapack::getrf()"); lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info); if(info != blas_int(0)) { return T(0); } arma_extra_debug_print("lapack::cx_gecon()"); lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } return rcond; } #else { arma_ignore(A); arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return T(0); } #endif } template inline eT auxlib::rcond_sympd(Mat& A, bool& calc_ok) { #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); calc_ok = false; char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); eT norm_val = eT(0); eT rcond = eT(0); blas_int info = blas_int(0); podarray work(3*A.n_rows); podarray iwork( A.n_rows); arma_extra_debug_print("lapack::lansy()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &lda, &info); if(info != blas_int(0)) { return eT(0); } arma_extra_debug_print("lapack::pocon()"); lapack::pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } calc_ok = true; return rcond; } #else { arma_ignore(A); calc_ok = false; arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return eT(0); } #endif } template inline T auxlib::rcond_sympd(Mat< std::complex >& A, bool& calc_ok) { #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::rcond_sympd(): redirecting to auxlib::rcond() due to crippled LAPACK"); calc_ok = true; return auxlib::rcond(A); } #elif defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_assert_blas_size(A); calc_ok = false; char norm_id = '1'; char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); T norm_val = T(0); T rcond = T(0); blas_int info = blas_int(0); podarray work(2*A.n_rows); podarray< T> rwork( A.n_rows); arma_extra_debug_print("lapack::lanhe()"); norm_val = (has_blas_float_bug::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, rwork.memptr()); arma_extra_debug_print("lapack::potrf()"); lapack::potrf(&uplo, &n, A.memptr(), &lda, &info); if(info != blas_int(0)) { return T(0); } arma_extra_debug_print("lapack::cx_pocon()"); lapack::cx_pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } calc_ok = true; return rcond; } #else { arma_ignore(A); calc_ok = false; arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return T(0); } #endif } template inline eT auxlib::rcond_trimat(const Mat& A, const uword layout) { #if defined(ARMA_USE_LAPACK) { arma_debug_assert_blas_size(A); char norm_id = '1'; char uplo = (layout == 0) ? 'U' : 'L'; char diag = 'N'; blas_int n = blas_int(A.n_rows); // assuming square matrix eT rcond = eT(0); blas_int info = blas_int(0); podarray work(3*A.n_rows); podarray iwork( A.n_rows); arma_extra_debug_print("lapack::trcon()"); lapack::trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } return rcond; } #else { arma_ignore(A); arma_ignore(layout); arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return eT(0); } #endif } template inline T auxlib::rcond_trimat(const Mat< std::complex >& A, const uword layout) { #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; arma_debug_assert_blas_size(A); char norm_id = '1'; char uplo = (layout == 0) ? 'U' : 'L'; char diag = 'N'; blas_int n = blas_int(A.n_rows); // assuming square matrix T rcond = T(0); blas_int info = blas_int(0); podarray work(2*A.n_rows); podarray< T> rwork( A.n_rows); arma_extra_debug_print("lapack::cx_trcon()"); lapack::cx_trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } return rcond; } #else { arma_ignore(A); arma_ignore(layout); arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); return T(0); } #endif } template inline eT auxlib::lu_rcond(const Mat& A, const eT norm_val) { #if defined(ARMA_USE_LAPACK) { char norm_id = '1'; blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); eT rcond = eT(0); blas_int info = blas_int(0); podarray work(4*A.n_rows); podarray iwork( A.n_rows); arma_extra_debug_print("lapack::gecon()"); lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } return rcond; } #else { arma_ignore(A); arma_ignore(norm_val); return eT(0); } #endif } template inline T auxlib::lu_rcond(const Mat< std::complex >& A, const T norm_val) { #if defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; char norm_id = '1'; blas_int n = blas_int(A.n_rows); // assuming square matrix blas_int lda = blas_int(A.n_rows); T rcond = T(0); blas_int info = blas_int(0); podarray work(2*A.n_rows); podarray< T> rwork(2*A.n_rows); arma_extra_debug_print("lapack::cx_gecon()"); lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } return rcond; } #else { arma_ignore(A); arma_ignore(norm_val); return T(0); } #endif } template inline eT auxlib::lu_rcond_sympd(const Mat& A, const eT norm_val) { #if defined(ARMA_USE_LAPACK) { char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming square matrix eT rcond = eT(0); blas_int info = blas_int(0); podarray work(3*A.n_rows); podarray iwork( A.n_rows); arma_extra_debug_print("lapack::pocon()"); lapack::pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } return rcond; } #else { arma_ignore(A); arma_ignore(norm_val); return eT(0); } #endif } template inline T auxlib::lu_rcond_sympd(const Mat< std::complex >& A, const T norm_val) { #if defined(ARMA_CRIPPLED_LAPACK) { arma_ignore(A); arma_ignore(norm_val); return T(0); } #elif defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; char uplo = 'L'; blas_int n = blas_int(A.n_rows); // assuming square matrix T rcond = T(0); blas_int info = blas_int(0); podarray work(2*A.n_rows); podarray< T> rwork( A.n_rows); arma_extra_debug_print("lapack::cx_pocon()"); lapack::cx_pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } return rcond; } #else { arma_ignore(A); arma_ignore(norm_val); return T(0); } #endif } template inline eT auxlib::lu_rcond_band(const Mat& AB, const uword KL, const uword KU, const podarray& ipiv, const eT norm_val) { #if defined(ARMA_USE_LAPACK) { const uword N = AB.n_cols; // order of the original square matrix A char norm_id = '1'; blas_int n = blas_int(N); blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int ldab = blas_int(AB.n_rows); eT rcond = eT(0); blas_int info = blas_int(0); podarray work(3*N); podarray iwork( N); arma_extra_debug_print("lapack::gbcon()"); lapack::gbcon(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); if(info != blas_int(0)) { return eT(0); } return rcond; } #else { arma_ignore(AB); arma_ignore(KL); arma_ignore(KU); arma_ignore(ipiv); arma_ignore(norm_val); return eT(0); } #endif } template inline T auxlib::lu_rcond_band(const Mat< std::complex >& AB, const uword KL, const uword KU, const podarray& ipiv, const T norm_val) { #if defined(ARMA_CRIPPLED_LAPACK) { arma_ignore(AB); arma_ignore(KL); arma_ignore(KU); arma_ignore(ipiv); arma_ignore(norm_val); return T(0); } #elif defined(ARMA_USE_LAPACK) { typedef typename std::complex eT; const uword N = AB.n_cols; // order of the original square matrix A char norm_id = '1'; blas_int n = blas_int(N); blas_int kl = blas_int(KL); blas_int ku = blas_int(KU); blas_int ldab = blas_int(AB.n_rows); T rcond = T(0); blas_int info = blas_int(0); podarray work(2*N); podarray< T> rwork( N); arma_extra_debug_print("lapack::cx_gbcon()"); lapack::cx_gbcon(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); if(info != blas_int(0)) { return T(0); } return rcond; } #else { arma_ignore(AB); arma_ignore(KL); arma_ignore(KU); arma_ignore(ipiv); arma_ignore(norm_val); return T(0); } #endif } template inline bool auxlib::crippled_lapack(const Base&) { #if defined(ARMA_CRIPPLED_LAPACK) { arma_extra_debug_print("auxlib::crippled_lapack(): true"); return (is_cx::yes); } #else { return false; } #endif } template inline bool auxlib::rudimentary_sym_check(const Mat& X) { arma_extra_debug_sigprint(); const uword N = X.n_rows; const uword Nm2 = N-2; if(N != X.n_cols) { return false; } if(N <= uword(1)) { return true; } const eT* X_mem = X.memptr(); const eT* X_offsetA = &(X_mem[Nm2 ]); const eT* X_offsetB = &(X_mem[Nm2*N]); const eT A1 = *(X_offsetA ); const eT A2 = *(X_offsetA+1); // bottom-left corner (ie. last value in first column) const eT B1 = *(X_offsetB ); const eT B2 = *(X_offsetB+N); // top-right corner (ie. first value in last column) const eT C1 = (std::max)(std::abs(A1), std::abs(B1)); const eT C2 = (std::max)(std::abs(A2), std::abs(B2)); const eT delta1 = std::abs(A1 - B1); const eT delta2 = std::abs(A2 - B2); const eT tol = eT(10000)*std::numeric_limits::epsilon(); // allow some leeway const bool okay1 = ( (delta1 <= tol) || (delta1 <= (C1 * tol)) ); const bool okay2 = ( (delta2 <= tol) || (delta2 <= (C2 * tol)) ); return (okay1 && okay2); } template inline bool auxlib::rudimentary_sym_check(const Mat< std::complex >& X) { arma_extra_debug_sigprint(); // NOTE: the function name is a misnomer, as it checks for hermitian complex matrices; // NOTE: for simplicity of use, the function name is the same as for real matrices typedef typename std::complex eT; const uword N = X.n_rows; const uword Nm1 = N-1; if(N != X.n_cols) { return false; } if(N == uword(0)) { return true; } const eT* X_mem = X.memptr(); const T tol = T(10000)*std::numeric_limits::epsilon(); // allow some leeway if(std::abs(X_mem[0 ].imag()) > tol) { return false; } // check top-left if(std::abs(X_mem[X.n_elem-1].imag()) > tol) { return false; } // check bottom-right const eT& A = X_mem[Nm1 ]; // bottom-left corner (ie. last value in first column) const eT& B = X_mem[Nm1*N]; // top-right corner (ie. first value in last column) const T C_real = (std::max)(std::abs(A.real()), std::abs(B.real())); const T C_imag = (std::max)(std::abs(A.imag()), std::abs(B.imag())); const T delta_real = std::abs(A.real() - B.real()); const T delta_imag = std::abs(A.imag() + B.imag()); // take into account the conjugate const bool okay_real = ( (delta_real <= tol) || (delta_real <= (C_real * tol)) ); const bool okay_imag = ( (delta_imag <= tol) || (delta_imag <= (C_imag * tol)) ); return (okay_real && okay_imag); } template inline typename get_pod_type::result auxlib::norm1_gen(const Mat& A) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; if(A.n_elem == 0) { return T(0); } const uword n_rows = A.n_rows; const uword n_cols = A.n_cols; T max_val = T(0); for(uword c=0; c < n_cols; ++c) { const eT* colmem = A.colptr(c); T acc_val = T(0); for(uword r=0; r < n_rows; ++r) { acc_val += std::abs(colmem[r]); } max_val = (acc_val > max_val) ? acc_val : max_val; } return max_val; } template inline typename get_pod_type::result auxlib::norm1_sym(const Mat& A) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; if(A.n_elem == 0) { return T(0); } const uword N = (std::min)(A.n_rows, A.n_cols); T max_val = T(0); for(uword col=0; col < N; ++col) { const eT* colmem = A.colptr(col); T acc_val = T(0); for(uword c=0; c < col; ++c) { acc_val += std::abs(A.at(col,c)); } for(uword r=col; r < N; ++r) { acc_val += std::abs(colmem[r]); } max_val = (acc_val > max_val) ? acc_val : max_val; } return max_val; } template inline typename get_pod_type::result auxlib::norm1_band(const Mat& A, const uword KL, const uword KU) { arma_extra_debug_sigprint(); typedef typename get_pod_type::result T; if(A.n_elem == 0) { return T(0); } const uword n_rows = A.n_rows; const uword n_cols = A.n_cols; T max_val = T(0); for(uword c=0; c < n_cols; ++c) { const eT* colmem = A.colptr(c); T acc_val = T(0); // use values only from main diagonal + KU upper diagonals + KL lower diagonals const uword start = ( c > KU ) ? (c - KU) : 0; const uword end = ((c + KL) < n_rows) ? (c + KL) : (n_rows-1); for(uword r=start; r <= end; ++r) { acc_val += std::abs(colmem[r]); } max_val = (acc_val > max_val) ? acc_val : max_val; } return max_val; } // namespace qz_helper { // sgges() and dgges() require an external function with three arguments: // select(alpha_real, alpha_imag, beta) // where the eigenvalue is defined as complex(alpha_real, alpha_imag) / beta template inline blas_int select_lhp(const T* x_ptr, const T* y_ptr, const T* z_ptr) { arma_extra_debug_sigprint(); // cout << "select_lhp(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "select_lhp(): (*y_ptr) = " << (*y_ptr) << endl; // cout << "select_lhp(): (*z_ptr) = " << (*z_ptr) << endl; arma_ignore(y_ptr); // ignore imaginary part const T x = (*x_ptr); const T z = (*z_ptr); if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp return ((x/z) < T(0)) ? blas_int(1) : blas_int(0); } template inline blas_int select_rhp(const T* x_ptr, const T* y_ptr, const T* z_ptr) { arma_extra_debug_sigprint(); // cout << "select_rhp(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "select_rhp(): (*y_ptr) = " << (*y_ptr) << endl; // cout << "select_rhp(): (*z_ptr) = " << (*z_ptr) << endl; arma_ignore(y_ptr); // ignore imaginary part const T x = (*x_ptr); const T z = (*z_ptr); if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp return ((x/z) > T(0)) ? blas_int(1) : blas_int(0); } template inline blas_int select_iuc(const T* x_ptr, const T* y_ptr, const T* z_ptr) { arma_extra_debug_sigprint(); // cout << "select_iuc(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "select_iuc(): (*y_ptr) = " << (*y_ptr) << endl; // cout << "select_iuc(): (*z_ptr) = " << (*z_ptr) << endl; const T x = (*x_ptr); const T y = (*y_ptr); const T z = (*z_ptr); if(z == T(0)) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle //return (std::abs(std::complex(x,y) / z) < T(1)) ? blas_int(1) : blas_int(0); return (std::sqrt(x*x + y*y) < std::abs(z)) ? blas_int(1) : blas_int(0); } template inline blas_int select_ouc(const T* x_ptr, const T* y_ptr, const T* z_ptr) { arma_extra_debug_sigprint(); // cout << "select_ouc(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "select_ouc(): (*y_ptr) = " << (*y_ptr) << endl; // cout << "select_ouc(): (*z_ptr) = " << (*z_ptr) << endl; const T x = (*x_ptr); const T y = (*y_ptr); const T z = (*z_ptr); if(z == T(0)) { return (x == T(0)) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle } //return (std::abs(std::complex(x,y) / z) > T(1)) ? blas_int(1) : blas_int(0); return (std::sqrt(x*x + y*y) > std::abs(z)) ? blas_int(1) : blas_int(0); } // cgges() and zgges() require an external function with two arguments: // select(alpha, beta) // where the complex eigenvalue is defined as (alpha / beta) template inline blas_int cx_select_lhp(const std::complex* x_ptr, const std::complex* y_ptr) { arma_extra_debug_sigprint(); // cout << "cx_select_lhp(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "cx_select_lhp(): (*y_ptr) = " << (*y_ptr) << endl; const std::complex& x = (*x_ptr); const std::complex& y = (*y_ptr); if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp return (std::real(x / y) < T(0)) ? blas_int(1) : blas_int(0); } template inline blas_int cx_select_rhp(const std::complex* x_ptr, const std::complex* y_ptr) { arma_extra_debug_sigprint(); // cout << "cx_select_rhp(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "cx_select_rhp(): (*y_ptr) = " << (*y_ptr) << endl; const std::complex& x = (*x_ptr); const std::complex& y = (*y_ptr); if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp return (std::real(x / y) > T(0)) ? blas_int(1) : blas_int(0); } template inline blas_int cx_select_iuc(const std::complex* x_ptr, const std::complex* y_ptr) { arma_extra_debug_sigprint(); // cout << "cx_select_iuc(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "cx_select_iuc(): (*y_ptr) = " << (*y_ptr) << endl; const std::complex& x = (*x_ptr); const std::complex& y = (*y_ptr); if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle return (std::abs(x / y) < T(1)) ? blas_int(1) : blas_int(0); } template inline blas_int cx_select_ouc(const std::complex* x_ptr, const std::complex* y_ptr) { arma_extra_debug_sigprint(); // cout << "cx_select_ouc(): (*x_ptr) = " << (*x_ptr) << endl; // cout << "cx_select_ouc(): (*y_ptr) = " << (*y_ptr) << endl; const std::complex& x = (*x_ptr); const std::complex& y = (*y_ptr); if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return ((x.real() == T(0)) && (x.imag() == T(0))) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle } return (std::abs(x / y) > T(1)) ? blas_int(1) : blas_int(0); } // need to do shenanigans with pointers due to: // - we're using LAPACK ?gges() defined to expect pointer-to-function to be passed as pointer-to-object // - explicit casting between pointer-to-function and pointer-to-object is a non-standard extension in C // - the extension is essentially mandatory on POSIX systems // - some compilers will complain about the extension in pedantic mode template inline void_ptr ptr_cast(blas_int (*function)(const T*, const T*, const T*)) { union converter { blas_int (*fn)(const T*, const T*, const T*); void_ptr obj; }; converter tmp; tmp.obj = 0; tmp.fn = function; return tmp.obj; } template inline void_ptr ptr_cast(blas_int (*function)(const std::complex*, const std::complex*)) { union converter { blas_int (*fn)(const std::complex*, const std::complex*); void_ptr obj; }; converter tmp; tmp.obj = 0; tmp.fn = function; return tmp.obj; } } // end of namespace qz_helper //! @}