diff options
Diffstat (limited to 'src/armadillo/include/armadillo_bits/auxlib_meat.hpp')
-rw-r--r-- | src/armadillo/include/armadillo_bits/auxlib_meat.hpp | 7050 |
1 files changed, 7050 insertions, 0 deletions
diff --git a/src/armadillo/include/armadillo_bits/auxlib_meat.hpp b/src/armadillo/include/armadillo_bits/auxlib_meat.hpp new file mode 100644 index 0000000..373aaa4 --- /dev/null +++ b/src/armadillo/include/armadillo_bits/auxlib_meat.hpp @@ -0,0 +1,7050 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup auxlib +//! @{ + + + +template<typename eT> +inline +bool +auxlib::inv(Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + if(A.is_empty()) { return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + blas_int n = blas_int(A.n_rows); + blas_int lda = blas_int(A.n_rows); + blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n); + blas_int info = 0; + + podarray<blas_int> ipiv(A.n_rows); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info); + + if(info != 0) { return false; } + + if(n > 16) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::getri()"); + lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + + lwork = (std::max)(lwork_proposed, lwork); + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::getri()"); + lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(A); + arma_stop_logic_error("inv(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::inv(Mat<eT>& out, const Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + out = X; + + return auxlib::inv(out); + } + + + +template<typename eT> +inline +bool +auxlib::inv_rcond(Mat<eT>& A, typename get_pod_type<eT>::result& out_rcond) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + out_rcond = T(0); + + if(A.is_empty()) { return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + blas_int n = blas_int(A.n_rows); + blas_int lda = blas_int(A.n_rows); + blas_int lwork = (std::max)(blas_int(podarray_prealloc_n_elem::val), n); + blas_int info = 0; + T norm_val = T(0); + + podarray<T> junk(1); + podarray<blas_int> ipiv(A.n_rows); + + arma_extra_debug_print("lapack::lange()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr()); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&n, &n, A.memptr(), &lda, ipiv.memptr(), &info); + + if(info != 0) { return false; } + + out_rcond = auxlib::lu_rcond<T>(A, norm_val); + + if(n > 16) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::getri()"); + lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + + lwork = (std::max)(lwork_proposed, lwork); + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::getri()"); + lapack::getri(&n, A.memptr(), &lda, ipiv.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(A); + arma_stop_logic_error("inv_rcond(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::inv_tr(Mat<eT>& A, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { return true; } + + arma_debug_assert_blas_size(A); + + char uplo = (layout == 0) ? 'U' : 'L'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::trtri()"); + lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(layout); + arma_stop_logic_error("inv(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::inv_tr_rcond(Mat<eT>& A, typename get_pod_type<eT>::result& out_rcond, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename get_pod_type<eT>::result T; + + if(A.is_empty()) { return true; } + + out_rcond = auxlib::rcond_trimat(A, layout); + + arma_debug_assert_blas_size(A); + + char uplo = (layout == 0) ? 'U' : 'L'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::trtri()"); + lapack::trtri(&uplo, &diag, &n, A.memptr(), &n, &info); + + if(info != 0) { out_rcond = T(0); return false; } + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(out_rcond); + arma_ignore(layout); + arma_stop_logic_error("inv(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::inv_sympd(Mat<eT>& A, bool& out_sympd_state) + { + arma_extra_debug_sigprint(); + + out_sympd_state = false; + + if(A.is_empty()) { return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + + // NOTE: for complex matrices, zpotrf() assumes the matrix is hermitian (not simply symmetric) + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + out_sympd_state = true; + + arma_extra_debug_print("lapack::potri()"); + lapack::potri(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + A = symmatl(A); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(out_sympd_state); + arma_stop_logic_error("inv_sympd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::inv_sympd(Mat<eT>& out, const Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + out = X; + + bool sympd_state_junk = false; + + return auxlib::inv_sympd(out, sympd_state_junk); + } + + + +template<typename eT> +inline +bool +auxlib::inv_sympd_rcond(Mat<eT>& A, bool& out_sympd_state, eT& out_rcond) + { + arma_extra_debug_sigprint(); + + out_sympd_state = false; + + if(A.is_empty()) { return true; } + + #if defined(ARMA_USE_LAPACK) + { + typedef typename get_pod_type<eT>::result T; + + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + T norm_val = T(0); + + podarray<T> work(A.n_rows); + + arma_extra_debug_print("lapack::lansy()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { out_rcond = eT(0); return false; } + + out_sympd_state = true; + + out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val); + + if(arma_isnan(out_rcond)) { return false; } + + arma_extra_debug_print("lapack::potri()"); + lapack::potri(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + A = symmatl(A); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(out_sympd_state); + arma_ignore(out_rcond); + arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::inv_sympd_rcond(Mat< std::complex<T> >& A, bool& out_sympd_state, T& out_rcond) + { + arma_extra_debug_sigprint(); + + out_sympd_state = false; + + if(A.is_empty()) { return true; } + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_ignore(A); + arma_ignore(out_sympd_state); + arma_ignore(out_rcond); + return false; + } + #elif defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + T norm_val = T(0); + + podarray<T> work(A.n_rows); + + arma_extra_debug_print("lapack::lanhe()"); + norm_val = (has_blas_float_bug<T>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { out_rcond = T(0); return false; } + + out_sympd_state = true; + + out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val); + + if(arma_isnan(out_rcond)) { return false; } + + arma_extra_debug_print("lapack::potri()"); + lapack::potri(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + A = symmatl(A); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(out_sympd_state); + arma_ignore(out_rcond); + arma_stop_logic_error("inv_sympd_rcond(): use LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! determinant of a matrix +template<typename eT> +inline +bool +auxlib::det(eT& out_val, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + if(A.is_empty()) { out_val = eT(1); return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + podarray<blas_int> ipiv(A.n_rows); + + blas_int info = 0; + blas_int n_rows = blas_int(A.n_rows); + blas_int n_cols = blas_int(A.n_cols); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info); + + if(info < 0) { return false; } + + // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + eT val = A.at(0,0); + for(uword i=1; i < A.n_rows; ++i) { val *= A.at(i,i); } + + blas_int sign = +1; + for(uword i=0; i < A.n_rows; ++i) + { + // NOTE: adjustment of -1 is required as Fortran counts from 1 + if( blas_int(i) != (ipiv.mem[i] - 1) ) { sign *= -1; } + } + + out_val = (sign < 0) ? eT(-val) : eT(val); + + return true; + } + #else + { + arma_ignore(out_val); + arma_ignore(A); + arma_stop_logic_error("det(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! log determinant of a matrix +template<typename eT> +inline +bool +auxlib::log_det(eT& out_val, typename get_pod_type<eT>::result& out_sign, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + if(A.is_empty()) { out_val = eT(0); out_sign = T(1); return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + podarray<blas_int> ipiv(A.n_rows); + + blas_int info = 0; + blas_int n_rows = blas_int(A.n_rows); + blas_int n_cols = blas_int(A.n_cols); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&n_rows, &n_cols, A.memptr(), &n_rows, ipiv.memptr(), &info); + + if(info < 0) { return false; } + + // on output A appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + + sword sign = (is_cx<eT>::no) ? ( (access::tmp_real( A.at(0,0) ) < T(0)) ? -1 : +1 ) : +1; + eT val = (is_cx<eT>::no) ? std::log( (access::tmp_real( A.at(0,0) ) < T(0)) ? A.at(0,0)*T(-1) : A.at(0,0) ) : std::log( A.at(0,0) ); + + for(uword i=1; i < A.n_rows; ++i) + { + const eT x = A.at(i,i); + + sign *= (is_cx<eT>::no) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1; + val += (is_cx<eT>::no) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x); + } + + for(uword i=0; i < A.n_rows; ++i) + { + if( blas_int(i) != (ipiv.mem[i] - 1) ) // NOTE: adjustment of -1 is required as Fortran counts from 1 + { + sign *= -1; + } + } + + out_val = val; + out_sign = T(sign); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(out_val); + arma_ignore(out_sign); + arma_stop_logic_error("log_det(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::log_det_sympd(typename get_pod_type<eT>::result& out_val, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + if(A.is_empty()) { out_val = T(0); return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + T val = T(0); + + for(uword i=0; i < A.n_rows; ++i) { val += std::log( access::tmp_real(A.at(i,i)) ); } + + out_val = T(2) * val; + + return true; + } + #else + { + arma_ignore(out_val); + arma_ignore(A); + arma_stop_logic_error("log_det_sympd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! LU decomposition of a matrix +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<blas_int>& ipiv, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + U = X.get_ref(); + + const uword U_n_rows = U.n_rows; + const uword U_n_cols = U.n_cols; + + if(U.is_empty()) { L.set_size(U_n_rows, 0); U.set_size(0, U_n_cols); ipiv.reset(); return true; } + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(U); + + ipiv.set_size( (std::min)(U_n_rows, U_n_cols) ); + + blas_int info = 0; + + blas_int n_rows = blas_int(U_n_rows); + blas_int n_cols = blas_int(U_n_cols); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info); + + if(info < 0) { return false; } + + // take into account that Fortran counts from 1 + arrayops::inplace_minus(ipiv.memptr(), blas_int(1), ipiv.n_elem); + + L.copy_size(U); + + for(uword col=0; col < U_n_cols; ++col) + { + for(uword row=0; (row < col) && (row < U_n_rows); ++row) + { + L.at(row,col) = eT(0); + } + + if( L.in_range(col,col) ) + { + L.at(col,col) = eT(1); + } + + for(uword row = (col+1); row < U_n_rows; ++row) + { + L.at(row,col) = U.at(row,col); + U.at(row,col) = eT(0); + } + } + + return true; + } + #else + { + arma_stop_logic_error("lu(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + podarray<blas_int> ipiv1; + const bool status = auxlib::lu(L, U, ipiv1, X); + + if(status == false) { return false; } + + if(U.is_empty()) + { + // L and U have been already set to the correct empty matrices + P.eye(L.n_rows, L.n_rows); + return true; + } + + const uword n = ipiv1.n_elem; + const uword P_rows = U.n_rows; + + podarray<blas_int> ipiv2(P_rows); + + const blas_int* ipiv1_mem = ipiv1.memptr(); + blas_int* ipiv2_mem = ipiv2.memptr(); + + for(uword i=0; i<P_rows; ++i) + { + ipiv2_mem[i] = blas_int(i); + } + + for(uword i=0; i<n; ++i) + { + const uword k = static_cast<uword>(ipiv1_mem[i]); + + if( ipiv2_mem[i] != ipiv2_mem[k] ) + { + std::swap( ipiv2_mem[i], ipiv2_mem[k] ); + } + } + + P.zeros(P_rows, P_rows); + + for(uword row=0; row<P_rows; ++row) + { + P.at(row, static_cast<uword>(ipiv2_mem[row])) = eT(1); + } + + if(L.n_cols > U.n_rows) + { + L.shed_cols(U.n_rows, L.n_cols-1); + } + + if(U.n_rows > L.n_cols) + { + U.shed_rows(L.n_cols, U.n_rows-1); + } + + return true; + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + podarray<blas_int> ipiv1; + const bool status = auxlib::lu(L, U, ipiv1, X); + + if(status == false) { return false; } + + if(U.is_empty()) + { + // L and U have been already set to the correct empty matrices + return true; + } + + const uword n = ipiv1.n_elem; + const uword P_rows = U.n_rows; + + podarray<blas_int> ipiv2(P_rows); + + const blas_int* ipiv1_mem = ipiv1.memptr(); + blas_int* ipiv2_mem = ipiv2.memptr(); + + for(uword i=0; i<P_rows; ++i) + { + ipiv2_mem[i] = blas_int(i); + } + + for(uword i=0; i<n; ++i) + { + const uword k = static_cast<uword>(ipiv1_mem[i]); + + if( ipiv2_mem[i] != ipiv2_mem[k] ) + { + std::swap( ipiv2_mem[i], ipiv2_mem[k] ); + L.swap_rows( static_cast<uword>(ipiv2_mem[i]), static_cast<uword>(ipiv2_mem[k]) ); + } + } + + if(L.n_cols > U.n_rows) + { + L.shed_cols(U.n_rows, L.n_cols-1); + } + + if(U.n_rows > L.n_cols) + { + U.shed_rows(L.n_cols, U.n_rows-1); + } + + return true; + } + + + +//! eigen decomposition of general square matrix (real) +template<typename T1> +inline +bool +auxlib::eig_gen + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base<typename T1::pod_type,T1>& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + + Mat<T> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + Mat<T> tmp(1, 1, arma_nozeros_indicator()); + + if(vecs_on) + { + vecs.set_size(X.n_rows, X.n_rows); + tmp.set_size(X.n_rows, X.n_rows); + } + + podarray<T> junk(1); + + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + blas_int N = blas_int(X.n_rows); + T* vl = junk.memptr(); + T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); + blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 4*N) : (std::max)(blas_int(1), 3*N) + blas_int info = 0; + + podarray<T> work( static_cast<uword>(lwork) ); + + podarray<T> vals_real(X.n_rows); + podarray<T> vals_imag(X.n_rows); + + arma_extra_debug_print("lapack::geev() -- START"); + lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info); + arma_extra_debug_print("lapack::geev() -- END"); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + std::complex<T>* vals_mem = vals.memptr(); + + for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); } + + if(vecs_on) + { + for(uword j=0; j < X.n_rows; ++j) + { + if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i < X.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) ); + vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) ); + } + + ++j; + } + else + { + for(uword i=0; i<X.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0)); + } + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigen decomposition of general square matrix (complex) +template<typename T1> +inline +bool +auxlib::eig_gen + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base< std::complex<typename T1::pod_type>, T1 >& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); } + + podarray<eT> junk(1); + + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + blas_int N = blas_int(X.n_rows); + eT* vl = junk.memptr(); + eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork( static_cast<uword>(2*N) ); + + arma_extra_debug_print("lapack::cx_geev() -- START"); + lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); + arma_extra_debug_print("lapack::cx_geev() -- END"); + + return (info == 0); + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigen decomposition of general square matrix (real, balance given matrix) +template<typename T1> +inline +bool +auxlib::eig_gen_balance + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base<typename T1::pod_type,T1>& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + + Mat<T> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + Mat<T> tmp(1, 1, arma_nozeros_indicator()); + + if(vecs_on) + { + vecs.set_size(X.n_rows, X.n_rows); + tmp.set_size(X.n_rows, X.n_rows); + } + + podarray<T> junk(1); + + char bal = 'B'; + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + char sense = 'N'; + blas_int N = blas_int(X.n_rows); + T* vl = junk.memptr(); + T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); + blas_int ilo = blas_int(0); + blas_int ihi = blas_int(0); + T abnrm = T(0); + blas_int lwork = 64*N; // lwork_min = (vecs_on) ? (std::max)(blas_int(1), 2*N) : (std::max)(blas_int(1), 3*N) + blas_int info = blas_int(0); + + podarray<T> scale(X.n_rows); + podarray<T> rconde(X.n_rows); + podarray<T> rcondv(X.n_rows); + + podarray<T> work( static_cast<uword>(lwork) ); + podarray<blas_int> iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N' + + podarray<T> vals_real(X.n_rows); + podarray<T> vals_imag(X.n_rows); + + arma_extra_debug_print("lapack::geevx() -- START"); + lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info); + arma_extra_debug_print("lapack::geevx() -- END"); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + std::complex<T>* vals_mem = vals.memptr(); + + for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); } + + if(vecs_on) + { + for(uword j=0; j < X.n_rows; ++j) + { + if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i < X.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) ); + vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) ); + } + + ++j; + } + else + { + for(uword i=0; i<X.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0)); + } + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigen decomposition of general square matrix (complex, balance given matrix) +template<typename T1> +inline +bool +auxlib::eig_gen_balance + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base< std::complex<typename T1::pod_type>, T1 >& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::eig_gen_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK"); + + return auxlib::eig_gen(vals, vecs, vecs_on, expr); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + if(vecs_on) { vecs.set_size(X.n_rows, X.n_rows); } + + podarray<eT> junk(1); + + char bal = 'B'; + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + char sense = 'N'; + blas_int N = blas_int(X.n_rows); + eT* vl = junk.memptr(); + eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); + blas_int ilo = blas_int(0); + blas_int ihi = blas_int(0); + T abnrm = T(0); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N)) + blas_int info = blas_int(0); + + podarray<T> scale(X.n_rows); + podarray<T> rconde(X.n_rows); + podarray<T> rcondv(X.n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork( static_cast<uword>(2*N) ); + + arma_extra_debug_print("lapack::cx_geevx() -- START"); + lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), vl, &ldvl, vr, &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + arma_extra_debug_print("lapack::cx_geevx() -- END"); + + return (info == 0); + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigen decomposition of general square matrix (real) +template<typename T1> +inline +bool +auxlib::eig_gen_twosided + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base<typename T1::pod_type,T1>& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + + Mat<T> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + lvecs.set_size(X.n_rows, X.n_rows); + rvecs.set_size(X.n_rows, X.n_rows); + + Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); + Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); + + char jobvl = 'V'; + char jobvr = 'V'; + blas_int N = blas_int(X.n_rows); + blas_int ldvl = blas_int(ltmp.n_rows); + blas_int ldvr = blas_int(rtmp.n_rows); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 4*N) + blas_int info = 0; + + podarray<T> work( static_cast<uword>(lwork) ); + + podarray<T> vals_real(X.n_rows); + podarray<T> vals_imag(X.n_rows); + + arma_extra_debug_print("lapack::geev() -- START"); + lapack::geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info); + arma_extra_debug_print("lapack::geev() -- END"); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + std::complex<T>* vals_mem = vals.memptr(); + + for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); } + + for(uword j=0; j < X.n_rows; ++j) + { + if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i < X.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) ); + lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) ); + rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) ); + rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) ); + } + ++j; + } + else + { + for(uword i=0; i<X.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0)); + rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0)); + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigen decomposition of general square matrix (complex) +template<typename T1> +inline +bool +auxlib::eig_gen_twosided + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base< std::complex<typename T1::pod_type>, T1 >& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + lvecs.set_size(X.n_rows, X.n_rows); + rvecs.set_size(X.n_rows, X.n_rows); + + char jobvl = 'V'; + char jobvr = 'V'; + blas_int N = blas_int(X.n_rows); + blas_int ldvl = blas_int(lvecs.n_rows); + blas_int ldvr = blas_int(rvecs.n_rows); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 2*N) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork( static_cast<uword>(2*N) ); + + arma_extra_debug_print("lapack::cx_geev() -- START"); + lapack::cx_geev(&jobvl, &jobvr, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); + arma_extra_debug_print("lapack::cx_geev() -- END"); + + return (info == 0); + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigen decomposition of general square matrix (real, balance given matrix) +template<typename T1> +inline +bool +auxlib::eig_gen_twosided_balance + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base<typename T1::pod_type,T1>& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + + Mat<T> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + lvecs.set_size(X.n_rows, X.n_rows); + rvecs.set_size(X.n_rows, X.n_rows); + + Mat<T> ltmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); + Mat<T> rtmp(X.n_rows, X.n_rows, arma_nozeros_indicator()); + + char bal = 'B'; + char jobvl = 'V'; + char jobvr = 'V'; + char sense = 'N'; + blas_int N = blas_int(X.n_rows); + blas_int ldvl = blas_int(ltmp.n_rows); + blas_int ldvr = blas_int(rtmp.n_rows); + blas_int ilo = blas_int(0); + blas_int ihi = blas_int(0); + T abnrm = T(0); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(3*N)) + blas_int info = blas_int(0); + + podarray<T> scale(X.n_rows); + podarray<T> rconde(X.n_rows); + podarray<T> rcondv(X.n_rows); + + podarray<T> work( static_cast<uword>(lwork) ); + podarray<blas_int> iwork( uword(1) ); // iwork not used by lapack::geevx() as sense = 'N' + + podarray<T> vals_real(X.n_rows); + podarray<T> vals_imag(X.n_rows); + + arma_extra_debug_print("lapack::geevx() -- START"); + lapack::geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals_real.memptr(), vals_imag.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, iwork.memptr(), &info); + arma_extra_debug_print("lapack::geevx() -- END"); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + std::complex<T>* vals_mem = vals.memptr(); + + for(uword i=0; i < X.n_rows; ++i) { vals_mem[i] = std::complex<T>(vals_real[i], vals_imag[i]); } + + for(uword j=0; j < X.n_rows; ++j) + { + if( (j < (X.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i < X.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) ); + lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) ); + rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) ); + rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) ); + } + ++j; + } + else + { + for(uword i=0; i<X.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0)); + rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0)); + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigen decomposition of general square matrix (complex, balance given matrix) +template<typename T1> +inline +bool +auxlib::eig_gen_twosided_balance + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base< std::complex<typename T1::pod_type>, T1 >& expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::eig_gen_twosided_balance(): redirecting to auxlib::eig_gen() due to crippled LAPACK"); + + return auxlib::eig_gen(vals, lvecs, rvecs, expr); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> X = expr.get_ref(); + + arma_debug_check( (X.is_square() == false), "eig_gen(): given matrix must be square sized" ); + + arma_debug_assert_blas_size(X); + + if(X.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && X.internal_has_nonfinite()) { return false; } + + vals.set_size(X.n_rows, 1); + + lvecs.set_size(X.n_rows, X.n_rows); + rvecs.set_size(X.n_rows, X.n_rows); + + char bal = 'B'; + char jobvl = 'V'; + char jobvr = 'V'; + char sense = 'N'; + blas_int N = blas_int(X.n_rows); + blas_int ldvl = blas_int(lvecs.n_rows); + blas_int ldvr = blas_int(rvecs.n_rows); + blas_int ilo = blas_int(0); + blas_int ihi = blas_int(0); + T abnrm = T(0); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), blas_int(2*N)) + blas_int info = blas_int(0); + + podarray<T> scale(X.n_rows); + podarray<T> rconde(X.n_rows); + podarray<T> rcondv(X.n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork( static_cast<uword>(2*N) ); + + arma_extra_debug_print("lapack::cx_geevx() -- START"); + lapack::cx_geevx(&bal, &jobvl, &jobvr, &sense, &N, X.memptr(), &N, vals.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, &ilo, &ihi, scale.memptr(), &abnrm, rconde.memptr(), rcondv.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + arma_extra_debug_print("lapack::cx_geevx() -- END"); + + return (info == 0); + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(expr); + arma_stop_logic_error("eig_gen(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigendecomposition of general square matrix pair (real) +template<typename T1, typename T2> +inline +bool +auxlib::eig_pair + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base<typename T1::pod_type,T1>& A_expr, + const Base<typename T1::pod_type,T2>& B_expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef std::complex<T> eT; + + Mat<T> A(A_expr.get_ref()); + Mat<T> B(B_expr.get_ref()); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); + + arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); + + arma_debug_assert_blas_size(A); + + if(A.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + vals.set_size(A.n_rows, 1); + + Mat<T> tmp(1, 1, arma_nozeros_indicator()); + + if(vecs_on) + { + vecs.set_size(A.n_rows, A.n_rows); + tmp.set_size(A.n_rows, A.n_rows); + } + + podarray<T> junk(1); + + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + blas_int N = blas_int(A.n_rows); + T* vl = junk.memptr(); + T* vr = (vecs_on) ? tmp.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(tmp.n_rows) : blas_int(1); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N) + blas_int info = 0; + + podarray<T> alphar(A.n_rows); + podarray<T> alphai(A.n_rows); + podarray<T> beta(A.n_rows); + + podarray<T> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::ggev()"); + lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, &info); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + eT* vals_mem = vals.memptr(); + const T* alphar_mem = alphar.memptr(); + const T* alphai_mem = alphai.memptr(); + const T* beta_mem = beta.memptr(); + + bool beta_has_zero = false; + + for(uword j=0; j<A.n_rows; ++j) + { + const T alphai_val = alphai_mem[j]; + const T beta_val = beta_mem[j]; + + const T re = alphar_mem[j] / beta_val; + const T im = alphai_val / beta_val; + + beta_has_zero = (beta_has_zero || (beta_val == T(0))); + + vals_mem[j] = std::complex<T>(re, im); + + if( (alphai_val > T(0)) && (j < (A.n_rows-1)) ) + { + ++j; + vals_mem[j] = std::complex<T>(re,-im); // force exact conjugate + } + } + + if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } + + if(vecs_on) + { + for(uword j=0; j<A.n_rows; ++j) + { + if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i<A.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>( tmp.at(i,j), tmp.at(i,j+1) ); + vecs.at(i,j+1) = std::complex<T>( tmp.at(i,j), -tmp.at(i,j+1) ); + } + + ++j; + } + else + { + for(uword i=0; i<A.n_rows; ++i) + { + vecs.at(i,j) = std::complex<T>(tmp.at(i,j), T(0)); + } + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigendecomposition of general square matrix pair (complex) +template<typename T1, typename T2> +inline +bool +auxlib::eig_pair + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& vecs, + const bool vecs_on, + const Base< std::complex<typename T1::pod_type>, T1 >& A_expr, + const Base< std::complex<typename T1::pod_type>, T2 >& B_expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> A(A_expr.get_ref()); + Mat<eT> B(B_expr.get_ref()); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); + + arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); + + arma_debug_assert_blas_size(A); + + if(A.is_empty()) { vals.reset(); vecs.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + vals.set_size(A.n_rows, 1); + + if(vecs_on) { vecs.set_size(A.n_rows, A.n_rows); } + + podarray<eT> junk(1); + + char jobvl = 'N'; + char jobvr = (vecs_on) ? 'V' : 'N'; + blas_int N = blas_int(A.n_rows); + eT* vl = junk.memptr(); + eT* vr = (vecs_on) ? vecs.memptr() : junk.memptr(); + blas_int ldvl = blas_int(1); + blas_int ldvr = (vecs_on) ? blas_int(vecs.n_rows) : blas_int(1); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) + blas_int info = 0; + + podarray<eT> alpha(A.n_rows); + podarray<eT> beta(A.n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>(8*N) ); + + arma_extra_debug_print("lapack::cx_ggev()"); + lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), vl, &ldvl, vr, &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); + + if(info != 0) { return false; } + + eT* vals_mem = vals.memptr(); + const eT* alpha_mem = alpha.memptr(); + const eT* beta_mem = beta.memptr(); + + const std::complex<T> zero(T(0), T(0)); + + bool beta_has_zero = false; + + for(uword i=0; i<A.n_rows; ++i) + { + const eT& beta_val = beta_mem[i]; + + vals_mem[i] = alpha_mem[i] / beta_val; + + beta_has_zero = (beta_has_zero || (beta_val == zero)); + } + + if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(vecs); + arma_ignore(vecs_on); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigendecomposition of general square matrix pair (real) +template<typename T1, typename T2> +inline +bool +auxlib::eig_pair_twosided + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base<typename T1::pod_type,T1>& A_expr, + const Base<typename T1::pod_type,T2>& B_expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef std::complex<T> eT; + + Mat<T> A(A_expr.get_ref()); + Mat<T> B(B_expr.get_ref()); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); + + arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); + + arma_debug_assert_blas_size(A); + + if(A.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + vals.set_size(A.n_rows, 1); + + lvecs.set_size(A.n_rows, A.n_rows); + rvecs.set_size(A.n_rows, A.n_rows); + + Mat<T> ltmp(A.n_rows, A.n_rows, arma_nozeros_indicator()); + Mat<T> rtmp(A.n_rows, A.n_rows, arma_nozeros_indicator()); + + char jobvl = 'V'; + char jobvr = 'V'; + blas_int N = blas_int(A.n_rows); + blas_int ldvl = blas_int(ltmp.n_rows); + blas_int ldvr = blas_int(rtmp.n_rows); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1), 8*N) + blas_int info = 0; + + podarray<T> alphar(A.n_rows); + podarray<T> alphai(A.n_rows); + podarray<T> beta(A.n_rows); + + podarray<T> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::ggev()"); + lapack::ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alphar.memptr(), alphai.memptr(), beta.memptr(), ltmp.memptr(), &ldvl, rtmp.memptr(), &ldvr, work.memptr(), &lwork, &info); + + if(info != 0) { return false; } + + arma_extra_debug_print("reformatting eigenvalues and eigenvectors"); + + eT* vals_mem = vals.memptr(); + const T* alphar_mem = alphar.memptr(); + const T* alphai_mem = alphai.memptr(); + const T* beta_mem = beta.memptr(); + + bool beta_has_zero = false; + + for(uword j=0; j<A.n_rows; ++j) + { + const T alphai_val = alphai_mem[j]; + const T beta_val = beta_mem[j]; + + const T re = alphar_mem[j] / beta_val; + const T im = alphai_val / beta_val; + + beta_has_zero = (beta_has_zero || (beta_val == T(0))); + + vals_mem[j] = std::complex<T>(re, im); + + if( (alphai_val > T(0)) && (j < (A.n_rows-1)) ) + { + ++j; + vals_mem[j] = std::complex<T>(re,-im); // force exact conjugate + } + } + + if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } + + for(uword j=0; j < A.n_rows; ++j) + { + if( (j < (A.n_rows-1)) && (vals_mem[j] == std::conj(vals_mem[j+1])) ) + { + for(uword i=0; i < A.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>( ltmp.at(i,j), ltmp.at(i,j+1) ); + lvecs.at(i,j+1) = std::complex<T>( ltmp.at(i,j), -ltmp.at(i,j+1) ); + rvecs.at(i,j) = std::complex<T>( rtmp.at(i,j), rtmp.at(i,j+1) ); + rvecs.at(i,j+1) = std::complex<T>( rtmp.at(i,j), -rtmp.at(i,j+1) ); + } + ++j; + } + else + { + for(uword i=0; i<A.n_rows; ++i) + { + lvecs.at(i,j) = std::complex<T>(ltmp.at(i,j), T(0)); + rvecs.at(i,j) = std::complex<T>(rtmp.at(i,j), T(0)); + } + } + } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! two-sided eigendecomposition of general square matrix pair (complex) +template<typename T1, typename T2> +inline +bool +auxlib::eig_pair_twosided + ( + Mat< std::complex<typename T1::pod_type> >& vals, + Mat< std::complex<typename T1::pod_type> >& lvecs, + Mat< std::complex<typename T1::pod_type> >& rvecs, + const Base< std::complex<typename T1::pod_type>, T1 >& A_expr, + const Base< std::complex<typename T1::pod_type>, T2 >& B_expr + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> A(A_expr.get_ref()); + Mat<eT> B(B_expr.get_ref()); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "eig_pair(): given matrices must be square sized" ); + + arma_debug_check( (A.n_rows != B.n_rows), "eig_pair(): given matrices must have the same size" ); + + arma_debug_assert_blas_size(A); + + if(A.is_empty()) { vals.reset(); lvecs.reset(); rvecs.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + vals.set_size(A.n_rows, 1); + + lvecs.set_size(A.n_rows, A.n_rows); + rvecs.set_size(A.n_rows, A.n_rows); + + char jobvl = 'V'; + char jobvr = 'V'; + blas_int N = blas_int(A.n_rows); + blas_int ldvl = blas_int(lvecs.n_rows); + blas_int ldvr = blas_int(rvecs.n_rows); + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) + blas_int info = 0; + + podarray<eT> alpha(A.n_rows); + podarray<eT> beta(A.n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>(8*N) ); + + arma_extra_debug_print("lapack::cx_ggev()"); + lapack::cx_ggev(&jobvl, &jobvr, &N, A.memptr(), &N, B.memptr(), &N, alpha.memptr(), beta.memptr(), lvecs.memptr(), &ldvl, rvecs.memptr(), &ldvr, work.memptr(), &lwork, rwork.memptr(), &info); + + if(info != 0) { return false; } + + eT* vals_mem = vals.memptr(); + const eT* alpha_mem = alpha.memptr(); + const eT* beta_mem = beta.memptr(); + + const std::complex<T> zero(T(0), T(0)); + + bool beta_has_zero = false; + + for(uword i=0; i<A.n_rows; ++i) + { + const eT& beta_val = beta_mem[i]; + + vals_mem[i] = alpha_mem[i] / beta_val; + + beta_has_zero = (beta_has_zero || (beta_val == zero)); + } + + if(beta_has_zero) { arma_debug_warn_level(1, "eig_pair(): given matrices appear ill-conditioned"); } + + return true; + } + #else + { + arma_ignore(vals); + arma_ignore(lvecs); + arma_ignore(rvecs); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_stop_logic_error("eig_pair(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues of a symmetric real matrix +template<typename eT> +inline +bool +auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(A.is_empty()) { eigval.reset(); return true; } + + if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false)) + { + arma_debug_warn_level(1, "eig_sym(): given matrix is not symmetric"); + } + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A)) { return false; } + + arma_debug_assert_blas_size(A); + + eigval.set_size(A.n_rows); + + char jobz = 'N'; + char uplo = 'U'; + + blas_int N = blas_int(A.n_rows); + blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::syev()"); + lapack::syev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(A); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues of a hermitian complex matrix +template<typename T> +inline +bool +auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(A.is_empty()) { eigval.reset(); return true; } + + if((arma_config::debug) && (auxlib::rudimentary_sym_check(A) == false)) + { + arma_debug_warn_level(1, "eig_sym(): given matrix is not hermitian"); + } + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(A)) { return false; } + + arma_debug_assert_blas_size(A); + + eigval.set_size(A.n_rows); + + char jobz = 'N'; + char uplo = 'U'; + + blas_int N = blas_int(A.n_rows); + blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>( (std::max)(blas_int(1), 3*N) ) ); + + arma_extra_debug_print("lapack::heev()"); + lapack::heev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(A); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues and eigenvectors of a symmetric real matrix +template<typename eT> +inline +bool +auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } + + eigvec = X; + + if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } + + arma_debug_assert_blas_size(eigvec); + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = (64+2)*N; // lwork_min = (std::max)(blas_int(1), 3*N-1) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + + arma_extra_debug_print("lapack::syev()"); + lapack::syev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues and eigenvectors of a hermitian complex matrix +template<typename T> +inline +bool +auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } + + eigvec = X; + + if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } + + arma_debug_assert_blas_size(eigvec); + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = (64+1)*N; // lwork_min = (std::max)(blas_int(1), 2*N-1) + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>((std::max)(blas_int(1), 3*N)) ); + + arma_extra_debug_print("lapack::heev()"); + lapack::heev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues and eigenvectors of a symmetric real matrix (divide and conquer algorithm) +template<typename eT> +inline +bool +auxlib::eig_sym_dc(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } + + eigvec = X; + + if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } + + arma_debug_assert_blas_size(eigvec); + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork_min = 1 + 6*N + 2*(N*N); + blas_int liwork_min = 3 + 5*N; + blas_int info = 0; + + blas_int lwork_proposed = 0; + blas_int liwork_proposed = 0; + + if(N >= 32) + { + eT work_query[2] = {}; + blas_int iwork_query[2] = {}; + + blas_int lwork_query = -1; + blas_int liwork_query = -1; + + arma_extra_debug_print("lapack::syevd()"); + lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &iwork_query[0], &liwork_query, &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( work_query[0] ); + liwork_proposed = iwork_query[0]; + } + + blas_int lwork_final = (std::max)( lwork_proposed, lwork_min); + blas_int liwork_final = (std::max)(liwork_proposed, liwork_min); + + podarray<eT> work( static_cast<uword>( lwork_final) ); + podarray<blas_int> iwork( static_cast<uword>(liwork_final) ); + + arma_extra_debug_print("lapack::syevd()"); + lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, iwork.memptr(), &liwork_final, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! eigenvalues and eigenvectors of a hermitian complex matrix (divide and conquer algorithm) +template<typename T> +inline +bool +auxlib::eig_sym_dc(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_check( (X.is_square() == false), "eig_sym(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && trimat_helper::has_nonfinite_triu(X)) { return false; } + + eigvec = X; + + if(eigvec.is_empty()) { eigval.reset(); eigvec.reset(); return true; } + + arma_debug_assert_blas_size(eigvec); + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork_min = 2*N + N*N; + blas_int lrwork_min = 1 + 5*N + 2*(N*N); + blas_int liwork_min = 3 + 5*N; + blas_int info = 0; + + blas_int lwork_proposed = 0; + blas_int lrwork_proposed = 0; + blas_int liwork_proposed = 0; + + if(N >= 32) + { + eT work_query[2] = {}; + T rwork_query[2] = {}; + blas_int iwork_query[2] = {}; + + blas_int lwork_query = -1; + blas_int lrwork_query = -1; + blas_int liwork_query = -1; + + arma_extra_debug_print("lapack::heevd()"); + lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), &work_query[0], &lwork_query, &rwork_query[0], &lrwork_query, &iwork_query[0], &liwork_query, &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + lrwork_proposed = static_cast<blas_int>( rwork_query[0] ); + liwork_proposed = iwork_query[0]; + } + + blas_int lwork_final = (std::max)( lwork_proposed, lwork_min); + blas_int lrwork_final = (std::max)(lrwork_proposed, lrwork_min); + blas_int liwork_final = (std::max)(liwork_proposed, liwork_min); + + podarray<eT> work( static_cast<uword>( lwork_final) ); + podarray< T> rwork( static_cast<uword>(lrwork_final) ); + podarray<blas_int> iwork( static_cast<uword>(liwork_final) ); + + arma_extra_debug_print("lapack::heevd()"); + lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &lrwork_final, iwork.memptr(), &liwork_final, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop_logic_error("eig_sym(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::chol_simple(Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(X); + + char uplo = 'U'; + blas_int n = blas_int(X.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, X.memptr(), &n, &info); + + return (info == 0); + } + #else + { + arma_ignore(X); + + arma_stop_logic_error("chol(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::chol(Mat<eT>& X, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(X); + + char uplo = (layout == 0) ? 'U' : 'L'; + blas_int n = blas_int(X.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, X.memptr(), &n, &info); + + if(info != 0) { return false; } + + X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type + + return true; + } + #else + { + arma_ignore(X); + arma_ignore(layout); + + arma_stop_logic_error("chol(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::chol_band(Mat<eT>& X, const uword KD, const uword layout) + { + arma_extra_debug_sigprint(); + + return auxlib::chol_band_common(X, KD, layout); + } + + + +template<typename T> +inline +bool +auxlib::chol_band(Mat< std::complex<T> >& X, const uword KD, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::chol_band(): redirecting to auxlib::chol() due to crippled LAPACK"); + + arma_ignore(KD); + + return auxlib::chol(X, layout); + } + #else + { + return auxlib::chol_band_common(X, KD, layout); + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::chol_band_common(Mat<eT>& X, const uword KD, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + const uword N = X.n_rows; + + const uword KL = (layout == 0) ? uword(0) : KD; + const uword KU = (layout == 0) ? KD : uword(0); + + Mat<eT> AB; + band_helper::compress(AB, X, KL, KU, false); + + arma_debug_assert_blas_size(AB); + + char uplo = (layout == 0) ? 'U' : 'L'; + blas_int n = blas_int(N); + blas_int kd = blas_int(KD); + blas_int ldab = blas_int(AB.n_rows); + blas_int info = 0; + + arma_extra_debug_print("lapack::pbtrf()"); + lapack::pbtrf(&uplo, &n, &kd, AB.memptr(), &ldab, &info); + + if(info != 0) { return false; } + + band_helper::uncompress(X, AB, KL, KU, false); + + return true; + } + #else + { + arma_ignore(X); + arma_ignore(KD); + arma_ignore(layout); + + arma_stop_logic_error("chol(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::chol_pivot(Mat<eT>& X, Mat<uword>& P, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename get_pod_type<eT>::result T; + + arma_debug_assert_blas_size(X); + + char uplo = (layout == 0) ? 'U' : 'L'; + blas_int n = blas_int(X.n_rows); + blas_int rank = 0; + T tol = T(-1); + blas_int info = 0; + + podarray<blas_int> ipiv( X.n_rows); + podarray<T> work(2*X.n_rows); + + ipiv.zeros(); + + arma_extra_debug_print("lapack::pstrf()"); + lapack::pstrf(&uplo, &n, X.memptr(), &n, ipiv.memptr(), &rank, &tol, work.memptr(), &info); + + if(info != 0) { return false; } + + X = (layout == 0) ? trimatu(X) : trimatl(X); // trimatu() and trimatl() return the same type + + P.set_size(X.n_rows, 1); + + for(uword i=0; i < X.n_rows; ++i) + { + P[i] = uword(ipiv[i] - 1); // take into account that Fortran counts from 1 + } + + return true; + } + #else + { + arma_ignore(X); + arma_ignore(P); + arma_ignore(layout); + + arma_stop_logic_error("chol(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +// +// hessenberg decomposition +template<typename eT, typename T1> +inline +bool +auxlib::hess(Mat<eT>& H, const Base<eT,T1>& X, Col<eT>& tao) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + H = X.get_ref(); + + arma_debug_check( (H.is_square() == false), "hess(): given matrix must be square sized" ); + + if(H.is_empty()) { return true; } + + arma_debug_assert_blas_size(H); + + if(H.n_rows > 2) + { + tao.set_size(H.n_rows-1); + + blas_int n = blas_int(H.n_rows); + blas_int ilo = 1; + blas_int ihi = blas_int(H.n_rows); + blas_int lda = blas_int(H.n_rows); + blas_int lwork = blas_int(H.n_rows) * 64; + blas_int info = 0; + + podarray<eT> work(static_cast<uword>(lwork)); + + arma_extra_debug_print("lapack::gehrd()"); + lapack::gehrd(&n, &ilo, &ihi, H.memptr(), &lda, tao.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + + return true; + } + #else + { + arma_ignore(H); + arma_ignore(X); + arma_ignore(tao); + arma_stop_logic_error("hess(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + R = X.get_ref(); + + const uword R_n_rows = R.n_rows; + const uword R_n_cols = R.n_cols; + + if(R.is_empty()) { Q.eye(R_n_rows, R_n_rows); return true; } + + arma_debug_assert_blas_size(R); + + blas_int m = static_cast<blas_int>(R_n_rows); + blas_int n = static_cast<blas_int>(R_n_cols); + blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::geqrf()"); + lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::geqrf()"); + lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + + if(info != 0) { return false; } + + Q.set_size(R_n_rows, R_n_rows); + + arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); + + // + // construct R + + for(uword col=0; col < R_n_cols; ++col) + { + for(uword row=(col+1); row < R_n_rows; ++row) + { + R.at(row,col) = eT(0); + } + } + + + if( (is_float<eT>::value) || (is_double<eT>::value) ) + { + arma_extra_debug_print("lapack::orgqr()"); + lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + } + else + if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) ) + { + arma_extra_debug_print("lapack::ungqr()"); + lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + } + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(X); + arma_stop_logic_error("qr(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(is_Mat<T1>::value) + { + const unwrap<T1> tmp(X.get_ref()); + const Mat<eT>& M = tmp.M; + + if(M.n_rows < M.n_cols) { return auxlib::qr(Q, R, X); } + } + + Q = X.get_ref(); + + const uword Q_n_rows = Q.n_rows; + const uword Q_n_cols = Q.n_cols; + + if( Q_n_rows <= Q_n_cols ) { return auxlib::qr(Q, R, Q); } + + if(Q.is_empty()) { Q.set_size(Q_n_rows, 0); R.set_size(0, Q_n_cols); return true; } + + arma_debug_assert_blas_size(Q); + + blas_int m = static_cast<blas_int>(Q_n_rows); + blas_int n = static_cast<blas_int>(Q_n_cols); + blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::geqrf()"); + lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::geqrf()"); + lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + + if(info != 0) { return false; } + + R.zeros(Q_n_cols, Q_n_cols); + + // + // construct R + + for(uword col=0; col < Q_n_cols; ++col) + { + for(uword row=0; row <= col; ++row) + { + R.at(row,col) = Q.at(row,col); + } + } + + if( (is_float<eT>::value) || (is_double<eT>::value) ) + { + arma_extra_debug_print("lapack::orgqr()"); + lapack::orgqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + } + else + if( (is_cx_float<eT>::value) || (is_cx_double<eT>::value) ) + { + arma_extra_debug_print("lapack::ungqr()"); + lapack::ungqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + } + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(X); + arma_stop_logic_error("qr_econ(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::qr_pivot(Mat<eT>& Q, Mat<eT>& R, Mat<uword>& P, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + R = X.get_ref(); + + const uword R_n_rows = R.n_rows; + const uword R_n_cols = R.n_cols; + + if(R.is_empty()) + { + Q.eye(R_n_rows, R_n_rows); + + P.set_size(R_n_cols, 1); + + for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; } + + return true; + } + + arma_debug_assert_blas_size(R); + + blas_int m = static_cast<blas_int>(R_n_rows); + blas_int n = static_cast<blas_int>(R_n_cols); + blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and orgqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + podarray<blas_int> jpvt( R_n_cols ); + + jpvt.zeros(); + + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::geqp3()"); + lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::geqp3()"); + lapack::geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, &info); + + if(info != 0) { return false; } + + Q.set_size(R_n_rows, R_n_rows); + + arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); + + // + // construct R and P + + P.set_size(R_n_cols, 1); + + for(uword col=0; col < R_n_cols; ++col) + { + for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); } + + P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1 + } + + arma_extra_debug_print("lapack::orgqr()"); + lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(P); + arma_ignore(X); + arma_stop_logic_error("qr(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::qr_pivot(Mat< std::complex<T> >& Q, Mat< std::complex<T> >& R, Mat<uword>& P, const Base<std::complex<T>,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + R = X.get_ref(); + + const uword R_n_rows = R.n_rows; + const uword R_n_cols = R.n_cols; + + if(R.is_empty()) + { + Q.eye(R_n_rows, R_n_rows); + + P.set_size(R_n_cols, 1); + + for(uword col=0; col < R_n_cols; ++col) { P.at(col) = col; } + + return true; + } + + arma_debug_assert_blas_size(R); + + blas_int m = static_cast<blas_int>(R_n_rows); + blas_int n = static_cast<blas_int>(R_n_cols); + blas_int lwork_min = (std::max)(blas_int(3*n + 1), (std::max)(m,n)); // take into account requirements of geqp3() and ungqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + podarray< T> rwork( 2*R_n_cols ); + podarray<blas_int> jpvt( R_n_cols ); + + jpvt.zeros(); + + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::geqp3()"); + lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), &work_query[0], &lwork_query, rwork.memptr(), &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::geqp3()"); + lapack::cx_geqp3(&m, &n, R.memptr(), &m, jpvt.memptr(), tau.memptr(), work.memptr(), &lwork_final, rwork.memptr(), &info); + + if(info != 0) { return false; } + + Q.set_size(R_n_rows, R_n_rows); + + arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); + + // + // construct R and P + + P.set_size(R_n_cols, 1); + + for(uword col=0; col < R_n_cols; ++col) + { + for(uword row=(col+1); row < R_n_rows; ++row) { R.at(row,col) = eT(0); } + + P.at(col) = jpvt[col] - 1; // take into account that Fortran counts from 1 + } + + arma_extra_debug_print("lapack::ungqr()"); + lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork_final, &info); + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(P); + arma_ignore(X); + arma_stop_logic_error("qr(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd(Col<eT>& S, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { S.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + Mat<eT> U(1, 1, arma_nozeros_indicator()); + Mat<eT> V(1, A.n_cols, arma_nozeros_indicator()); + + char jobu = 'N'; + char jobvt = 'N'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( work_query[0] ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd(Col<T>& S, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(A.is_empty()) { S.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + Mat<eT> U(1, 1, arma_nozeros_indicator()); + Mat<eT> V(1, A.n_cols, arma_nozeros_indicator()); + + char jobu = 'N'; + char jobvt = 'N'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn+(std::max)(m,n) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<T> rwork( static_cast<uword>(5*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; // query to find optimum size of workspace + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobu = 'A'; + char jobvt = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + // query to find optimum size of workspace + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( work_query[0] ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); + + if(info != 0) { return false; } + + op_strans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobu = 'A'; + char jobvt = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = (std::max)( blas_int(1), 2*min_mn + (std::max)(m,n) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<T> rwork( static_cast<uword>(5*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; // query to find optimum size of workspace + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); + + if(info != 0) { return false; } + + op_htrans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { U.eye(); S.reset(); V.eye(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int ldu = 0; + blas_int ldvt = 0; + + char jobu = char(0); + char jobvt = char(0); + + if(mode == 'l') + { + jobu = 'S'; + jobvt = 'N'; + + ldu = m; + ldvt = 1; + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.reset(); + } + + if(mode == 'r') + { + jobu = 'N'; + jobvt = 'S'; + + ldu = 1; + ldvt = (std::min)(m,n); + + U.reset(); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + } + + if(mode == 'b') + { + jobu = 'S'; + jobvt = 'S'; + + ldu = m; + ldvt = (std::min)(m,n); + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n ) ); + } + + + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int info = 0; + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; // query to find optimum size of workspace + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>(work_query[0]); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesvd()"); + lapack::gesvd<eT>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, &info); + + if(info != 0) { return false; } + + op_strans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_ignore(mode); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(A.is_empty()) { U.eye(); S.reset(); V.eye(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int ldu = 0; + blas_int ldvt = 0; + + char jobu = char(0); + char jobvt = char(0); + + if(mode == 'l') + { + jobu = 'S'; + jobvt = 'N'; + + ldu = m; + ldvt = 1; + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.reset(); + } + + if(mode == 'r') + { + jobu = 'N'; + jobvt = 'S'; + + ldu = 1; + ldvt = (std::min)(m,n); + + U.reset(); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + } + + if(mode == 'b') + { + jobu = 'S'; + jobvt = 'S'; + + ldu = m; + ldvt = (std::min)(m,n); + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + } + + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int info = 0; + + podarray<T> rwork( static_cast<uword>(5*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; // query to find optimum size of workspace + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesvd()"); + lapack::cx_gesvd<T>(&jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), &info); + + if(info != 0) { return false; } + + op_htrans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_ignore(mode); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd_dc(Col<eT>& S, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { S.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + Mat<eT> U(1, 1, arma_nozeros_indicator()); + Mat<eT> V(1, 1, arma_nozeros_indicator()); + + char jobz = 'N'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = 3*min_mn + (std::max)( max_mn, 7*min_mn ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( work_query[0] ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd_dc(Col<T>& S, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(A.is_empty()) { S.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + Mat<eT> U(1, 1, arma_nozeros_indicator()); + Mat<eT> V(1, 1, arma_nozeros_indicator()); + + char jobz = 'N'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = 2*min_mn + max_mn; + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<T> rwork( static_cast<uword>(7*min_mn) ); // from LAPACK 3.8 docs: LAPACK <= v3.6 needs 7*mn + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd_dc(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobz = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork1 = 3*min_mn*min_mn + (std::max)(max_mn, 4*min_mn*min_mn + 4*min_mn); // as per LAPACK 3.2 docs + blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.8 docs; consistent with LAPACK 3.4 docs + blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.8 + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>(work_query[0]); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); + + if(info != 0) { return false; } + + op_strans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd_dc(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(A.is_empty()) { U.eye(A.n_rows, A.n_rows); S.reset(); V.eye(A.n_cols, A.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobz = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2, 3.4, 3.8 docs + blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // as per LAPACK 3.4 docs; LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7 + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<T> rwork( static_cast<uword>(lrwork ) ); + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + op_htrans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::svd_dc_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + char jobz = 'S'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = m; + blas_int ldvt = min_mn; + blas_int lwork1 = 3*min_mn*min_mn + (std::max)( max_mn, 4*min_mn*min_mn + 4*min_mn ); // as per LAPACK 3.2 docs + blas_int lwork2 = 4*min_mn*min_mn + 6*min_mn + max_mn; // as per LAPACK 3.4 docs; LAPACK 3.8 requires 4*min_mn*min_mn + 7*min_mn + blas_int lwork_min = (std::max)(lwork1, lwork2); // due to differences between LAPACK 3.2 and 3.4 + blas_int info = 0; + + if(A.is_empty()) + { + U.eye(); + S.reset(); + V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) ); + return true; + } + + S.set_size( static_cast<uword>(min_mn) ); + + U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) ); + + V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) ); + + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 1024) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>(work_query[0]); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gesdd()"); + lapack::gesdd<eT>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, iwork.memptr(), &info); + + if(info != 0) { return false; } + + op_strans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +auxlib::svd_dc_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, Mat< std::complex<T> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + char jobz = 'S'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int max_mn = (std::max)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = m; + blas_int ldvt = min_mn; + blas_int lwork_min = min_mn*min_mn + 2*min_mn + max_mn; // as per LAPACK 3.2 docs + blas_int lrwork = min_mn * ((std::max)(5*min_mn+7, 2*max_mn + 2*min_mn+1)); // LAPACK 3.8 uses 5*min_mn+5 instead of 5*min_mn+7 + blas_int info = 0; + + if(A.is_empty()) + { + U.eye(); + S.reset(); + V.eye( static_cast<uword>(n), static_cast<uword>(min_mn) ); + return true; + } + + S.set_size( static_cast<uword>(min_mn) ); + + U.set_size( static_cast<uword>(m), static_cast<uword>(min_mn) ); + + V.set_size( static_cast<uword>(min_mn), static_cast<uword>(n) ); + + podarray<T> rwork( static_cast<uword>(lrwork ) ); + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + blas_int lwork_proposed = 0; + + if(A.n_elem >= 256) + { + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gesdd()"); + lapack::cx_gesdd<T>(&jobz, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + op_htrans::apply_mat_inplace(V); + + return true; + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(A); + arma_stop_logic_error("svd(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition +template<typename T1> +inline +bool +auxlib::solve_square_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + + arma_debug_assert_blas_size(A); + + blas_int n = blas_int(A.n_rows); // assuming A is square + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(B_n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int info = blas_int(0); + + podarray<blas_int> ipiv(A.n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory + + arma_extra_debug_print("lapack::gesv()"); + lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); + + return (info == 0); + } + #else + { + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition with rcond estimate +template<typename T1> +inline +bool +auxlib::solve_square_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + out_rcond = T(0); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + char trans = 'N'; + blas_int n = blas_int(A.n_rows); // assuming A is square + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(B_n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int info = blas_int(0); + T norm_val = T(0); + + podarray<T> junk(1); + podarray<blas_int> ipiv(A.n_rows + 2); // +2 for paranoia + + arma_extra_debug_print("lapack::lange()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange<eT>(&norm_id, &n, &n, A.memptr(), &lda, junk.memptr()); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf<eT>(&n, &n, A.memptr(), &n, ipiv.memptr(), &info); + + if(info != blas_int(0)) { return false; } + + arma_extra_debug_print("lapack::getrs()"); + lapack::getrs<eT>(&trans, &n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); + + if(info != blas_int(0)) { return false; } + + out_rcond = auxlib::lu_rcond<T>(A, norm_val); + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition with refinement (real matrices) +template<typename T1> +inline +bool +auxlib::solve_square_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type eT; + + // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::gesvx() if equilibrate is enabled + + quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const + + const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below + + const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); + + Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } + + const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + out.set_size(A.n_rows, B.n_cols); + + char fact = (equilibrate) ? 'E' : 'N'; + char trans = 'N'; + char equed = char(0); + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldaf = blas_int(A.n_rows); + blas_int ldb = blas_int(A.n_rows); + blas_int ldx = blas_int(A.n_rows); + blas_int info = blas_int(0); + eT rcond = eT(0); + + Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); + + podarray<blas_int> IPIV( A.n_rows); + podarray<eT> R( A.n_rows); + podarray<eT> C( A.n_rows); + podarray<eT> FERR( B.n_cols); + podarray<eT> BERR( B.n_cols); + podarray<eT> WORK(4*A.n_rows); + podarray<blas_int> IWORK( A.n_rows); + + arma_extra_debug_print("lapack::gesvx()"); + lapack::gesvx + ( + &fact, &trans, &n, &nrhs, + A.memptr(), &lda, + AF.memptr(), &ldaf, + IPIV.memptr(), + &equed, + R.memptr(), + C.memptr(), + const_cast<eT*>(B.memptr()), &ldb, + out.memptr(), &ldx, + &rcond, + FERR.memptr(), + BERR.memptr(), + WORK.memptr(), + IWORK.memptr(), + &info + ); + + // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration; + // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr + + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition with refinement (complex matrices) +template<typename T1> +inline +bool +auxlib::solve_square_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::cx_gesvx() if equilibrate is enabled + + quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const + + const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below + + const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); + + Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } + + const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + out.set_size(A.n_rows, B.n_cols); + + char fact = (equilibrate) ? 'E' : 'N'; + char trans = 'N'; + char equed = char(0); + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldaf = blas_int(A.n_rows); + blas_int ldb = blas_int(A.n_rows); + blas_int ldx = blas_int(A.n_rows); + blas_int info = blas_int(0); + T rcond = T(0); + + Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); + + podarray<blas_int> IPIV( A.n_rows); + podarray< T> R( A.n_rows); + podarray< T> C( A.n_rows); + podarray< T> FERR( B.n_cols); + podarray< T> BERR( B.n_cols); + podarray<eT> WORK(2*A.n_rows); + podarray< T> RWORK(2*A.n_rows); + + arma_extra_debug_print("lapack::cx_gesvx()"); + lapack::cx_gesvx + ( + &fact, &trans, &n, &nrhs, + A.memptr(), &lda, + AF.memptr(), &ldaf, + IPIV.memptr(), + &equed, + R.memptr(), + C.memptr(), + const_cast<eT*>(B.memptr()), &ldb, + out.memptr(), &ldx, + &rcond, + FERR.memptr(), + BERR.memptr(), + WORK.memptr(), + RWORK.memptr(), + &info + ); + + // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration; + // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr + + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_sympd_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_sympd_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); + + return auxlib::solve_square_fast(out, A, B_expr); + } + #else + { + return auxlib::solve_sympd_fast_common(out, A, B_expr); + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_sympd_fast_common(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + + arma_debug_assert_blas_size(A, out); + + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming A is square + blas_int nrhs = blas_int(B_n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(B_n_rows); + blas_int info = blas_int(0); + + arma_extra_debug_print("lapack::posv()"); + lapack::posv<eT>(&uplo, &n, &nrhs, A.memptr(), &lda, out.memptr(), &ldb, &info); + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via Cholesky decomposition with rcond estimate (real matrices) +template<typename T1> +inline +bool +auxlib::solve_sympd_rcond(Mat<typename T1::pod_type>& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + out_sympd_state = false; + out_rcond = T(0); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + arma_debug_assert_blas_size(A, out); + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming A is square + blas_int nrhs = blas_int(B_n_cols); + blas_int info = blas_int(0); + T norm_val = T(0); + + podarray<T> work(A.n_rows); + + arma_extra_debug_print("lapack::lansy()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + out_sympd_state = true; + + arma_extra_debug_print("lapack::potrs()"); + lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); + + if(info != 0) { return false; } + + out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val); + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_sympd_state); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via Cholesky decomposition with rcond estimate (complex matrices) +template<typename T1> +inline +bool +auxlib::solve_sympd_rcond(Mat< std::complex<typename T1::pod_type> >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_sympd_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK"); + + out_sympd_state = false; + + return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + out_sympd_state = false; + out_rcond = T(0); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + arma_debug_assert_blas_size(A, out); + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming A is square + blas_int nrhs = blas_int(B_n_cols); + blas_int info = blas_int(0); + T norm_val = T(0); + + podarray<T> work(A.n_rows); + + arma_extra_debug_print("lapack::lanhe()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &n, work.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf<eT>(&uplo, &n, A.memptr(), &n, &info); + + if(info != 0) { return false; } + + out_sympd_state = true; + + arma_extra_debug_print("lapack::potrs()"); + lapack::potrs<eT>(&uplo, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); + + if(info != 0) { return false; } + + out_rcond = auxlib::lu_rcond_sympd<T>(A, norm_val); + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_sympd_state); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via Cholesky decomposition with refinement (real matrices) +template<typename T1> +inline +bool +auxlib::solve_sympd_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type eT; + + // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::posvx() if equilibrate is enabled + + quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const + + const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below + + const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); + + Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } + + const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + out.set_size(A.n_rows, B.n_cols); + + char fact = (equilibrate) ? 'E' : 'N'; + char uplo = 'L'; + char equed = char(0); + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldaf = blas_int(A.n_rows); + blas_int ldb = blas_int(A.n_rows); + blas_int ldx = blas_int(A.n_rows); + blas_int info = blas_int(0); + eT rcond = eT(0); + + Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); + + podarray<eT> S( A.n_rows); + podarray<eT> FERR( B.n_cols); + podarray<eT> BERR( B.n_cols); + podarray<eT> WORK(3*A.n_rows); + podarray<blas_int> IWORK( A.n_rows); + + arma_extra_debug_print("lapack::posvx()"); + lapack::posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), IWORK.memptr(), &info); + + // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration; + // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr + + // NOTE: lapack::posvx() sets rcond to zero if A is not sympd + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via Cholesky decomposition with refinement (complex matrices) +template<typename T1> +inline +bool +auxlib::solve_sympd_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_sympd_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK"); + + return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + // Mat<eT> B = B_expr.get_ref(); // B is overwritten by lapack::cx_posvx() if equilibrate is enabled + + quasi_unwrap<T1> UB(B_expr.get_ref()); // deliberately not declaring as const + + const Mat<eT>& UB_M_as_Mat = UB.M; // so we don't confuse the ?: operator below + + const bool use_copy = ((equilibrate && UB.is_const) || UB.is_alias(out)); + + Mat<eT> B_tmp; if(use_copy) { B_tmp = UB_M_as_Mat; } + + const Mat<eT>& B = (use_copy) ? B_tmp : UB_M_as_Mat; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + out.set_size(A.n_rows, B.n_cols); + + char fact = (equilibrate) ? 'E' : 'N'; + char uplo = 'L'; + char equed = char(0); + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldaf = blas_int(A.n_rows); + blas_int ldb = blas_int(A.n_rows); + blas_int ldx = blas_int(A.n_rows); + blas_int info = blas_int(0); + T rcond = T(0); + + Mat<eT> AF(A.n_rows, A.n_rows, arma_nozeros_indicator()); + + podarray< T> S( A.n_rows); + podarray< T> FERR( B.n_cols); + podarray< T> BERR( B.n_cols); + podarray<eT> WORK(2*A.n_rows); + podarray< T> RWORK( A.n_rows); + + arma_extra_debug_print("lapack::cx_posvx()"); + lapack::cx_posvx(&fact, &uplo, &n, &nrhs, A.memptr(), &lda, AF.memptr(), &ldaf, &equed, S.memptr(), const_cast<eT*>(B.memptr()), &ldb, out.memptr(), &ldx, &rcond, FERR.memptr(), BERR.memptr(), WORK.memptr(), RWORK.memptr(), &info); + + // NOTE: using const_cast<eT*>(B.memptr()) to allow B to be overwritten for equilibration; + // NOTE: B is created as a copy of B_expr if equilibration is enabled; otherwise B is a reference to B_expr + + // NOTE: lapack::cx_posvx() sets rcond to zero if A is not sympd + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a non-square full-rank system via QR or LQ decomposition +template<typename T1> +inline +bool +auxlib::solve_rect_fast(Mat<typename T1::elem_type>& out, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + + const unwrap<T1> U(B_expr.get_ref()); + const Mat<eT>& B = U.M; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); + + if(arma::size(tmp) == arma::size(B)) + { + tmp = B; + } + else + { + tmp.zeros(); + tmp(0,0, arma::size(B)) = B; + } + + char trans = 'N'; + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(tmp.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs)); + blas_int info = 0; + + blas_int lwork_proposed = 0; + + if(A.n_elem >= ((is_cx<eT>::yes) ? uword(256) : uword(1024))) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info ); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info ); + + if(info != 0) { return false; } + + if(tmp.n_rows == A.n_cols) + { + out.steal_mem(tmp); + } + else + { + out = tmp.head_rows(A.n_cols); + } + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a non-square full-rank system via QR or LQ decomposition with rcond estimate (experimental) +template<typename T1> +inline +bool +auxlib::solve_rect_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + out_rcond = T(0); + + const unwrap<T1> U(B_expr.get_ref()); + const Mat<eT>& B = U.M; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } + + arma_debug_assert_blas_size(A,B); + + Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); + + if(arma::size(tmp) == arma::size(B)) + { + tmp = B; + } + else + { + tmp.zeros(); + tmp(0,0, arma::size(B)) = B; + } + + char trans = 'N'; + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(tmp.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lwork_min = (std::max)(blas_int(1), min_mn + (std::max)(min_mn, nrhs)); + blas_int info = 0; + + blas_int lwork_proposed = 0; + + if(A.n_elem >= ((is_cx<eT>::yes) ? uword(256) : uword(1024))) + { + eT work_query[2] = {}; + blas_int lwork_query = -1; + + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, &work_query[0], &lwork_query, &info ); + + if(info != 0) { return false; } + + lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + } + + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork_final, &info ); + + if(info != 0) { return false; } + + if(A.n_rows >= A.n_cols) + { + arma_extra_debug_print("estimating rcond via R"); + + // xGELS docs: for M >= N, A contains details of its QR decomposition as returned by xGEQRF + // xGEQRF docs: elements on and above the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R + + Mat<eT> R(A.n_cols, A.n_cols, arma_zeros_indicator()); + + for(uword col=0; col < A.n_cols; ++col) + { + for(uword row=0; row <= col; ++row) + { + R.at(row,col) = A.at(row,col); + } + } + + // determine quality of solution + out_rcond = auxlib::rcond_trimat(R, 0); // 0: upper triangular; 1: lower triangular + } + else + if(A.n_rows < A.n_cols) + { + arma_extra_debug_print("estimating rcond via L"); + + // xGELS docs: for M < N, A contains details of its LQ decomposition as returned by xGELQF + // xGELQF docs: elements on and below the diagonal contain the M-by-min(M,N) lower trapezoidal matrix L + + Mat<eT> L(A.n_rows, A.n_rows, arma_zeros_indicator()); + + for(uword col=0; col < A.n_rows; ++col) + { + for(uword row=col; row < A.n_rows; ++row) + { + L.at(row,col) = A.at(row,col); + } + } + + // determine quality of solution + out_rcond = auxlib::rcond_trimat(L, 1); // 0: upper triangular; 1: lower triangular + } + + if(tmp.n_rows == A.n_cols) + { + out.steal_mem(tmp); + } + else + { + out = tmp.head_rows(A.n_cols); + } + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_approx_svd(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type eT; + + const unwrap<T1> U(B_expr.get_ref()); + const Mat<eT>& B = U.M; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A,B); + + Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); + + if(arma::size(tmp) == arma::size(B)) + { + tmp = B; + } + else + { + tmp.zeros(); + tmp(0,0, arma::size(B)) = B; + } + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m, n); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(tmp.n_rows); + //eT rcond = eT(-1); // -1 means "use machine precision" + eT rcond = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits<eT>::epsilon(); + blas_int rank = blas_int(0); + blas_int info = blas_int(0); + + podarray<eT> S( static_cast<uword>(min_mn) ); + + // NOTE: with LAPACK 3.8, can use the workspace query to also obtain liwork, + // NOTE: which makes the call to lapack::laenv() redundant + + blas_int ispec = blas_int(9); + + const char* const_name = (is_float<eT>::value) ? "SGELSD" : "DGELSD"; + const char* const_opts = " "; + + char* name = const_cast<char*>(const_name); + char* opts = const_cast<char*>(const_opts); + + blas_int n1 = m; + blas_int n2 = n; + blas_int n3 = nrhs; + blas_int n4 = lda; + + blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0); + + blas_int smlsiz = (std::max)( blas_int(25), laenv_result ); + blas_int smlsiz_p1 = blas_int(1) + smlsiz; + + blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) ); + blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*min_mn*nlvl + blas_int(11)*min_mn) ); + + podarray<blas_int> iwork( static_cast<uword>(liwork) ); + + blas_int lwork_min = blas_int(12)*min_mn + blas_int(2)*min_mn*smlsiz + blas_int(8)*min_mn*nlvl + min_mn*nrhs + smlsiz_p1*smlsiz_p1; + + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::gelsd()"); + lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, iwork.memptr(), &info); + + if(info != 0) { return false; } + + // NOTE: in LAPACK 3.8, iwork[0] returns the minimum liwork + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::gelsd()"); + lapack::gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, iwork.memptr(), &info); + + if(info != 0) { return false; } + + if(tmp.n_rows == A.n_cols) + { + out.steal_mem(tmp); + } + else + { + out = tmp.head_rows(A.n_cols); + } + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_approx_svd(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base<std::complex<typename T1::pod_type>,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + const unwrap<T1> U(B_expr.get_ref()); + const Mat<eT>& B = U.M; + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A,B); + + Mat<eT> tmp( (std::max)(A.n_rows, A.n_cols), B.n_cols, arma_nozeros_indicator() ); + + if(arma::size(tmp) == arma::size(B)) + { + tmp = B; + } + else + { + tmp.zeros(); + tmp(0,0, arma::size(B)) = B; + } + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m, n); + blas_int nrhs = blas_int(B.n_cols); + blas_int lda = blas_int(A.n_rows); + blas_int ldb = blas_int(tmp.n_rows); + //T rcond = T(-1); // -1 means "use machine precision" + T rcond = (std::max)(A.n_rows, A.n_cols) * std::numeric_limits<T>::epsilon(); + blas_int rank = blas_int(0); + blas_int info = blas_int(0); + + podarray<T> S( static_cast<uword>(min_mn) ); + + blas_int ispec = blas_int(9); + + const char* const_name = (is_float<T>::value) ? "CGELSD" : "ZGELSD"; + const char* const_opts = " "; + + char* name = const_cast<char*>(const_name); + char* opts = const_cast<char*>(const_opts); + + blas_int n1 = m; + blas_int n2 = n; + blas_int n3 = nrhs; + blas_int n4 = lda; + + blas_int laenv_result = (arma_config::hidden_args) ? blas_int(lapack::laenv(&ispec, name, opts, &n1, &n2, &n3, &n4, 6, 1)) : blas_int(0); + + blas_int smlsiz = (std::max)( blas_int(25), laenv_result ); + blas_int smlsiz_p1 = blas_int(1) + smlsiz; + + blas_int nlvl = (std::max)( blas_int(0), blas_int(1) + blas_int( std::log2( double(min_mn)/double(smlsiz_p1) ) ) ); + + blas_int lrwork = (m >= n) + ? blas_int(10)*n + blas_int(2)*n*smlsiz + blas_int(8)*n*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs ) + : blas_int(10)*m + blas_int(2)*m*smlsiz + blas_int(8)*m*nlvl + blas_int(3)*smlsiz*nrhs + (std::max)( (smlsiz_p1)*(smlsiz_p1), n*(blas_int(1)+nrhs) + blas_int(2)*nrhs ); + + blas_int liwork = (std::max)( blas_int(1), (blas_int(3)*blas_int(min_mn)*nlvl + blas_int(11)*blas_int(min_mn)) ); + + podarray<T> rwork( static_cast<uword>(lrwork) ); + podarray<blas_int> iwork( static_cast<uword>(liwork) ); + + blas_int lwork_min = 2*min_mn + min_mn*nrhs; + + eT work_query[2] = {}; + blas_int lwork_query = blas_int(-1); + + arma_extra_debug_print("lapack::cx_gelsd()"); + lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, &work_query[0], &lwork_query, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real( work_query[0]) ); + blas_int lwork_final = (std::max)(lwork_proposed, lwork_min); + + podarray<eT> work( static_cast<uword>(lwork_final) ); + + arma_extra_debug_print("lapack::cx_gelsd()"); + lapack::cx_gelsd(&m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, S.memptr(), &rcond, &rank, work.memptr(), &lwork_final, rwork.memptr(), iwork.memptr(), &info); + + if(info != 0) { return false; } + + if(tmp.n_rows == A.n_cols) + { + out.steal_mem(tmp); + } + else + { + out = tmp.head_rows(A.n_cols); + } + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_trimat_fast(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + arma_debug_assert_blas_size(A,out); + + char uplo = (layout == 0) ? 'U' : 'L'; + char trans = 'N'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int info = 0; + + arma_extra_debug_print("lapack::trtrs()"); + lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(layout); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::solve_trimat_rcond(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + + out_rcond = T(0); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } + + arma_debug_assert_blas_size(A,out); + + char uplo = (layout == 0) ? 'U' : 'L'; + char trans = 'N'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int info = 0; + + arma_extra_debug_print("lapack::trtrs()"); + lapack::trtrs(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); + + if(info != 0) { return false; } + + // determine quality of solution + out_rcond = auxlib::rcond_trimat(A, layout); + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(B_expr); + arma_ignore(layout); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition (real band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_fast(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr); + } + + + +//! solve a system of linear equations via LU decomposition (complex band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_fast(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base< std::complex<typename T1::pod_type>,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_band_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); + + arma_ignore(KL); + arma_ignore(KU); + + return auxlib::solve_square_fast(out, A, B_expr); + } + #else + { + return auxlib::solve_band_fast_common(out, A, KL, KU, B_expr); + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition (band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_fast_common(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } + + // for gbsv, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1) + + Mat<eT> AB; + band_helper::compress(AB, A, KL, KU, true); + + const uword N = AB.n_cols; // order of the original square matrix A + + arma_debug_assert_blas_size(AB,out); + + blas_int n = blas_int(N); + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int nrhs = blas_int(B_n_cols); + blas_int ldab = blas_int(AB.n_rows); + blas_int ldb = blas_int(B_n_rows); + blas_int info = blas_int(0); + + podarray<blas_int> ipiv(N + 2); // +2 for paranoia + + // NOTE: AB is overwritten + + arma_extra_debug_print("lapack::gbsv()"); + lapack::gbsv<eT>(&n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info); + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition (real band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_rcond(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); + } + + + +//! solve a system of linear equations via LU decomposition (complex band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_rcond(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base< std::complex<typename T1::pod_type>,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_band_rcond(): redirecting to auxlib::solve_square_rcond() due to crippled LAPACK"); + + arma_ignore(KL); + arma_ignore(KU); + + return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); + } + #else + { + return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition (band matrix) +template<typename T1> +inline +bool +auxlib::solve_band_rcond_common(Mat<typename T1::elem_type>& out, typename T1::pod_type& out_rcond, const Mat<typename T1::elem_type>& A, const uword KL, const uword KU, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + out_rcond = T(0); + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } + + // for gbtrf, matrix AB size: 2*KL+KU+1 x N; band representation of A stored in rows KL+1 to 2*KL+KU+1 (note: fortran counts from 1) + + Mat<eT> AB; + band_helper::compress(AB, A, KL, KU, true); + + const uword N = AB.n_cols; // order of the original square matrix A + + arma_debug_assert_blas_size(AB,out); + + //char norm_id = '1'; + char trans = 'N'; + blas_int n = blas_int(N); // assuming square matrix + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int nrhs = blas_int(B_n_cols); + blas_int ldab = blas_int(AB.n_rows); + blas_int ldb = blas_int(B_n_rows); + blas_int info = blas_int(0); + T norm_val = T(0); + + //podarray<T> junk(1); + podarray<blas_int> ipiv(N + 2); // +2 for paranoia + + // // NOTE: lapack::langb() and lapack::gbtrf() use incompatible storage formats for the band matrix + // arma_extra_debug_print("lapack::langb()"); + // norm_val = lapack::langb<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, junk.memptr()); + + norm_val = auxlib::norm1_band(A,KL,KU); + + arma_extra_debug_print("lapack::gbtrf()"); + lapack::gbtrf<eT>(&n, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &info); + + if(info != 0) { return false; } + + arma_extra_debug_print("lapack::gbtrs()"); + lapack::gbtrs<eT>(&trans, &n, &kl, &ku, &nrhs, AB.memptr(), &ldab, ipiv.memptr(), out.memptr(), &ldb, &info); + + if(info != 0) { return false; } + + out_rcond = auxlib::lu_rcond_band<T>(AB, KL, KU, ipiv, norm_val); + + return true; + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition with refinement (real band matrices) +template<typename T1> +inline +bool +auxlib::solve_band_refine(Mat<typename T1::pod_type>& out, typename T1::pod_type& out_rcond, Mat<typename T1::pod_type>& A, const uword KL, const uword KU, const Base<typename T1::pod_type,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type eT; + + Mat<eT> B = B_expr.get_ref(); // B is overwritten + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1) + + Mat<eT> AB; + band_helper::compress(AB, A, KL, KU, false); + + const uword N = AB.n_cols; + + arma_debug_assert_blas_size(AB,B); + + out.set_size(N, B.n_cols); + + Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator()); + + char fact = (equilibrate) ? 'E' : 'N'; + char trans = 'N'; + char equed = char(0); + blas_int n = blas_int(N); + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int nrhs = blas_int(B.n_cols); + blas_int ldab = blas_int(AB.n_rows); + blas_int ldafb = blas_int(AFB.n_rows); + blas_int ldb = blas_int(B.n_rows); + blas_int ldx = blas_int(N); + blas_int info = blas_int(0); + eT rcond = eT(0); + + podarray<blas_int> IPIV( N); + podarray<eT> R( N); + podarray<eT> C( N); + podarray<eT> FERR( B.n_cols); + podarray<eT> BERR( B.n_cols); + podarray<eT> WORK(3*N); + podarray<blas_int> IWORK( N); + + arma_extra_debug_print("lapack::gbsvx()"); + lapack::gbsvx + ( + &fact, &trans, &n, &kl, &ku, &nrhs, + AB.memptr(), &ldab, + AFB.memptr(), &ldafb, + IPIV.memptr(), + &equed, + R.memptr(), + C.memptr(), + B.memptr(), &ldb, + out.memptr(), &ldx, + &rcond, + FERR.memptr(), + BERR.memptr(), + WORK.memptr(), + IWORK.memptr(), + &info + ); + + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via LU decomposition with refinement (complex band matrices) +template<typename T1> +inline +bool +auxlib::solve_band_refine(Mat< std::complex<typename T1::pod_type> >& out, typename T1::pod_type& out_rcond, Mat< std::complex<typename T1::pod_type> >& A, const uword KL, const uword KU, const Base<std::complex<typename T1::pod_type>,T1>& B_expr, const bool equilibrate) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_band_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK"); + + arma_ignore(KL); + arma_ignore(KU); + + return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename T1::pod_type T; + typedef typename std::complex<T> eT; + + Mat<eT> B = B_expr.get_ref(); // B is overwritten + + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); + + if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } + + // for gbsvx, matrix AB size: KL+KU+1 x N; band representation of A stored in rows 1 to KL+KU+1 (note: fortran counts from 1) + + Mat<eT> AB; + band_helper::compress(AB, A, KL, KU, false); + + const uword N = AB.n_cols; + + arma_debug_assert_blas_size(AB,B); + + out.set_size(N, B.n_cols); + + Mat<eT> AFB(2*KL+KU+1, N, arma_nozeros_indicator()); + + char fact = (equilibrate) ? 'E' : 'N'; + char trans = 'N'; + char equed = char(0); + blas_int n = blas_int(N); + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int nrhs = blas_int(B.n_cols); + blas_int ldab = blas_int(AB.n_rows); + blas_int ldafb = blas_int(AFB.n_rows); + blas_int ldb = blas_int(B.n_rows); + blas_int ldx = blas_int(N); + blas_int info = blas_int(0); + T rcond = T(0); + + podarray<blas_int> IPIV( N); + podarray< T> R( N); + podarray< T> C( N); + podarray< T> FERR( B.n_cols); + podarray< T> BERR( B.n_cols); + podarray<eT> WORK(2*N); + podarray< T> RWORK( N); // NOTE: according to lapack 3.6.1 docs, the size of RWORK in zgbsvx is different to RWORK in dgesvx + + arma_extra_debug_print("lapack::cx_gbsvx()"); + lapack::cx_gbsvx + ( + &fact, &trans, &n, &kl, &ku, &nrhs, + AB.memptr(), &ldab, + AFB.memptr(), &ldafb, + IPIV.memptr(), + &equed, + R.memptr(), + C.memptr(), + B.memptr(), &ldb, + out.memptr(), &ldx, + &rcond, + FERR.memptr(), + BERR.memptr(), + WORK.memptr(), + RWORK.memptr(), + &info + ); + + out_rcond = rcond; + + return ((info == 0) || (info == (n+1))); + } + #else + { + arma_ignore(out); + arma_ignore(out_rcond); + arma_ignore(A); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(B_expr); + arma_ignore(equilibrate); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +//! solve a system of linear equations via Gaussian elimination with partial pivoting (real tridiagonal band matrix) +template<typename T1> +inline +bool +auxlib::solve_tridiag_fast(Mat<typename T1::pod_type>& out, Mat<typename T1::pod_type>& A, const Base<typename T1::pod_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + return auxlib::solve_tridiag_fast_common(out, A, B_expr); + } + + + +//! solve a system of linear equations via Gaussian elimination with partial pivoting (complex tridiagonal band matrix) +template<typename T1> +inline +bool +auxlib::solve_tridiag_fast(Mat< std::complex<typename T1::pod_type> >& out, Mat< std::complex<typename T1::pod_type> >& A, const Base< std::complex<typename T1::pod_type>,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::solve_tridiag_fast(): redirecting to auxlib::solve_square_fast() due to crippled LAPACK"); + + return auxlib::solve_square_fast(out, A, B_expr); + } + #else + { + return auxlib::solve_tridiag_fast_common(out, A, B_expr); + } + #endif + } + + + +//! solve a system of linear equations via Gaussian elimination with partial pivoting (tridiagonal band matrix) +template<typename T1> +inline +bool +auxlib::solve_tridiag_fast_common(Mat<typename T1::elem_type>& out, const Mat<typename T1::elem_type>& A, const Base<typename T1::elem_type,T1>& B_expr) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename T1::elem_type eT; + + out = B_expr.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } + + Mat<eT> tridiag; + band_helper::extract_tridiag(tridiag, A); + + arma_debug_assert_blas_size(tridiag, out); + + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int ldb = blas_int(B_n_rows); + blas_int info = blas_int(0); + + arma_extra_debug_print("lapack::gtsv()"); + lapack::gtsv<eT>(&n, &nrhs, tridiag.colptr(0), tridiag.colptr(1), tridiag.colptr(2), out.memptr(), &ldb, &info); + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B_expr); + arma_stop_logic_error("solve(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +// +// Schur decomposition + +template<typename eT, typename T1> +inline +bool +auxlib::schur(Mat<eT>& U, Mat<eT>& S, const Base<eT,T1>& X, const bool calc_U) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + S = X.get_ref(); + + arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" ); + + if(S.is_empty()) { U.reset(); S.reset(); return true; } + + arma_debug_assert_blas_size(S); + + const uword S_n_rows = S.n_rows; + + if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); } + + char jobvs = calc_U ? 'V' : 'N'; + char sort = 'N'; + void* select = 0; + blas_int n = blas_int(S_n_rows); + blas_int sdim = 0; + blas_int ldvs = calc_U ? n : blas_int(1); + blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 3*n) + blas_int info = 0; + + podarray<eT> wr(S_n_rows); + podarray<eT> wi(S_n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<blas_int> bwork(S_n_rows); + + arma_extra_debug_print("lapack::gees()"); + lapack::gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, wr.memptr(), wi.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, bwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(X); + arma_ignore(calc_U); + arma_stop_logic_error("schur(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const Base<std::complex<T>,T1>& X, const bool calc_U) + { + arma_extra_debug_sigprint(); + + S = X.get_ref(); + + arma_debug_check( (S.is_square() == false), "schur(): given matrix must be square sized" ); + + return auxlib::schur(U,S,calc_U); + } + + + +template<typename T> +inline +bool +auxlib::schur(Mat< std::complex<T> >& U, Mat< std::complex<T> >& S, const bool calc_U) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + if(S.is_empty()) { U.reset(); S.reset(); return true; } + + arma_debug_assert_blas_size(S); + + const uword S_n_rows = S.n_rows; + + if(calc_U) { U.set_size(S_n_rows, S_n_rows); } else { U.set_size(1,1); } + + char jobvs = calc_U ? 'V' : 'N'; + char sort = 'N'; + void* select = 0; + blas_int n = blas_int(S_n_rows); + blas_int sdim = 0; + blas_int ldvs = calc_U ? n : blas_int(1); + blas_int lwork = 64*n; // lwork_min = (std::max)(blas_int(1), 2*n) + blas_int info = 0; + + podarray<eT> w(S_n_rows); + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork(S_n_rows); + podarray<blas_int> bwork(S_n_rows); + + arma_extra_debug_print("lapack::cx_gees()"); + lapack::cx_gees(&jobvs, &sort, select, &n, S.memptr(), &n, &sdim, w.memptr(), U.memptr(), &ldvs, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(calc_U); + arma_stop_logic_error("schur(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +// +// solve the Sylvester equation AX + XB = C + +template<typename eT> +inline +bool +auxlib::syl(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (A.is_square() == false) || (B.is_square() == false), "syl(): given matrices must be square sized" ); + + arma_debug_check( (C.n_rows != A.n_rows) || (C.n_cols != B.n_cols), "syl(): matrices are not conformant" ); + + if(A.is_empty() || B.is_empty() || C.is_empty()) { X.reset(); return true; } + + Mat<eT> Z1, Z2, T1, T2; + + const bool status_sd1 = auxlib::schur(Z1, T1, A); + const bool status_sd2 = auxlib::schur(Z2, T2, B); + + if( (status_sd1 == false) || (status_sd2 == false) ) { return false; } + + char trana = 'N'; + char tranb = 'N'; + blas_int isgn = +1; + blas_int m = blas_int(T1.n_rows); + blas_int n = blas_int(T2.n_cols); + + eT scale = eT(0); + blas_int info = 0; + + Mat<eT> Y = trans(Z1) * C * Z2; + + arma_extra_debug_print("lapack::trsyl()"); + lapack::trsyl<eT>(&trana, &tranb, &isgn, &m, &n, T1.memptr(), &m, T2.memptr(), &n, Y.memptr(), &m, &scale, &info); + + if(info < 0) { return false; } + + //Y /= scale; + Y /= (-scale); + + X = Z1 * Y * trans(Z2); + + return true; + } + #else + { + arma_ignore(X); + arma_ignore(A); + arma_ignore(B); + arma_ignore(C); + arma_stop_logic_error("syl(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +// +// QZ decomposition of general square real matrix pair + +template<typename T, typename T1, typename T2> +inline +bool +auxlib::qz(Mat<T>& A, Mat<T>& B, Mat<T>& vsl, Mat<T>& vsr, const Base<T,T1>& X_expr, const Base<T,T2>& Y_expr, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + A = X_expr.get_ref(); + B = Y_expr.get_ref(); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); + + arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); + + if(A.is_empty()) { A.reset(); B.reset(); vsl.reset(); vsr.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + vsl.set_size(A.n_rows, A.n_rows); + vsr.set_size(A.n_rows, A.n_rows); + + char jobvsl = 'V'; + char jobvsr = 'V'; + char eigsort = 'N'; + void* selctg = 0; + blas_int N = blas_int(A.n_rows); + blas_int sdim = 0; + blas_int lwork = 64*N+16; // lwork_min = (std::max)(blas_int(1),8*N+16) + blas_int info = 0; + + if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_lhp<T>)); } + else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_rhp<T>)); } + else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_iuc<T>)); } + else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::select_ouc<T>)); } + + podarray<T> alphar(A.n_rows); + podarray<T> alphai(A.n_rows); + podarray<T> beta(A.n_rows); + + podarray<T> work( static_cast<uword>(lwork) ); + podarray<blas_int> bwork( static_cast<uword>(N) ); + + arma_extra_debug_print("lapack::gges()"); + + lapack::gges + ( + &jobvsl, &jobvsr, &eigsort, selctg, &N, + A.memptr(), &N, B.memptr(), &N, &sdim, + alphar.memptr(), alphai.memptr(), beta.memptr(), + vsl.memptr(), &N, vsr.memptr(), &N, + work.memptr(), &lwork, bwork.memptr(), + &info + ); + + if(info != 0) { return false; } + + op_strans::apply_mat_inplace(vsl); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(B); + arma_ignore(vsl); + arma_ignore(vsr); + arma_ignore(X_expr); + arma_ignore(Y_expr); + arma_ignore(mode); + arma_stop_logic_error("qz(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +// +// QZ decomposition of general square complex matrix pair + +template<typename T, typename T1, typename T2> +inline +bool +auxlib::qz(Mat< std::complex<T> >& A, Mat< std::complex<T> >& B, Mat< std::complex<T> >& vsl, Mat< std::complex<T> >& vsr, const Base< std::complex<T>, T1 >& X_expr, const Base< std::complex<T>, T2 >& Y_expr, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + A = X_expr.get_ref(); + B = Y_expr.get_ref(); + + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); + + arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); + + if(A.is_empty()) { A.reset(); B.reset(); vsl.reset(); vsr.reset(); return true; } + + if(arma_config::check_nonfinite && A.internal_has_nonfinite()) { return false; } + if(arma_config::check_nonfinite && B.internal_has_nonfinite()) { return false; } + + arma_debug_assert_blas_size(A); + + vsl.set_size(A.n_rows, A.n_rows); + vsr.set_size(A.n_rows, A.n_rows); + + char jobvsl = 'V'; + char jobvsr = 'V'; + char eigsort = 'N'; + void* selctg = 0; + blas_int N = blas_int(A.n_rows); + blas_int sdim = 0; + blas_int lwork = 64*N; // lwork_min = (std::max)(blas_int(1),2*N) + blas_int info = 0; + + if(mode == 'l') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_lhp<T>)); } + else if(mode == 'r') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_rhp<T>)); } + else if(mode == 'i') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_iuc<T>)); } + else if(mode == 'o') { eigsort = 'S'; selctg = qz_helper::ptr_cast(&(qz_helper::cx_select_ouc<T>)); } + + podarray<eT> alpha(A.n_rows); + podarray<eT> beta(A.n_rows); + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray< T> rwork( static_cast<uword>(8*N) ); + podarray<blas_int> bwork( static_cast<uword>(N) ); + + arma_extra_debug_print("lapack::cx_gges()"); + + lapack::cx_gges + ( + &jobvsl, &jobvsr, &eigsort, selctg, &N, + A.memptr(), &N, B.memptr(), &N, &sdim, + alpha.memptr(), beta.memptr(), + vsl.memptr(), &N, vsr.memptr(), &N, + work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), + &info + ); + + if(info != 0) { return false; } + + op_htrans::apply_mat_inplace(vsl); + + return true; + } + #else + { + arma_ignore(A); + arma_ignore(B); + arma_ignore(vsl); + arma_ignore(vsr); + arma_ignore(X_expr); + arma_ignore(Y_expr); + arma_ignore(mode); + arma_stop_logic_error("qz(): use of LAPACK must be enabled"); + return false; + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::rcond(Mat<eT>& A) + { + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + eT norm_val = eT(0); + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(4*A.n_rows); + podarray<blas_int> iwork( A.n_rows); + podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) ); + + arma_extra_debug_print("lapack::lange()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, work.memptr()); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + arma_extra_debug_print("lapack::gecon()"); + lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::rcond(Mat< std::complex<T> >& A) + { + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + T norm_val = T(0); + T rcond = T(0); + blas_int info = blas_int(0); + + podarray< T> junk(1); + podarray<eT> work(2*A.n_rows); + podarray< T> rwork(2*A.n_rows); + podarray<blas_int> ipiv( (std::min)(A.n_rows, A.n_cols) ); + + arma_extra_debug_print("lapack::lange()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_gen(A) : lapack::lange(&norm_id, &m, &n, A.memptr(), &lda, junk.memptr()); + + arma_extra_debug_print("lapack::getrf()"); + lapack::getrf(&m, &n, A.memptr(), &lda, ipiv.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + arma_extra_debug_print("lapack::cx_gecon()"); + lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return T(0); + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::rcond_sympd(Mat<eT>& A, bool& calc_ok) + { + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + calc_ok = false; + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + eT norm_val = eT(0); + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(3*A.n_rows); + podarray<blas_int> iwork( A.n_rows); + + arma_extra_debug_print("lapack::lansy()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lansy(&norm_id, &uplo, &n, A.memptr(), &lda, work.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &lda, &info); + + if(info != blas_int(0)) { return eT(0); } + + arma_extra_debug_print("lapack::pocon()"); + lapack::pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + calc_ok = true; + + return rcond; + } + #else + { + arma_ignore(A); + calc_ok = false; + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::rcond_sympd(Mat< std::complex<T> >& A, bool& calc_ok) + { + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::rcond_sympd(): redirecting to auxlib::rcond() due to crippled LAPACK"); + + calc_ok = true; + + return auxlib::rcond(A); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_assert_blas_size(A); + + calc_ok = false; + + char norm_id = '1'; + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + T norm_val = T(0); + T rcond = T(0); + blas_int info = blas_int(0); + + podarray<eT> work(2*A.n_rows); + podarray< T> rwork( A.n_rows); + + arma_extra_debug_print("lapack::lanhe()"); + norm_val = (has_blas_float_bug<eT>::value) ? auxlib::norm1_sym(A) : lapack::lanhe(&norm_id, &uplo, &n, A.memptr(), &lda, rwork.memptr()); + + arma_extra_debug_print("lapack::potrf()"); + lapack::potrf(&uplo, &n, A.memptr(), &lda, &info); + + if(info != blas_int(0)) { return T(0); } + + arma_extra_debug_print("lapack::cx_pocon()"); + lapack::cx_pocon(&uplo, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + calc_ok = true; + + return rcond; + } + #else + { + arma_ignore(A); + calc_ok = false; + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return T(0); + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::rcond_trimat(const Mat<eT>& A, const uword layout) + { + #if defined(ARMA_USE_LAPACK) + { + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + char uplo = (layout == 0) ? 'U' : 'L'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(3*A.n_rows); + podarray<blas_int> iwork( A.n_rows); + + arma_extra_debug_print("lapack::trcon()"); + lapack::trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(layout); + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::rcond_trimat(const Mat< std::complex<T> >& A, const uword layout) + { + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + arma_debug_assert_blas_size(A); + + char norm_id = '1'; + char uplo = (layout == 0) ? 'U' : 'L'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + T rcond = T(0); + blas_int info = blas_int(0); + + podarray<eT> work(2*A.n_rows); + podarray< T> rwork( A.n_rows); + + arma_extra_debug_print("lapack::cx_trcon()"); + lapack::cx_trcon(&norm_id, &uplo, &diag, &n, A.memptr(), &n, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(layout); + arma_stop_logic_error("rcond(): use of LAPACK must be enabled"); + return T(0); + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::lu_rcond(const Mat<eT>& A, const eT norm_val) + { + #if defined(ARMA_USE_LAPACK) + { + char norm_id = '1'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(4*A.n_rows); + podarray<blas_int> iwork( A.n_rows); + + arma_extra_debug_print("lapack::gecon()"); + lapack::gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(norm_val); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::lu_rcond(const Mat< std::complex<T> >& A, const T norm_val) + { + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + char norm_id = '1'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + blas_int lda = blas_int(A.n_rows); + T rcond = T(0); + blas_int info = blas_int(0); + + podarray<eT> work(2*A.n_rows); + podarray< T> rwork(2*A.n_rows); + + arma_extra_debug_print("lapack::cx_gecon()"); + lapack::cx_gecon(&norm_id, &n, A.memptr(), &lda, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(norm_val); + return T(0); + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::lu_rcond_sympd(const Mat<eT>& A, const eT norm_val) + { + #if defined(ARMA_USE_LAPACK) + { + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(3*A.n_rows); + podarray<blas_int> iwork( A.n_rows); + + arma_extra_debug_print("lapack::pocon()"); + lapack::pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(norm_val); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::lu_rcond_sympd(const Mat< std::complex<T> >& A, const T norm_val) + { + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_ignore(A); + arma_ignore(norm_val); + return T(0); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + char uplo = 'L'; + blas_int n = blas_int(A.n_rows); // assuming square matrix + T rcond = T(0); + blas_int info = blas_int(0); + + podarray<eT> work(2*A.n_rows); + podarray< T> rwork( A.n_rows); + + arma_extra_debug_print("lapack::cx_pocon()"); + lapack::cx_pocon(&uplo, &n, A.memptr(), &n, &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + return rcond; + } + #else + { + arma_ignore(A); + arma_ignore(norm_val); + return T(0); + } + #endif + } + + + +template<typename eT> +inline +eT +auxlib::lu_rcond_band(const Mat<eT>& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const eT norm_val) + { + #if defined(ARMA_USE_LAPACK) + { + const uword N = AB.n_cols; // order of the original square matrix A + + char norm_id = '1'; + blas_int n = blas_int(N); + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int ldab = blas_int(AB.n_rows); + eT rcond = eT(0); + blas_int info = blas_int(0); + + podarray<eT> work(3*N); + podarray<blas_int> iwork( N); + + arma_extra_debug_print("lapack::gbcon()"); + lapack::gbcon<eT>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), iwork.memptr(), &info); + + if(info != blas_int(0)) { return eT(0); } + + return rcond; + } + #else + { + arma_ignore(AB); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(ipiv); + arma_ignore(norm_val); + return eT(0); + } + #endif + } + + + +template<typename T> +inline +T +auxlib::lu_rcond_band(const Mat< std::complex<T> >& AB, const uword KL, const uword KU, const podarray<blas_int>& ipiv, const T norm_val) + { + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_ignore(AB); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(ipiv); + arma_ignore(norm_val); + return T(0); + } + #elif defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + const uword N = AB.n_cols; // order of the original square matrix A + + char norm_id = '1'; + blas_int n = blas_int(N); + blas_int kl = blas_int(KL); + blas_int ku = blas_int(KU); + blas_int ldab = blas_int(AB.n_rows); + T rcond = T(0); + blas_int info = blas_int(0); + + podarray<eT> work(2*N); + podarray< T> rwork( N); + + arma_extra_debug_print("lapack::cx_gbcon()"); + lapack::cx_gbcon<T>(&norm_id, &n, &kl, &ku, AB.memptr(), &ldab, ipiv.memptr(), &norm_val, &rcond, work.memptr(), rwork.memptr(), &info); + + if(info != blas_int(0)) { return T(0); } + + return rcond; + } + #else + { + arma_ignore(AB); + arma_ignore(KL); + arma_ignore(KU); + arma_ignore(ipiv); + arma_ignore(norm_val); + return T(0); + } + #endif + } + + + +template<typename T1> +inline +bool +auxlib::crippled_lapack(const Base<typename T1::elem_type, T1>&) + { + #if defined(ARMA_CRIPPLED_LAPACK) + { + arma_extra_debug_print("auxlib::crippled_lapack(): true"); + + return (is_cx<typename T1::elem_type>::yes); + } + #else + { + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +auxlib::rudimentary_sym_check(const Mat<eT>& X) + { + arma_extra_debug_sigprint(); + + const uword N = X.n_rows; + const uword Nm2 = N-2; + + if(N != X.n_cols) { return false; } + if(N <= uword(1)) { return true; } + + const eT* X_mem = X.memptr(); + + const eT* X_offsetA = &(X_mem[Nm2 ]); + const eT* X_offsetB = &(X_mem[Nm2*N]); + + const eT A1 = *(X_offsetA ); + const eT A2 = *(X_offsetA+1); // bottom-left corner (ie. last value in first column) + const eT B1 = *(X_offsetB ); + const eT B2 = *(X_offsetB+N); // top-right corner (ie. first value in last column) + + const eT C1 = (std::max)(std::abs(A1), std::abs(B1)); + const eT C2 = (std::max)(std::abs(A2), std::abs(B2)); + + const eT delta1 = std::abs(A1 - B1); + const eT delta2 = std::abs(A2 - B2); + + const eT tol = eT(10000)*std::numeric_limits<eT>::epsilon(); // allow some leeway + + const bool okay1 = ( (delta1 <= tol) || (delta1 <= (C1 * tol)) ); + const bool okay2 = ( (delta2 <= tol) || (delta2 <= (C2 * tol)) ); + + return (okay1 && okay2); + } + + + +template<typename T> +inline +bool +auxlib::rudimentary_sym_check(const Mat< std::complex<T> >& X) + { + arma_extra_debug_sigprint(); + + // NOTE: the function name is a misnomer, as it checks for hermitian complex matrices; + // NOTE: for simplicity of use, the function name is the same as for real matrices + + typedef typename std::complex<T> eT; + + const uword N = X.n_rows; + const uword Nm1 = N-1; + + if(N != X.n_cols) { return false; } + if(N == uword(0)) { return true; } + + const eT* X_mem = X.memptr(); + + const T tol = T(10000)*std::numeric_limits<T>::epsilon(); // allow some leeway + + if(std::abs(X_mem[0 ].imag()) > tol) { return false; } // check top-left + if(std::abs(X_mem[X.n_elem-1].imag()) > tol) { return false; } // check bottom-right + + const eT& A = X_mem[Nm1 ]; // bottom-left corner (ie. last value in first column) + const eT& B = X_mem[Nm1*N]; // top-right corner (ie. first value in last column) + + const T C_real = (std::max)(std::abs(A.real()), std::abs(B.real())); + const T C_imag = (std::max)(std::abs(A.imag()), std::abs(B.imag())); + + const T delta_real = std::abs(A.real() - B.real()); + const T delta_imag = std::abs(A.imag() + B.imag()); // take into account the conjugate + + const bool okay_real = ( (delta_real <= tol) || (delta_real <= (C_real * tol)) ); + const bool okay_imag = ( (delta_imag <= tol) || (delta_imag <= (C_imag * tol)) ); + + return (okay_real && okay_imag); + } + + + +template<typename eT> +inline +typename get_pod_type<eT>::result +auxlib::norm1_gen(const Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + if(A.n_elem == 0) { return T(0); } + + const uword n_rows = A.n_rows; + const uword n_cols = A.n_cols; + + T max_val = T(0); + + for(uword c=0; c < n_cols; ++c) + { + const eT* colmem = A.colptr(c); + T acc_val = T(0); + + for(uword r=0; r < n_rows; ++r) { acc_val += std::abs(colmem[r]); } + + max_val = (acc_val > max_val) ? acc_val : max_val; + } + + return max_val; + } + + + +template<typename eT> +inline +typename get_pod_type<eT>::result +auxlib::norm1_sym(const Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + if(A.n_elem == 0) { return T(0); } + + const uword N = (std::min)(A.n_rows, A.n_cols); + + T max_val = T(0); + + for(uword col=0; col < N; ++col) + { + const eT* colmem = A.colptr(col); + T acc_val = T(0); + + for(uword c=0; c < col; ++c) { acc_val += std::abs(A.at(col,c)); } + + for(uword r=col; r < N; ++r) { acc_val += std::abs(colmem[r]); } + + max_val = (acc_val > max_val) ? acc_val : max_val; + } + + return max_val; + } + + + +template<typename eT> +inline +typename get_pod_type<eT>::result +auxlib::norm1_band(const Mat<eT>& A, const uword KL, const uword KU) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + if(A.n_elem == 0) { return T(0); } + + const uword n_rows = A.n_rows; + const uword n_cols = A.n_cols; + + T max_val = T(0); + + for(uword c=0; c < n_cols; ++c) + { + const eT* colmem = A.colptr(c); + T acc_val = T(0); + + // use values only from main diagonal + KU upper diagonals + KL lower diagonals + + const uword start = ( c > KU ) ? (c - KU) : 0; + const uword end = ((c + KL) < n_rows) ? (c + KL) : (n_rows-1); + + for(uword r=start; r <= end; ++r) { acc_val += std::abs(colmem[r]); } + + max_val = (acc_val > max_val) ? acc_val : max_val; + } + + return max_val; + } + + + +// + + + +namespace qz_helper +{ + +// sgges() and dgges() require an external function with three arguments: +// select(alpha_real, alpha_imag, beta) +// where the eigenvalue is defined as complex(alpha_real, alpha_imag) / beta + +template<typename T> +inline +blas_int +select_lhp(const T* x_ptr, const T* y_ptr, const T* z_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "select_lhp(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "select_lhp(): (*y_ptr) = " << (*y_ptr) << endl; + // cout << "select_lhp(): (*z_ptr) = " << (*z_ptr) << endl; + + arma_ignore(y_ptr); // ignore imaginary part + + const T x = (*x_ptr); + const T z = (*z_ptr); + + if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp + + return ((x/z) < T(0)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +select_rhp(const T* x_ptr, const T* y_ptr, const T* z_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "select_rhp(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "select_rhp(): (*y_ptr) = " << (*y_ptr) << endl; + // cout << "select_rhp(): (*z_ptr) = " << (*z_ptr) << endl; + + arma_ignore(y_ptr); // ignore imaginary part + + const T x = (*x_ptr); + const T z = (*z_ptr); + + if(z == T(0)) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp + + return ((x/z) > T(0)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +select_iuc(const T* x_ptr, const T* y_ptr, const T* z_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "select_iuc(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "select_iuc(): (*y_ptr) = " << (*y_ptr) << endl; + // cout << "select_iuc(): (*z_ptr) = " << (*z_ptr) << endl; + + const T x = (*x_ptr); + const T y = (*y_ptr); + const T z = (*z_ptr); + + if(z == T(0)) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle + + //return (std::abs(std::complex<T>(x,y) / z) < T(1)) ? blas_int(1) : blas_int(0); + return (std::sqrt(x*x + y*y) < std::abs(z)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +select_ouc(const T* x_ptr, const T* y_ptr, const T* z_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "select_ouc(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "select_ouc(): (*y_ptr) = " << (*y_ptr) << endl; + // cout << "select_ouc(): (*z_ptr) = " << (*z_ptr) << endl; + + const T x = (*x_ptr); + const T y = (*y_ptr); + const T z = (*z_ptr); + + if(z == T(0)) + { + return (x == T(0)) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle + } + + //return (std::abs(std::complex<T>(x,y) / z) > T(1)) ? blas_int(1) : blas_int(0); + return (std::sqrt(x*x + y*y) > std::abs(z)) ? blas_int(1) : blas_int(0); + } + + + +// cgges() and zgges() require an external function with two arguments: +// select(alpha, beta) +// where the complex eigenvalue is defined as (alpha / beta) + +template<typename T> +inline +blas_int +cx_select_lhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "cx_select_lhp(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "cx_select_lhp(): (*y_ptr) = " << (*y_ptr) << endl; + + const std::complex<T>& x = (*x_ptr); + const std::complex<T>& y = (*y_ptr); + + if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp + + return (std::real(x / y) < T(0)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +cx_select_rhp(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "cx_select_rhp(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "cx_select_rhp(): (*y_ptr) = " << (*y_ptr) << endl; + + const std::complex<T>& x = (*x_ptr); + const std::complex<T>& y = (*y_ptr); + + if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value not to lie in either lhp or rhp + + return (std::real(x / y) > T(0)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +cx_select_iuc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "cx_select_iuc(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "cx_select_iuc(): (*y_ptr) = " << (*y_ptr) << endl; + + const std::complex<T>& x = (*x_ptr); + const std::complex<T>& y = (*y_ptr); + + if( (y.real() == T(0)) && (y.imag() == T(0)) ) { return blas_int(0); } // consider an infinite eig value to be outside of the unit circle + + return (std::abs(x / y) < T(1)) ? blas_int(1) : blas_int(0); + } + + + +template<typename T> +inline +blas_int +cx_select_ouc(const std::complex<T>* x_ptr, const std::complex<T>* y_ptr) + { + arma_extra_debug_sigprint(); + + // cout << "cx_select_ouc(): (*x_ptr) = " << (*x_ptr) << endl; + // cout << "cx_select_ouc(): (*y_ptr) = " << (*y_ptr) << endl; + + const std::complex<T>& x = (*x_ptr); + const std::complex<T>& y = (*y_ptr); + + if( (y.real() == T(0)) && (y.imag() == T(0)) ) + { + return ((x.real() == T(0)) && (x.imag() == T(0))) ? blas_int(0) : blas_int(1); // consider an infinite eig value to be outside of the unit circle + } + + return (std::abs(x / y) > T(1)) ? blas_int(1) : blas_int(0); + } + + + +// need to do shenanigans with pointers due to: +// - we're using LAPACK ?gges() defined to expect pointer-to-function to be passed as pointer-to-object +// - explicit casting between pointer-to-function and pointer-to-object is a non-standard extension in C +// - the extension is essentially mandatory on POSIX systems +// - some compilers will complain about the extension in pedantic mode + +template<typename T> +inline +void_ptr +ptr_cast(blas_int (*function)(const T*, const T*, const T*)) + { + union converter + { + blas_int (*fn)(const T*, const T*, const T*); + void_ptr obj; + }; + + converter tmp; + + tmp.obj = 0; + tmp.fn = function; + + return tmp.obj; + } + + + +template<typename T> +inline +void_ptr +ptr_cast(blas_int (*function)(const std::complex<T>*, const std::complex<T>*)) + { + union converter + { + blas_int (*fn)(const std::complex<T>*, const std::complex<T>*); + void_ptr obj; + }; + + converter tmp; + + tmp.obj = 0; + tmp.fn = function; + + return tmp.obj; + } + + + +} // end of namespace qz_helper + + +//! @} |