diff options
author | Nao Pross <np@0hm.ch> | 2024-02-12 14:52:43 +0100 |
---|---|---|
committer | Nao Pross <np@0hm.ch> | 2024-02-12 14:52:43 +0100 |
commit | eda5bc26f44ee9a6f83dcf8c91f17296d7fc509d (patch) | |
tree | bc2efa38ff4e350f9a111ac87065cd7ae9a911c7 /src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp | |
download | fsisotool-eda5bc26f44ee9a6f83dcf8c91f17296d7fc509d.tar.gz fsisotool-eda5bc26f44ee9a6f83dcf8c91f17296d7fc509d.zip |
Move into version control
Diffstat (limited to 'src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp')
-rw-r--r-- | src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp | 2814 |
1 files changed, 2814 insertions, 0 deletions
diff --git a/src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp b/src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp new file mode 100644 index 0000000..dbfdf2d --- /dev/null +++ b/src/armadillo/include/armadillo_bits/sp_auxlib_meat.hpp @@ -0,0 +1,2814 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup sp_auxlib +//! @{ + + +inline +sp_auxlib::form_type +sp_auxlib::interpret_form_str(const char* form_str) + { + arma_extra_debug_sigprint(); + + // the order of the 3 if statements below is important + if( form_str == nullptr ) { return form_none; } + if( form_str[0] == char(0) ) { return form_none; } + if( form_str[1] == char(0) ) { return form_none; } + + const char c1 = form_str[0]; + const char c2 = form_str[1]; + + if(c1 == 'l') + { + if(c2 == 'm') { return form_lm; } + if(c2 == 'r') { return form_lr; } + if(c2 == 'i') { return form_li; } + if(c2 == 'a') { return form_la; } + } + else + if(c1 == 's') + { + if(c2 == 'm') { return form_sm; } + if(c2 == 'r') { return form_sr; } + if(c2 == 'i') { return form_si; } + if(c2 == 'a') { return form_sa; } + } + + return form_none; + } + + + +//! immediate eigendecomposition of symmetric real sparse object +template<typename eT, typename T1> +inline +bool +sp_auxlib::eigs_sym(Col<eT>& eigval, Mat<eT>& eigvec, const SpBase<eT, T1>& X, const uword n_eigvals, const form_type form_val, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_sym(): given matrix must be square sized" ); + + if((arma_config::debug) && (sp_auxlib::rudimentary_sym_check(U.M) == false)) + { + if(is_cx<eT>::no ) { arma_debug_warn_level(1, "eigs_sym(): given matrix is not symmetric"); } + if(is_cx<eT>::yes) { arma_debug_warn_level(1, "eigs_sym(): given matrix is not hermitian"); } + } + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_sym(): detected non-finite elements"); + return false; + } + + // TODO: investigate optional redirection of "sm" to ARPACK as it's capable of shift-invert; + // TODO: in shift-invert mode, "sm" maps to "lm" of the shift-inverted matrix (with sigma = 0) + + #if defined(ARMA_USE_NEWARP) + { + return sp_auxlib::eigs_sym_newarp(eigval, eigvec, U.M, n_eigvals, form_val, opts); + } + #elif defined(ARMA_USE_ARPACK) + { + constexpr eT sigma = eT(0); + + return sp_auxlib::eigs_sym_arpack<eT,false>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(opts); + + arma_stop_logic_error("eigs_sym(): use of NEWARP or ARPACK must be enabled"); + return false; + } + #endif + } + + + +//! immediate eigendecomposition of symmetric real sparse object +template<typename eT, typename T1> +inline +bool +sp_auxlib::eigs_sym(Col<eT>& eigval, Mat<eT>& eigvec, const SpBase<eT, T1>& X, const uword n_eigvals, const eT sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_sym(): given matrix must be square sized" ); + + if((arma_config::debug) && (sp_auxlib::rudimentary_sym_check(U.M) == false)) + { + if(is_cx<eT>::no ) { arma_debug_warn_level(1, "eigs_sym(): given matrix is not symmetric"); } + if(is_cx<eT>::yes) { arma_debug_warn_level(1, "eigs_sym(): given matrix is not hermitian"); } + } + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_sym(): detected non-finite elements"); + return false; + } + + #if (defined(ARMA_USE_NEWARP) && defined(ARMA_USE_SUPERLU)) + { + return sp_auxlib::eigs_sym_newarp(eigval, eigvec, U.M, n_eigvals, sigma, opts); + } + #elif (defined(ARMA_USE_ARPACK) && defined(ARMA_USE_SUPERLU)) + { + constexpr form_type form_val = form_sigma; + + return sp_auxlib::eigs_sym_arpack<eT,true>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(n_eigvals); + arma_ignore(sigma); + arma_ignore(opts); + + arma_stop_logic_error("eigs_sym(): use of NEWARP or ARPACK as well as SuperLU must be enabled to use 'sigma'"); + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +sp_auxlib::eigs_sym_newarp(Col<eT>& eigval, Mat<eT>& eigvec, const SpMat<eT>& X, const uword n_eigvals, const form_type form_val, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_NEWARP) + { + arma_debug_check( (form_val != form_lm) && (form_val != form_sm) && (form_val != form_la) && (form_val != form_sa), "eigs_sym(): unknown form specified" ); + + if(X.is_square() == false) { return false; } + + const newarp::SparseGenMatProd<eT> op(X); + + arma_debug_check( (n_eigvals >= op.n_rows), "eigs_sym(): n_eigvals must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (op.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + uword n = op.n_rows; + + // Use max(2*k+1, 20) as default subspace dimension for the sym case; MATLAB uses max(2*k, 20), but we need to be backward-compatible. + uword ncv_default = uword( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits, otherwise cap it. + uword ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 1)) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim must be greater than k; using k+1 instead of ", opts.subdim); + ncv = uword(n_eigvals + 1); + } + else + if(opts.subdim > n) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = uword(opts.subdim); + } + } + + // Re-check that we are within the limits + if(ncv < (n_eigvals + 1)) { ncv = (n_eigvals + 1); } + if(ncv > n ) { ncv = n; } + + eT tol = (std::max)(eT(opts.tol), std::numeric_limits<eT>::epsilon()); + + uword maxiter = uword(opts.maxiter); + + // eigval.set_size(n_eigvals); + // eigvec.set_size(n, n_eigvals); + + bool status = true; + + uword nconv = 0; + + try + { + if(form_val == form_lm) + { + newarp::SymEigsSolver< eT, newarp::EigsSelect::LARGEST_MAGN, newarp::SparseGenMatProd<eT> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_sm) + { + newarp::SymEigsSolver< eT, newarp::EigsSelect::SMALLEST_MAGN, newarp::SparseGenMatProd<eT> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_la) + { + newarp::SymEigsSolver< eT, newarp::EigsSelect::LARGEST_ALGE, newarp::SparseGenMatProd<eT> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_sa) + { + newarp::SymEigsSolver< eT, newarp::EigsSelect::SMALLEST_ALGE, newarp::SparseGenMatProd<eT> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + } + catch(const std::runtime_error&) + { + status = false; + } + + if(status == true) + { + if(nconv == 0) { status = false; } + } + + return status; + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(opts); + + return false; + } + #endif + } + + + +template<typename eT> +inline +bool +sp_auxlib::eigs_sym_newarp(Col<eT>& eigval, Mat<eT>& eigvec, const SpMat<eT>& X, const uword n_eigvals, const eT sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_NEWARP) + { + if(X.is_square() == false) { return false; } + + const newarp::SparseGenRealShiftSolve<eT> op(X, sigma); + + if(op.valid == false) { return false; } + + arma_debug_check( (n_eigvals >= op.n_rows), "eigs_sym(): n_eigvals must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (op.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + uword n = op.n_rows; + + // Use max(2*k+1, 20) as default subspace dimension for the sym case; MATLAB uses max(2*k, 20), but we need to be backward-compatible. + uword ncv_default = uword( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits, otherwise cap it. + uword ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 1)) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim must be greater than k; using k+1 instead of ", opts.subdim); + ncv = uword(n_eigvals + 1); + } + else + if(opts.subdim > n) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = uword(opts.subdim); + } + } + + // Re-check that we are within the limits + if(ncv < (n_eigvals + 1)) { ncv = (n_eigvals + 1); } + if(ncv > n ) { ncv = n; } + + eT tol = (std::max)(eT(opts.tol), std::numeric_limits<eT>::epsilon()); + + uword maxiter = uword(opts.maxiter); + + // eigval.set_size(n_eigvals); + // eigvec.set_size(n, n_eigvals); + + bool status = true; + + uword nconv = 0; + + try + { + newarp::SymEigsShiftSolver< eT, newarp::EigsSelect::LARGEST_MAGN, newarp::SparseGenRealShiftSolve<eT> > eigs(op, n_eigvals, ncv, sigma); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + catch(const std::runtime_error&) + { + status = false; + } + + if(status == true) + { + if(nconv == 0) { status = false; } + } + + return status; + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(sigma); + arma_ignore(opts); + + return false; + } + #endif + } + + + +template<typename eT, bool use_sigma> +inline +bool +sp_auxlib::eigs_sym_arpack(Col<eT>& eigval, Mat<eT>& eigvec, const SpMat<eT>& X, const uword n_eigvals, const form_type form_val, const eT sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_ARPACK) + { + arma_debug_check( (form_val != form_lm) && (form_val != form_sm) && (form_val != form_la) && (form_val != form_sa) && (form_val != form_sigma), "eigs_sym(): unknown form specified" ); + + if(X.is_square() == false) { return false; } + + char which_sm[3] = "SM"; + char which_lm[3] = "LM"; + char which_sa[3] = "SA"; + char which_la[3] = "LA"; + char* which; + + switch(form_val) + { + case form_sm: which = which_sm; break; + case form_lm: which = which_lm; break; + case form_sa: which = which_sa; break; + case form_la: which = which_la; break; + + default: which = which_lm; break; + } + + // Make sure we aren't asking for every eigenvalue. + // The _saupd() functions allow asking for one more eigenvalue than the _naupd() functions. + arma_debug_check( (n_eigvals >= X.n_rows), "eigs_sym(): n_eigvals must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (X.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + // Set up variables that get used for neupd(). + blas_int n, ncv, ncv_default, ldv, lworkl, info, maxiter; + + eT tol = eT(opts.tol); + maxiter = blas_int(opts.maxiter); + + podarray<eT> resid, v, workd, workl; + podarray<blas_int> iparam, ipntr; + podarray<eT> rwork; // Not used in this case. + + n = blas_int(X.n_rows); // The size of the matrix. + + // Use max(2*k+1, 20) as default subspace dimension for the sym case; MATLAB uses max(2*k, 20), but we need to be backward-compatible. + ncv_default = blas_int( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits + ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 1)) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim must be greater than k; using k+1 instead of ", opts.subdim); + ncv = blas_int(n_eigvals + 1); + } + else + if(blas_int(opts.subdim) > n) + { + arma_debug_warn_level(1, "eigs_sym(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = blas_int(opts.subdim); + } + } + + if(use_sigma) + //if(form_val == form_sigma) + { + run_aupd_shiftinvert(n_eigvals, sigma, X, true /* sym, not gen */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + else + { + const SpMat<eT> Xst = X.st(); + + run_aupd_plain(n_eigvals, which, X, Xst, true /* sym, not gen */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + + if(info != 0) { return false; } + + // The process has converged, and now we need to recover the actual eigenvectors using seupd() + blas_int rvec = 1; // .TRUE + blas_int nev = blas_int(n_eigvals); + + char howmny = 'A'; + char bmat = 'I'; // We are considering the standard eigenvalue problem. + + podarray<blas_int> select(ncv, arma_zeros_indicator()); // Logical array of dimension NCV. + blas_int ldz = n; + + // seupd() will output directly into the eigval and eigvec objects. + eigval.zeros( n_eigvals); + eigvec.zeros(n, n_eigvals); + + arpack::seupd(&rvec, &howmny, select.memptr(), eigval.memptr(), eigvec.memptr(), &ldz, (eT*) &sigma, &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, &info); + + // Check for errors. + if(info != 0) { arma_debug_warn_level(1, "eigs_sym(): ARPACK error ", info, " in seupd()"); return false; } + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(sigma); + arma_ignore(opts); + + return false; + } + #endif + } + + + +//! immediate eigendecomposition of non-symmetric real sparse object +template<typename T, typename T1> +inline +bool +sp_auxlib::eigs_gen(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpBase<T, T1>& X, const uword n_eigvals, const form_type form_val, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_gen(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_gen(): detected non-finite elements"); + return false; + } + + // TODO: investigate optional redirection of "sm" to ARPACK as it's capable of shift-invert; + // TODO: in shift-invert mode, "sm" maps to "lm" of the shift-inverted matrix (with sigma = 0) + + #if defined(ARMA_USE_NEWARP) + { + return sp_auxlib::eigs_gen_newarp(eigval, eigvec, U.M, n_eigvals, form_val, opts); + } + #elif defined(ARMA_USE_ARPACK) + { + constexpr std::complex<T> sigma = T(0); + + return sp_auxlib::eigs_gen_arpack<T,false>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(opts); + + arma_stop_logic_error("eigs_gen(): use of NEWARP or ARPACK must be enabled"); + return false; + } + #endif + } + + + +//! immediate eigendecomposition of non-symmetric real sparse object +template<typename T, typename T1> +inline +bool +sp_auxlib::eigs_gen(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpBase<T, T1>& X, const uword n_eigvals, const std::complex<T> sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_gen(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_gen(): detected non-finite elements"); + return false; + } + + #if (defined(ARMA_USE_ARPACK) && defined(ARMA_USE_SUPERLU)) + { + constexpr form_type form_val = form_sigma; + + return sp_auxlib::eigs_gen_arpack<T,true>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(n_eigvals); + arma_ignore(sigma); + arma_ignore(opts); + + arma_stop_logic_error("eigs_gen(): use of ARPACK and SuperLU must be enabled to use 'sigma'"); + return false; + } + #endif + } + + + +template<typename T> +inline +bool +sp_auxlib::eigs_gen_newarp(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpMat<T>& X, const uword n_eigvals, const form_type form_val, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_NEWARP) + { + arma_debug_check( (form_val != form_lm) && (form_val != form_sm) && (form_val != form_lr) && (form_val != form_sr) && (form_val != form_li) && (form_val != form_si), "eigs_gen(): unknown form specified" ); + + if(X.is_square() == false) { return false; } + + const newarp::SparseGenMatProd<T> op(X); + + arma_debug_check( (n_eigvals + 1 >= op.n_rows), "eigs_gen(): n_eigvals + 1 must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (op.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + uword n = op.n_rows; + + // Use max(2*k+1, 20) as default subspace dimension for the gen case; same as MATLAB. + uword ncv_default = uword( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits + uword ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 3)) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim must be greater than k+2; using k+3 instead of ", opts.subdim); + ncv = uword(n_eigvals + 3); + } + else + if(opts.subdim > n) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = uword(opts.subdim); + } + } + + // Re-check that we are within the limits + if(ncv < (n_eigvals + 3)) { ncv = (n_eigvals + 3); } + if(ncv > n ) { ncv = n; } + + T tol = (std::max)(T(opts.tol), std::numeric_limits<T>::epsilon()); + + uword maxiter = uword(opts.maxiter); + + // eigval.set_size(n_eigvals); + // eigvec.set_size(n, n_eigvals); + + bool status = true; + + uword nconv = 0; + + try + { + if(form_val == form_lm) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::LARGEST_MAGN, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_sm) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::SMALLEST_MAGN, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_lr) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::LARGEST_REAL, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_sr) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::SMALLEST_REAL, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_li) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::LARGEST_IMAG, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + else + if(form_val == form_si) + { + newarp::GenEigsSolver< T, newarp::EigsSelect::SMALLEST_IMAG, newarp::SparseGenMatProd<T> > eigs(op, n_eigvals, ncv); + eigs.init(); + nconv = eigs.compute(maxiter, tol); + eigval = eigs.eigenvalues(); + eigvec = eigs.eigenvectors(); + } + } + catch(const std::runtime_error&) + { + status = false; + } + + if(status == true) + { + if(nconv == 0) { status = false; } + } + + return status; + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(opts); + + return false; + } + #endif + } + + + + +template<typename T, bool use_sigma> +inline +bool +sp_auxlib::eigs_gen_arpack(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpMat<T>& X, const uword n_eigvals, const form_type form_val, const std::complex<T> sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_ARPACK) + { + arma_debug_check( (form_val != form_lm) && (form_val != form_sm) && (form_val != form_lr) && (form_val != form_sr) && (form_val != form_li) && (form_val != form_si) && (form_val != form_sigma), "eigs_gen(): unknown form specified" ); + + if(X.is_square() == false) { return false; } + + char which_lm[3] = "LM"; + char which_sm[3] = "SM"; + char which_lr[3] = "LR"; + char which_sr[3] = "SR"; + char which_li[3] = "LI"; + char which_si[3] = "SI"; + + char* which; + + switch(form_val) + { + case form_lm: which = which_lm; break; + case form_sm: which = which_sm; break; + case form_lr: which = which_lr; break; + case form_sr: which = which_sr; break; + case form_li: which = which_li; break; + case form_si: which = which_si; break; + + default: which = which_lm; + } + + // Make sure we aren't asking for every eigenvalue. + arma_debug_check( (n_eigvals + 1 >= X.n_rows), "eigs_gen(): n_eigvals + 1 must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (X.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + // Set up variables that get used for neupd(). + blas_int n, ncv, ncv_default, ldv, lworkl, info, maxiter; + + T tol = T(opts.tol); + maxiter = blas_int(opts.maxiter); + + podarray<T> resid, v, workd, workl; + podarray<blas_int> iparam, ipntr; + podarray<T> rwork; // Not used in the real case. + + n = blas_int(X.n_rows); // The size of the matrix. + + // Use max(2*k+1, 20) as default subspace dimension for the gen case; same as MATLAB. + ncv_default = blas_int( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits + ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 3)) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim must be greater than k+2; using k+3 instead of ", opts.subdim); + ncv = blas_int(n_eigvals + 3); + } + else + if(blas_int(opts.subdim) > n) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = blas_int(opts.subdim); + } + } + + // WARNING!!! + // We are still not able to apply truly complex shifts to real matrices, + // in which case the OP that ARPACK wants is different (see [s/d]naupd). + // Also, if sigma contains a non-zero imaginary part, retrieving the eigenvalues + // becomes utterly messy (see [s/d]eupd, remark #3). + // We should never get to the point in which the imaginary part of sigma is non-zero; + // the user-facing functions currently convert X from real to complex if a complex sigma is detected. + // The check here is just for extra safety, and as a reminder of what's missing. + T sigmar = real(sigma); + T sigmai = imag(sigma); + + if(use_sigma) + //if(form_val == form_sigma) + { + if(sigmai != T(0)) { arma_stop_logic_error("eigs_gen(): complex 'sigma' not applicable to real matrix"); return false; } + + run_aupd_shiftinvert(n_eigvals, sigmar, X, false /* gen, not sym */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + else + { + const SpMat<T> Xst = X.st(); + + run_aupd_plain(n_eigvals, which, X, Xst, false /* gen, not sym */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + + if(info != 0) { return false; } + + // The process has converged, and now we need to recover the actual eigenvectors using neupd(). + blas_int rvec = 1; // .TRUE + blas_int nev = blas_int(n_eigvals); + + char howmny = 'A'; + char bmat = 'I'; // We are considering the standard eigenvalue problem. + + podarray<blas_int> select(ncv, arma_zeros_indicator()); // logical array of dimension NCV + podarray<T> dr(nev + 1, arma_zeros_indicator()); // real array of dimension NEV + 1 + podarray<T> di(nev + 1, arma_zeros_indicator()); // real array of dimension NEV + 1 + podarray<T> z(n * (nev + 1), arma_zeros_indicator()); // real N by NEV array if HOWMNY = 'A' + podarray<T> workev(3 * ncv, arma_zeros_indicator()); + + blas_int ldz = n; + + arpack::neupd(&rvec, &howmny, select.memptr(), dr.memptr(), di.memptr(), z.memptr(), &ldz, (T*) &sigmar, (T*) &sigmai, workev.memptr(), &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, rwork.memptr(), &info); + + // Check for errors. + if(info != 0) { arma_debug_warn_level(1, "eigs_gen(): ARPACK error ", info, " in neupd()"); return false; } + + // Put it into the outputs. + eigval.set_size(n_eigvals); + eigvec.zeros(n, n_eigvals); + + for(uword i = 0; i < n_eigvals; ++i) + { + eigval[i] = std::complex<T>(dr[i], di[i]); + } + + // Now recover the eigenvectors. + for(uword i = 0; i < n_eigvals; ++i) + { + // ARPACK ?neupd lays things out kinda odd in memory; + // so does LAPACK ?geev -- see auxlib::eig_gen() + if((i < n_eigvals - 1) && (eigval[i] == std::conj(eigval[i + 1]))) + { + for(uword j = 0; j < uword(n); ++j) + { + eigvec.at(j, i) = std::complex<T>(z[n * i + j], z[n * (i + 1) + j]); + eigvec.at(j, i + 1) = std::complex<T>(z[n * i + j], -z[n * (i + 1) + j]); + } + ++i; // Skip the next one. + } + else + if((i == n_eigvals - 1) && (std::complex<T>(eigval[i]).imag() != 0.0)) + { + // We don't have the matched conjugate eigenvalue. + for(uword j = 0; j < uword(n); ++j) + { + eigvec.at(j, i) = std::complex<T>(z[n * i + j], z[n * (i + 1) + j]); + } + } + else + { + // The eigenvector is entirely real. + for(uword j = 0; j < uword(n); ++j) + { + eigvec.at(j, i) = std::complex<T>(z[n * i + j], T(0)); + } + } + } + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(sigma); + arma_ignore(opts); + + return false; + } + #endif + } + + + +//! immediate eigendecomposition of non-symmetric complex sparse object +template<typename T, typename T1> +inline +bool +sp_auxlib::eigs_gen(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpBase< std::complex<T>, T1>& X_expr, const uword n_eigvals, const form_type form_val, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X_expr.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_gen(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_gen(): detected non-finite elements"); + return false; + } + + constexpr std::complex<T> sigma = T(0); + + return sp_auxlib::eigs_gen<T, false>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + + + +//! immediate eigendecomposition of non-symmetric complex sparse object +template<typename T, typename T1> +inline +bool +sp_auxlib::eigs_gen(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpBase< std::complex<T>, T1>& X, const uword n_eigvals, const std::complex<T> sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + const unwrap_spmat<T1> U(X.get_ref()); + + arma_debug_check( (U.M.is_square() == false), "eigs_gen(): given matrix must be square sized" ); + + if(arma_config::check_nonfinite && U.M.internal_has_nonfinite()) + { + arma_debug_warn_level(3, "eigs_gen(): detected non-finite elements"); + return false; + } + + #if (defined(ARMA_USE_ARPACK) && defined(ARMA_USE_SUPERLU)) + { + constexpr form_type form_val = form_sigma; + + return sp_auxlib::eigs_gen<T, true>(eigval, eigvec, U.M, n_eigvals, form_val, sigma, opts); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(n_eigvals); + arma_ignore(sigma); + arma_ignore(opts); + + arma_stop_logic_error("eigs_gen(): use of ARPACK and SuperLU must be enabled to use 'sigma'"); + return false; + } + #endif + } + + + +template<typename T, bool use_sigma> +inline +bool +sp_auxlib::eigs_gen(Col< std::complex<T> >& eigval, Mat< std::complex<T> >& eigvec, const SpMat< std::complex<T> >& X, const uword n_eigvals, const form_type form_val, const std::complex<T> sigma, const eigs_opts& opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_ARPACK) + { + // typedef typename std::complex<T> eT; + + arma_debug_check( (form_val != form_lm) && (form_val != form_sm) && (form_val != form_lr) && (form_val != form_sr) && (form_val != form_li) && (form_val != form_si) && (form_val != form_sigma), "eigs_gen(): unknown form specified" ); + + if(X.is_square() == false) { return false; } + + char which_lm[3] = "LM"; + char which_sm[3] = "SM"; + char which_lr[3] = "LR"; + char which_sr[3] = "SR"; + char which_li[3] = "LI"; + char which_si[3] = "SI"; + + char* which; + + switch(form_val) + { + case form_lm: which = which_lm; break; + case form_sm: which = which_sm; break; + case form_lr: which = which_lr; break; + case form_sr: which = which_sr; break; + case form_li: which = which_li; break; + case form_si: which = which_si; break; + + default: which = which_lm; + } + + // Make sure we aren't asking for every eigenvalue. + arma_debug_check( (n_eigvals + 1 >= X.n_rows), "eigs_gen(): n_eigvals + 1 must be less than the number of rows in the matrix" ); + + // If the matrix is empty, the case is trivial. + if( (X.n_cols == 0) || (n_eigvals == 0) ) // We already know n_cols == n_rows. + { + eigval.reset(); + eigvec.reset(); + return true; + } + + // Set up variables that get used for neupd(). + blas_int n, ncv, ncv_default, ldv, lworkl, info, maxiter; + + T tol = T(opts.tol); + maxiter = blas_int(opts.maxiter); + + podarray< std::complex<T> > resid, v, workd, workl; + podarray<blas_int> iparam, ipntr; + podarray<T> rwork; + + n = blas_int(X.n_rows); // The size of the matrix. + + // Use max(2*k+1, 20) as default subspace dimension for the gen case; same as MATLAB. + ncv_default = blas_int( ((2*n_eigvals+1)>(20)) ? (2*n_eigvals+1) : (20) ); + + // Use opts.subdim only if it's within the limits + ncv = ncv_default; + + if(opts.subdim != 0) + { + if(opts.subdim < (n_eigvals + 3)) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim must be greater than k+2; using k+3 instead of ", opts.subdim); + ncv = blas_int(n_eigvals + 3); + } + else + if(blas_int(opts.subdim) > n) + { + arma_debug_warn_level(1, "eigs_gen(): opts.subdim cannot be greater than n_rows; using n_rows instead of ", opts.subdim); + ncv = n; + } + else + { + ncv = blas_int(opts.subdim); + } + } + + if(use_sigma) + //if(form_val == form_sigma) + { + run_aupd_shiftinvert(n_eigvals, sigma, X, false /* gen, not sym */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + else + { + const SpMat< std::complex<T> > Xst = X.st(); + + run_aupd_plain(n_eigvals, which, X, Xst, false /* gen, not sym */, n, tol, maxiter, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info); + } + + if(info != 0) { return false; } + + // The process has converged, and now we need to recover the actual eigenvectors using neupd(). + blas_int rvec = 1; // .TRUE + blas_int nev = blas_int(n_eigvals); + + char howmny = 'A'; + char bmat = 'I'; // We are considering the standard eigenvalue problem. + + podarray<blas_int> select(ncv, arma_zeros_indicator()); // logical array of dimension NCV + podarray<std::complex<T>> d(nev + 1, arma_zeros_indicator()); // complex array of dimension NEV + 1 + podarray<std::complex<T>> z(n * nev, arma_zeros_indicator()); // complex N by NEV array if HOWMNY = 'A' + podarray<std::complex<T>> workev(2 * ncv, arma_zeros_indicator()); + + blas_int ldz = n; + + // Prepare the outputs; neupd() will write directly to them. + eigval.zeros(n_eigvals); + eigvec.zeros(n, n_eigvals); + + arpack::neupd(&rvec, &howmny, select.memptr(), eigval.memptr(), +(std::complex<T>*) NULL, eigvec.memptr(), &ldz, (std::complex<T>*) &sigma, (std::complex<T>*) NULL, workev.memptr(), &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, rwork.memptr(), &info); + + // Check for errors. + if(info != 0) { arma_debug_warn_level(1, "eigs_gen(): ARPACK error ", info, " in neupd()"); return false; } + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_ignore(n_eigvals); + arma_ignore(form_val); + arma_ignore(sigma); + arma_ignore(opts); + + arma_stop_logic_error("eigs_gen(): use of ARPACK must be enabled for decomposition of complex matrices"); + return false; + } + #endif + } + + + +template<typename T1, typename T2> +inline +bool +sp_auxlib::spsolve_simple(Mat<typename T1::elem_type>& X, const SpBase<typename T1::elem_type, T1>& A_expr, const Base<typename T1::elem_type, T2>& B_expr, const superlu_opts& user_opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_SUPERLU) + { + typedef typename T1::elem_type eT; + + superlu::superlu_options_t options; + sp_auxlib::set_superlu_opts(options, user_opts); + + const unwrap_spmat<T1> tmp1(A_expr.get_ref()); + const SpMat<eT>& A = tmp1.M; + + X = B_expr.get_ref(); // superlu::gssv() uses X as input (the B matrix) and as output (the solution) + + if(A.is_square() == false) + { + X.soft_reset(); + arma_stop_logic_error("spsolve(): solving under-determined / over-determined systems is currently not supported"); + return false; + } + + arma_debug_check( (A.n_rows != X.n_rows), "spsolve(): number of rows in the given objects must be the same", [&](){ X.soft_reset(); } ); + + if(A.is_empty() || X.is_empty()) + { + X.zeros(A.n_cols, X.n_cols); + return true; + } + + if(A.n_nonzero == uword(0)) { X.soft_reset(); return false; } + + if(arma_config::check_nonfinite && (A.internal_has_nonfinite() || X.internal_has_nonfinite())) + { + arma_debug_warn_level(3, "spsolve(): detected non-finite elements"); + return false; + } + + if(arma_config::debug) + { + bool overflow = false; + + overflow = (A.n_nonzero > INT_MAX); + overflow = (A.n_rows > INT_MAX) || overflow; + overflow = (A.n_cols > INT_MAX) || overflow; + overflow = (X.n_rows > INT_MAX) || overflow; + overflow = (X.n_cols > INT_MAX) || overflow; + + if(overflow) + { + arma_stop_runtime_error("spsolve(): integer overflow: matrix dimensions are too large for integer type used by SuperLU"); + return false; + } + } + + superlu_supermatrix_wrangler x; + superlu_supermatrix_wrangler a; + + const bool status_x = wrap_to_supermatrix(x.get_ref(), X); + const bool status_a = copy_to_supermatrix(a.get_ref(), A); + + if( (status_x == false) || (status_a == false) ) { X.soft_reset(); return false; } + + superlu_supermatrix_wrangler l; + superlu_supermatrix_wrangler u; + + // paranoia: use SuperLU's memory allocation, in case it reallocs + + superlu_array_wrangler<int> perm_c(A.n_cols+1); // extra paranoia: increase array length by 1 + superlu_array_wrangler<int> perm_r(A.n_rows+1); + + superlu_stat_wrangler stat; + + int info = 0; // Return code. + + arma_extra_debug_print("superlu::gssv()"); + superlu::gssv<eT>(&options, a.get_ptr(), perm_c.get_ptr(), perm_r.get_ptr(), l.get_ptr(), u.get_ptr(), x.get_ptr(), stat.get_ptr(), &info); + + + // Process the return code. + if( (info > 0) && (info <= int(A.n_cols)) ) + { + // std::ostringstream tmp; + // tmp << "spsolve(): could not solve system; LU factorisation completed, but detected zero in U(" << (info-1) << ',' << (info-1) << ')'; + // arma_debug_warn_level(1, tmp.str()); + } + else + if(info > int(A.n_cols)) + { + arma_debug_warn_level(1, "spsolve(): memory allocation failure"); + } + else + if(info < 0) + { + arma_debug_warn_level(1, "spsolve(): unknown SuperLU error code from gssv(): ", info); + } + + // No need to extract the data from x, since it's using the same memory as X + + return (info == 0); + } + #else + { + arma_ignore(X); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_ignore(user_opts); + arma_stop_logic_error("spsolve(): use of SuperLU must be enabled"); + return false; + } + #endif + } + + + +template<typename T1, typename T2> +inline +bool +sp_auxlib::spsolve_refine(Mat<typename T1::elem_type>& X, typename T1::pod_type& out_rcond, const SpBase<typename T1::elem_type, T1>& A_expr, const Base<typename T1::elem_type, T2>& B_expr, const superlu_opts& user_opts) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_SUPERLU) + { + typedef typename T1::pod_type T; + typedef typename T1::elem_type eT; + + superlu::superlu_options_t options; + sp_auxlib::set_superlu_opts(options, user_opts); + + const unwrap_spmat<T1> tmp1(A_expr.get_ref()); + const SpMat<eT>& A = tmp1.M; + + const unwrap<T2> tmp2(B_expr.get_ref()); + const Mat<eT>& B_unwrap = tmp2.M; + + const bool B_is_modified = ( (user_opts.equilibrate) || (&B_unwrap == &X) ); + + Mat<eT> B_copy; if(B_is_modified) { B_copy = B_unwrap; } + + const Mat<eT>& B = (B_is_modified) ? B_copy : B_unwrap; + + if(A.is_square() == false) + { + X.soft_reset(); + arma_stop_logic_error("spsolve(): solving under-determined / over-determined systems is currently not supported"); + return false; + } + + arma_debug_check( (A.n_rows != B.n_rows), "spsolve(): number of rows in the given objects must be the same", [&](){ X.soft_reset(); } ); + + X.zeros(A.n_cols, B.n_cols); // set the elements to zero, as we don't trust the SuperLU spaghetti code + + if(A.is_empty() || B.is_empty()) { return true; } + + if(A.n_nonzero == uword(0)) { X.soft_reset(); return false; } + + if(arma_config::check_nonfinite && (A.internal_has_nonfinite() || B.internal_has_nonfinite())) + { + arma_debug_warn_level(3, "spsolve(): detected non-finite elements"); + return false; + } + + if(arma_config::debug) + { + bool overflow; + + overflow = (A.n_nonzero > INT_MAX); + overflow = (A.n_rows > INT_MAX) || overflow; + overflow = (A.n_cols > INT_MAX) || overflow; + overflow = (B.n_rows > INT_MAX) || overflow; + overflow = (B.n_cols > INT_MAX) || overflow; + overflow = (X.n_rows > INT_MAX) || overflow; + overflow = (X.n_cols > INT_MAX) || overflow; + + if(overflow) + { + arma_stop_runtime_error("spsolve(): integer overflow: matrix dimensions are too large for integer type used by SuperLU"); + return false; + } + } + + superlu_supermatrix_wrangler x; + superlu_supermatrix_wrangler a; + superlu_supermatrix_wrangler b; + + const bool status_x = wrap_to_supermatrix(x.get_ref(), X); + const bool status_a = copy_to_supermatrix(a.get_ref(), A); // NOTE: superlu::gssvx() modifies 'a' if equilibration is enabled + const bool status_b = wrap_to_supermatrix(b.get_ref(), B); // NOTE: superlu::gssvx() modifies 'b' if equilibration is enabled + + if( (status_x == false) || (status_a == false) || (status_b == false) ) { X.soft_reset(); return false; } + + superlu_supermatrix_wrangler l; + superlu_supermatrix_wrangler u; + + // paranoia: use SuperLU's memory allocation, in case it reallocs + + superlu_array_wrangler<int> perm_c(A.n_cols+1); // extra paranoia: increase array length by 1 + superlu_array_wrangler<int> perm_r(A.n_rows+1); + superlu_array_wrangler<int> etree(A.n_cols+1); + + superlu_array_wrangler<T> R(A.n_rows+1); + superlu_array_wrangler<T> C(A.n_cols+1); + superlu_array_wrangler<T> ferr(B.n_cols+1); + superlu_array_wrangler<T> berr(B.n_cols+1); + + superlu::GlobalLU_t glu; + arrayops::fill_zeros(reinterpret_cast<char*>(&glu), sizeof(superlu::GlobalLU_t)); + + superlu::mem_usage_t mu; + arrayops::fill_zeros(reinterpret_cast<char*>(&mu), sizeof(superlu::mem_usage_t)); + + superlu_stat_wrangler stat; + + char equed[8] = {}; // extra characters for paranoia + T rpg = T(0); + T rcond = T(0); + int info = int(0); // Return code. + + char work[8] = {}; + int lwork = int(0); // 0 means superlu will allocate memory + + arma_extra_debug_print("superlu::gssvx()"); + superlu::gssvx<eT>(&options, a.get_ptr(), perm_c.get_ptr(), perm_r.get_ptr(), etree.get_ptr(), equed, R.get_ptr(), C.get_ptr(), l.get_ptr(), u.get_ptr(), &work[0], lwork, b.get_ptr(), x.get_ptr(), &rpg, &rcond, ferr.get_ptr(), berr.get_ptr(), &glu, &mu, stat.get_ptr(), &info); + + bool status = false; + + // Process the return code. + if(info == 0) + { + status = true; + } + if( (info > 0) && (info <= int(A.n_cols)) ) + { + // std::ostringstream tmp; + // tmp << "spsolve(): could not solve system; LU factorisation completed, but detected zero in U(" << (info-1) << ',' << (info-1) << ')'; + // arma_debug_warn_level(1, tmp.str()); + } + else + if( (info == int(A.n_cols+1)) && (user_opts.allow_ugly) ) + { + arma_debug_warn_level(2, "spsolve(): system is singular to working precision (rcond: ", rcond, ")"); + status = true; + } + else + if(info > int(A.n_cols+1)) + { + arma_debug_warn_level(1, "spsolve(): memory allocation failure"); + } + else + if(info < 0) + { + arma_debug_warn_level(1, "spsolve(): unknown SuperLU error code from gssvx(): ", info); + } + + // No need to extract the data from x, since it's using the same memory as X + + out_rcond = rcond; + + return status; + } + #else + { + arma_ignore(X); + arma_ignore(out_rcond); + arma_ignore(A_expr); + arma_ignore(B_expr); + arma_ignore(user_opts); + arma_stop_logic_error("spsolve(): use of SuperLU must be enabled"); + return false; + } + #endif + } + + + +#if defined(ARMA_USE_SUPERLU) + + template<typename eT> + inline + typename get_pod_type<eT>::result + sp_auxlib::norm1(superlu::SuperMatrix* A) + { + arma_extra_debug_sigprint(); + + char norm_id = '1'; + + arma_extra_debug_print("superlu::langs()"); + return superlu::langs<eT>(&norm_id, A); + } + + + + template<typename eT> + inline + typename get_pod_type<eT>::result + sp_auxlib::lu_rcond(superlu::SuperMatrix* L, superlu::SuperMatrix* U, typename get_pod_type<eT>::result norm_val) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + char norm_id = '1'; + T rcond_out = T(0); + int info = int(0); + + superlu_stat_wrangler stat; + + arma_extra_debug_print("superlu::gscon()"); + superlu::gscon<eT>(&norm_id, L, U, norm_val, &rcond_out, stat.get_ptr(), &info); + + return (info == 0) ? T(rcond_out) : T(0); + } + + + + inline + void + sp_auxlib::set_superlu_opts(superlu::superlu_options_t& options, const superlu_opts& user_opts) + { + arma_extra_debug_sigprint(); + + // default options as the starting point + superlu::set_default_opts(&options); + + // our settings + options.Trans = superlu::NOTRANS; + options.ConditionNumber = superlu::YES; + + // process user_opts + + if(user_opts.equilibrate == true) { options.Equil = superlu::YES; } + if(user_opts.equilibrate == false) { options.Equil = superlu::NO; } + + if(user_opts.symmetric == true) { options.SymmetricMode = superlu::YES; } + if(user_opts.symmetric == false) { options.SymmetricMode = superlu::NO; } + + options.DiagPivotThresh = user_opts.pivot_thresh; + + if(user_opts.permutation == superlu_opts::NATURAL) { options.ColPerm = superlu::NATURAL; } + if(user_opts.permutation == superlu_opts::MMD_ATA) { options.ColPerm = superlu::MMD_ATA; } + if(user_opts.permutation == superlu_opts::MMD_AT_PLUS_A) { options.ColPerm = superlu::MMD_AT_PLUS_A; } + if(user_opts.permutation == superlu_opts::COLAMD) { options.ColPerm = superlu::COLAMD; } + + if(user_opts.refine == superlu_opts::REF_NONE) { options.IterRefine = superlu::NOREFINE; } + if(user_opts.refine == superlu_opts::REF_SINGLE) { options.IterRefine = superlu::SLU_SINGLE; } + if(user_opts.refine == superlu_opts::REF_DOUBLE) { options.IterRefine = superlu::SLU_DOUBLE; } + if(user_opts.refine == superlu_opts::REF_EXTRA) { options.IterRefine = superlu::SLU_EXTRA; } + } + + + + template<typename eT> + inline + bool + sp_auxlib::copy_to_supermatrix(superlu::SuperMatrix& out, const SpMat<eT>& A) + { + arma_extra_debug_sigprint(); + + // We store in column-major CSC. + out.Stype = superlu::SLU_NC; + + if( is_float<eT>::value) { out.Dtype = superlu::SLU_S; } + else if( is_double<eT>::value) { out.Dtype = superlu::SLU_D; } + else if( is_cx_float<eT>::value) { out.Dtype = superlu::SLU_C; } + else if(is_cx_double<eT>::value) { out.Dtype = superlu::SLU_Z; } + + out.Mtype = superlu::SLU_GE; // Just a general matrix. We don't know more now. + + // We have to actually create the object which stores the data. + // This gets cleaned by destroy_supermatrix(). + // We have to use SuperLU's problematic memory allocation routines since they are + // not guaranteed to be new and delete. See the comments in def_superlu.hpp + superlu::NCformat* nc = (superlu::NCformat*)superlu::malloc(sizeof(superlu::NCformat)); + + if(nc == nullptr) { return false; } + + A.sync(); + + nc->nnz = A.n_nonzero; + nc->nzval = (void*) superlu::malloc(sizeof(eT) * A.n_nonzero ); + nc->colptr = (superlu::int_t*)superlu::malloc(sizeof(superlu::int_t) * (A.n_cols + 1)); + nc->rowind = (superlu::int_t*)superlu::malloc(sizeof(superlu::int_t) * A.n_nonzero ); + + if( (nc->nzval == nullptr) || (nc->colptr == nullptr) || (nc->rowind == nullptr) ) { return false; } + + // Fill the matrix. + arrayops::copy((eT*) nc->nzval, A.values, A.n_nonzero); + + // // These have to be copied by hand, because the types may differ. + // for(uword i = 0; i <= A.n_cols; ++i) { nc->colptr[i] = (int_t) A.col_ptrs[i]; } + // for(uword i = 0; i < A.n_nonzero; ++i) { nc->rowind[i] = (int_t) A.row_indices[i]; } + + arrayops::convert(nc->colptr, A.col_ptrs, A.n_cols+1 ); + arrayops::convert(nc->rowind, A.row_indices, A.n_nonzero); + + out.nrow = superlu::int_t(A.n_rows); + out.ncol = superlu::int_t(A.n_cols); + out.Store = (void*) nc; + + return true; + } + + + + // memory efficient implementation of out = A - shift*I, where A is a square matrix + template<typename eT> + inline + bool + sp_auxlib::copy_to_supermatrix_with_shift(superlu::SuperMatrix& out, const SpMat<eT>& A, const eT shift) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (A.is_square() == false), "sp_auxlib::copy_to_supermatrix_with_shift(): given matrix must be square sized" ); + + if(shift == eT(0)) + { + arma_extra_debug_print("sp_auxlib::copy_to_supermatrix_with_shift(): shift is zero; redirecting to sp_auxlib::copy_to_supermatrix()"); + return sp_auxlib::copy_to_supermatrix(out, A); + } + + // We store in column-major CSC. + out.Stype = superlu::SLU_NC; + + if( is_float<eT>::value) { out.Dtype = superlu::SLU_S; } + else if( is_double<eT>::value) { out.Dtype = superlu::SLU_D; } + else if( is_cx_float<eT>::value) { out.Dtype = superlu::SLU_C; } + else if(is_cx_double<eT>::value) { out.Dtype = superlu::SLU_Z; } + + out.Mtype = superlu::SLU_GE; // Just a general matrix. We don't know more now. + + // We have to actually create the object which stores the data. + // This gets cleaned by destroy_supermatrix(). + superlu::NCformat* nc = (superlu::NCformat*)superlu::malloc(sizeof(superlu::NCformat)); + + if(nc == nullptr) { return false; } + + A.sync(); + + uword n_nonzero_diag_old = 0; + uword n_nonzero_diag_new = 0; + + const uword n_search_cols = (std::min)(A.n_rows, A.n_cols); + + for(uword j=0; j < n_search_cols; ++j) + { + const uword col_offset = A.col_ptrs[j ]; + const uword next_col_offset = A.col_ptrs[j + 1]; + + const uword* start_ptr = &(A.row_indices[ col_offset]); + const uword* end_ptr = &(A.row_indices[next_col_offset]); + + const uword wanted_row = j; + + const uword* pos_ptr = std::lower_bound(start_ptr, end_ptr, wanted_row); // binary search + + if( (pos_ptr != end_ptr) && ((*pos_ptr) == wanted_row) ) + { + // element on the main diagonal is non-zero + ++n_nonzero_diag_old; + + const uword offset = uword(pos_ptr - start_ptr); + const uword index = offset + col_offset; + + const eT new_val = A.values[index] - shift; + + if(new_val != eT(0)) { ++n_nonzero_diag_new; } + } + else + { + // element on the main diagonal is zero, but sigma is non-zero, + // so the number of new non-zero elments on the diagonal is increased + ++n_nonzero_diag_new; + } + } + + const uword out_n_nonzero = A.n_nonzero - n_nonzero_diag_old + n_nonzero_diag_new; + + arma_extra_debug_print( arma_str::format("A.n_nonzero: %u") % A.n_nonzero ); + arma_extra_debug_print( arma_str::format("n_nonzero_diag_old: %u") % n_nonzero_diag_old ); + arma_extra_debug_print( arma_str::format("n_nonzero_diag_new: %u") % n_nonzero_diag_new ); + arma_extra_debug_print( arma_str::format("out_n_nonzero: %u") % out_n_nonzero ); + + nc->nnz = out_n_nonzero; + nc->nzval = (void*) superlu::malloc(sizeof(eT) * out_n_nonzero ); + nc->colptr = (superlu::int_t*)superlu::malloc(sizeof(superlu::int_t) * (A.n_cols + 1)); + nc->rowind = (superlu::int_t*)superlu::malloc(sizeof(superlu::int_t) * out_n_nonzero ); + + if( (nc->nzval == nullptr) || (nc->colptr == nullptr) || (nc->rowind == nullptr) ) { return false; } + + // fill the matrix column by column, and insert diagonal elements when necessary + + nc->colptr[0] = 0; + + eT* values_current = (eT*) nc->nzval; + superlu::int_t* rowind_current = nc->rowind; + + uword count = 0; + + for(uword j=0; j < A.n_cols; ++j) + { + const uword idx_start = A.col_ptrs[j ]; + const uword idx_end = A.col_ptrs[j + 1]; + + const eT* values_start = values_current; + + uword i = idx_start; + + // elements in the upper triangular part, excluding the main diagonal + for(; (i < idx_end) && (A.row_indices[i] < j); ++i) + { + (*values_current) = A.values[i]; + (*rowind_current) = superlu::int_t(A.row_indices[i]); + + ++values_current; + ++rowind_current; + + ++count; + } + + // elements on the main diagonal + if( (i < idx_end) && (A.row_indices[i] == j) ) + { + // A(j,j) is non-zero + + const eT new_diag_val = A.values[i] - shift; + + if(new_diag_val != eT(0)) + { + (*values_current) = new_diag_val; + (*rowind_current) = superlu::int_t(j); + + ++values_current; + ++rowind_current; + + ++count; + } + + ++i; + } + else + { + // A(j,j) is zero, so insert a new element + + if(j < n_search_cols) + { + (*values_current) = -shift; + (*rowind_current) = superlu::int_t(j); + + ++values_current; + ++rowind_current; + + ++count; + } + } + + // elements in the lower triangular part, excluding the main diagonal + for(; i < idx_end; ++i) + { + (*values_current) = A.values[i]; + (*rowind_current) = superlu::int_t(A.row_indices[i]); + + ++values_current; + ++rowind_current; + + ++count; + } + + // number of non-zero elements in the j-th column of out + const uword nnz_col = values_current - values_start; + nc->colptr[j + 1] = superlu::int_t(nc->colptr[j] + nnz_col); + } + + arma_extra_debug_print( arma_str::format("count: %u") % count ); + + arma_check( (count != out_n_nonzero), "internal error: sp_auxlib::copy_to_supermatrix_with_shift(): count != out_n_nonzero" ); + + out.nrow = superlu::int_t(A.n_rows); + out.ncol = superlu::int_t(A.n_cols); + out.Store = (void*) nc; + + return true; + } + + + +// // for debugging only +// template<typename eT> +// inline +// void +// sp_auxlib::copy_to_spmat(SpMat<eT>& out, const superlu::SuperMatrix& A) +// { +// arma_extra_debug_sigprint(); +// +// bool type_matched = false; +// +// if( is_float<eT>::value) { type_matched = (A.Dtype == superlu::SLU_S); } +// else if( is_double<eT>::value) { type_matched = (A.Dtype == superlu::SLU_D); } +// else if( is_cx_float<eT>::value) { type_matched = (A.Dtype == superlu::SLU_C); } +// else if(is_cx_double<eT>::value) { type_matched = (A.Dtype == superlu::SLU_Z); } +// +// arma_debug_check( (type_matched == false), "copy_to_spmat(): type mismatch" ); +// arma_debug_check( (A.Mtype != superlu::SLU_GE), "copy_to_spmat(): unknown layout" ); +// +// // NOTE: the l and u instances of SuperMatrix resulting from superlu::gstrf() +// // NOTE: do not have the superlu::SLU_GE layout +// +// const superlu::NCformat* nc = (const superlu::NCformat*)(A.Store); +// +// if(nc == nullptr) { out.reset(); return; } +// +// if( (nc->nzval == nullptr) || (nc->colptr == nullptr) || (nc->rowind == nullptr) ) { out.reset(); return; } +// +// const uword A_n_rows = uword(A.nrow ); +// const uword A_n_cols = uword(A.ncol ); +// const uword A_n_nonzero = uword(nc->nnz); +// +// if(A_n_nonzero == 0) { out.zeros(A_n_rows, A_n_cols); return; } +// +// out.reserve(A_n_rows, A_n_cols, A_n_nonzero); +// +// arrayops::copy(access::rwp(out.values), (const eT*)(nc->nzval), A_n_nonzero); +// +// arrayops::convert(access::rwp(out.col_ptrs), nc->colptr, A_n_cols+1 ); +// arrayops::convert(access::rwp(out.row_indices), nc->rowind, A_n_nonzero); +// +// out.remove_zeros(); // in case SuperLU has bugs and stores zeros in sparse matrices +// } + + + + template<typename eT> + inline + bool + sp_auxlib::wrap_to_supermatrix(superlu::SuperMatrix& out, const Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + // NOTE: this function re-uses memory from matrix A + + // This is being stored as a dense matrix. + out.Stype = superlu::SLU_DN; + + if( is_float<eT>::value) { out.Dtype = superlu::SLU_S; } + else if( is_double<eT>::value) { out.Dtype = superlu::SLU_D; } + else if( is_cx_float<eT>::value) { out.Dtype = superlu::SLU_C; } + else if(is_cx_double<eT>::value) { out.Dtype = superlu::SLU_Z; } + + out.Mtype = superlu::SLU_GE; + + // We have to create the object that stores the data. + superlu::DNformat* dn = (superlu::DNformat*)superlu::malloc(sizeof(superlu::DNformat)); + + if(dn == nullptr) { return false; } + + dn->lda = A.n_rows; + dn->nzval = (void*) A.memptr(); // re-use memory instead of copying + + out.nrow = A.n_rows; + out.ncol = A.n_cols; + out.Store = (void*) dn; + + return true; + } + + + + inline + void + sp_auxlib::destroy_supermatrix(superlu::SuperMatrix& out) + { + arma_extra_debug_sigprint(); + + // Clean up. + if(out.Stype == superlu::SLU_NC) + { + superlu::destroy_compcol_mat(&out); + } + else + if(out.Stype == superlu::SLU_NCP) + { + superlu::destroy_compcolperm_mat(&out); + } + else + if(out.Stype == superlu::SLU_DN) + { + // superlu::destroy_dense_mat(&out); + + // since dn->nzval is set to re-use memory from a Mat object (which manages its own memory), + // we cannot simply call superlu::destroy_dense_mat(). + // Only the out.Store structure can be freed. + + superlu::DNformat* dn = (superlu::DNformat*) out.Store; + + if(dn != nullptr) { superlu::free(dn); } + } + else + if(out.Stype == superlu::SLU_SC) + { + superlu::destroy_supernode_mat(&out); + } + else + { + // Uh, crap. + + std::ostringstream tmp; + + tmp << "sp_auxlib::destroy_supermatrix(): unhandled Stype" << std::endl; + tmp << "Stype val: " << out.Stype << std::endl; + tmp << "Stype name: "; + + if(out.Stype == superlu::SLU_NC) { tmp << "SLU_NC"; } + if(out.Stype == superlu::SLU_NCP) { tmp << "SLU_NCP"; } + if(out.Stype == superlu::SLU_NR) { tmp << "SLU_NR"; } + if(out.Stype == superlu::SLU_SC) { tmp << "SLU_SC"; } + if(out.Stype == superlu::SLU_SCP) { tmp << "SLU_SCP"; } + if(out.Stype == superlu::SLU_SR) { tmp << "SLU_SR"; } + if(out.Stype == superlu::SLU_DN) { tmp << "SLU_DN"; } + if(out.Stype == superlu::SLU_NR_loc) { tmp << "SLU_NR_loc"; } + + arma_debug_warn_level(1, tmp.str()); + arma_stop_runtime_error("internal error: sp_auxlib::destroy_supermatrix()"); + } + } + +#endif + + + +template<typename eT, typename T> +inline +void +sp_auxlib::run_aupd_plain + ( + const uword n_eigvals, char* which, + const SpMat<T>& X, const SpMat<T>& Xst, const bool sym, + blas_int& n, eT& tol, blas_int& maxiter, + podarray<T>& resid, blas_int& ncv, podarray<T>& v, blas_int& ldv, + podarray<blas_int>& iparam, podarray<blas_int>& ipntr, + podarray<T>& workd, podarray<T>& workl, blas_int& lworkl, podarray<eT>& rwork, + blas_int& info + ) + { + #if defined(ARMA_USE_ARPACK) + { + // ARPACK provides a "reverse communication interface" which is an + // entertainingly archaic FORTRAN software engineering technique that + // basically means that we call saupd()/naupd() and it tells us with some + // return code what we need to do next (usually a matrix-vector product) and + // then call it again. So this results in some type of iterative process + // where we call saupd()/naupd() many times. + + blas_int ido = 0; // This must be 0 for the first call. + char bmat = 'I'; // We are considering the standard eigenvalue problem. + n = X.n_rows; // The size of the matrix (should already be set outside). + blas_int nev = n_eigvals; + + // resid.zeros(n); + eigs_randu_filler<T> randu_filler; + randu_filler.fill(resid, n); // use deterministic starting point + + // Two contraints on NCV: (NCV > NEV) for sym problems or + // (NCV > NEV + 2) for gen problems and (NCV <= N) + // + // We're calling either arpack::saupd() or arpack::naupd(), + // which have slighly different minimum constraint and recommended value for NCV: + // http://www.caam.rice.edu/software/ARPACK/UG/node136.html + // http://www.caam.rice.edu/software/ARPACK/UG/node138.html + + if(ncv < (nev + (sym ? 1 : 3))) { ncv = (nev + (sym ? 1 : 3)); } + if(ncv > n ) { ncv = n; } + + v.zeros(n * ncv); // Array N by NCV (output). + rwork.zeros(ncv); // Work array of size NCV for complex calls. + ldv = n; // "Leading dimension of V exactly as declared in the calling program." + + // IPARAM: integer array of length 11. + iparam.zeros(11); + iparam(0) = 1; // Exact shifts (not provided by us). + iparam(2) = maxiter; // Maximum iterations; all the examples use 300, but they were written in the ancient times. + iparam(6) = 1; // Mode 1: A * x = lambda * x. + + // IPNTR: integer array of length 14 (output). + ipntr.zeros(14); + + // Real work array used in the basic Arnoldi iteration for reverse communication. + workd.zeros(3 * n); + + // lworkl must be at least 3 * NCV^2 + 6 * NCV. + lworkl = 3 * (ncv * ncv) + 6 * ncv; + + // Real work array of length lworkl. + workl.zeros(lworkl); + + // info = 0; // resid to be filled with random values by ARPACK (non-deterministic) + info = 1; // resid is already filled with random values (deterministic) + + // All the parameters have been set or created. Time to loop a lot. + while(ido != 99) + { + // Call saupd() or naupd() with the current parameters. + if(sym) + { + arma_extra_debug_print("arpack::saupd()"); + arpack::saupd(&ido, &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, &info); + } + else + { + arma_extra_debug_print("arpack::naupd()"); + arpack::naupd(&ido, &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, rwork.memptr(), &info); + } + + // What do we do now? + switch (ido) + { + case -1: + // fallthrough + case 1: + { + // We need to calculate the matrix-vector multiplication y = OP * x + // where x is of length n and starts at workd(ipntr(0)), and y is of + // length n and starts at workd(ipntr(1)). + + // // OLD METHOD + // + // // operator*(sp_mat, vec) doesn't properly put the result into the + // // right place so we'll just reimplement it here for now... + // + // // Set the output to point at the right memory. We have to subtract + // // one from FORTRAN pointers... + // Col<T> out(workd.memptr() + ipntr(1) - 1, n, false /* don't copy */); + // // Set the input to point at the right memory. + // Col<T> in(workd.memptr() + ipntr(0) - 1, n, false /* don't copy */); + // + // out.zeros(); + // + // T* out_mem = out.memptr(); + // const T* in_mem = in.memptr(); + // + // typename SpMat<T>::const_iterator X_it = X.begin(); + // + // const uword X_nnz = X.n_nonzero; + // + // for(uword count=0; count < X_nnz; ++count, ++X_it) + // { + // const eT X_it_val = (*X_it); + // const uword X_it_row = X_it.row(); + // const uword X_it_col = X_it.col(); + // + // out_mem[X_it_row] += X_it_val * in_mem[X_it_col]; + // } + // + // // No need to modify memory further since it was all done in-place. + + + // NEW METHOD + // + // both operator*(rowvec, sp_mat) and operator*(sp_mat, colvec) can now write to an existing object + + Row<T> out(workd.memptr() + ipntr(1) - 1, n, false, true); + Row<T> in(workd.memptr() + ipntr(0) - 1, n, false, true); + + out = in * Xst; + + break; + } + case 99: + // Nothing to do here, things have converged. + break; + default: + { + return; // Parent frame can look at the value of info. + } + } + } + + // The process has ended; check the return code. + if( (info != 0) && (info != 1) ) + { + // Print warnings if there was a failure. + + if(sym) + { + arma_debug_warn_level(1, "eigs_sym(): ARPACK error ", info, " in saupd()"); + } + else + { + arma_debug_warn_level(1, "eigs_gen(): ARPACK error ", info, " in naupd()"); + } + + return; // Parent frame can look at the value of info. + } + } + #else + { + arma_ignore(n_eigvals); + arma_ignore(which); + arma_ignore(X); + arma_ignore(sym); + arma_ignore(n); + arma_ignore(tol); + arma_ignore(maxiter); + arma_ignore(resid); + arma_ignore(ncv); + arma_ignore(v); + arma_ignore(ldv); + arma_ignore(iparam); + arma_ignore(ipntr); + arma_ignore(workd); + arma_ignore(workl); + arma_ignore(lworkl); + arma_ignore(rwork); + arma_ignore(info); + } + #endif + } + + + +// Here 'sigma' is 'T', but should be 'eT'. +// Applying complex shifts to real matrices is currently not directly implemented +template<typename eT, typename T> +inline +void +sp_auxlib::run_aupd_shiftinvert + ( + const uword n_eigvals, const T sigma, + const SpMat<T>& X, const bool sym, + blas_int& n, eT& tol, blas_int& maxiter, + podarray<T>& resid, blas_int& ncv, podarray<T>& v, blas_int& ldv, + podarray<blas_int>& iparam, podarray<blas_int>& ipntr, + podarray<T>& workd, podarray<T>& workl, blas_int& lworkl, podarray<eT>& rwork, + blas_int& info + ) + { + // TODO: inconsistent use of type names: T can be complex while eT can be real + + #if (defined(ARMA_USE_ARPACK) && defined(ARMA_USE_SUPERLU)) + { + char which_lm[3] = "LM"; + + char* which = which_lm; // NOTE: which_lm is the assumed operation when using shift-invert + + blas_int ido = 0; // This must be 0 for the first call. + char bmat = 'I'; // We are considering the standard eigenvalue problem. + n = X.n_rows; // The size of the matrix (should already be set outside). + blas_int nev = n_eigvals; + + // resid.zeros(n); + eigs_randu_filler<T> randu_filler; + randu_filler.fill(resid, n); // use deterministic starting point + + // Two contraints on NCV: (NCV > NEV) for sym problems or + // (NCV > NEV + 2) for gen problems and (NCV <= N) + // + // We're calling either arpack::saupd() or arpack::naupd(), + // which have slighly different minimum constraint and recommended value for NCV: + // http://www.caam.rice.edu/software/ARPACK/UG/node136.html + // http://www.caam.rice.edu/software/ARPACK/UG/node138.html + + if(ncv < (nev + (sym ? 1 : 3))) { ncv = (nev + (sym ? 1 : 3)); } + if(ncv > n ) { ncv = n; } + + v.zeros(n * ncv); // Array N by NCV (output). + rwork.zeros(ncv); // Work array of size NCV for complex calls. + ldv = n; // "Leading dimension of V exactly as declared in the calling program." + + // IPARAM: integer array of length 11. + iparam.zeros(11); + iparam(0) = 1; // Exact shifts (not provided by us). + iparam(2) = maxiter; // Maximum iterations; all the examples use 300, but they were written in the ancient times. + // iparam(6) = 1; // Mode 1: A * x = lambda * x. + + // Change IPARAM for shift-invert + iparam(6) = 3; // Mode 3: A * x = lambda * M * x, M symmetric semi-definite. OP = inv[A - sigma*M]*M (A complex) or Real_Part{ inv[A - sigma*M]*M } (A real) and B = M. + + // IPNTR: integer array of length 14 (output). + ipntr.zeros(14); + + // Real work array used in the basic Arnoldi iteration for reverse communication. + workd.zeros(3 * n); + + // lworkl must be at least 3 * NCV^2 + 6 * NCV. + lworkl = 3 * (ncv * ncv) + 6 * ncv; + + // Real work array of length lworkl. + workl.zeros(lworkl); + + // info = 0; // resid to be filled with random values by ARPACK (non-deterministic) + info = 1; // resid is already filled with random values (deterministic) + + superlu_opts superlu_opts_default; + superlu::superlu_options_t options; + sp_auxlib::set_superlu_opts(options, superlu_opts_default); + int lwork = 0; + superlu::trans_t trans = superlu::NOTRANS; + + superlu::GlobalLU_t Glu; /* Not needed on return. */ + arrayops::fill_zeros(reinterpret_cast<char*>(&Glu), sizeof(superlu::GlobalLU_t)); + + superlu_supermatrix_wrangler x; + superlu_supermatrix_wrangler xC; + + const bool status_x = sp_auxlib::copy_to_supermatrix_with_shift(x.get_ref(), X, sigma); + + if(status_x == false) + { + arma_stop_runtime_error("run_aupd_shiftinvert(): could not construct SuperLU matrix"); + info = blas_int(-1); + return; + } + + // // for debugging only + // if(true) + // { + // cout << "*** testing output of copy_to_supermatrix_with_shift()" << endl; + // cout << "*** sigma: " << sigma << endl; + // + // SpMat<T> Y(X); + // Y.diag() -= sigma; + // + // SpMat<T> Z; + // + // sp_auxlib::copy_to_spmat(Z, x.get_ref()); + // + // cout << "*** size(Y): " << arma::size(Y) << endl; + // cout << "*** size(Z): " << arma::size(Z) << endl; + // cout << "*** accu(abs(Y)): " << accu(abs(Y)) << endl; + // cout << "*** accu(abs(Z)): " << accu(abs(Z)) << endl; + // + // if(arma::size(Y) == arma::size(Z)) + // { + // cout << "*** error: " << accu(abs(Y-Z)) << endl; + // } + // } + + superlu_supermatrix_wrangler l; + superlu_supermatrix_wrangler u; + + superlu_array_wrangler<int> perm_c(X.n_cols+1); // paranoia: increase array length by 1 + superlu_array_wrangler<int> perm_r(X.n_rows+1); + superlu_array_wrangler<int> etree(X.n_cols+1); + + superlu_stat_wrangler stat; + + int panel_size = superlu::sp_ispec_environ(1); + int relax = superlu::sp_ispec_environ(2); + int slu_info = 0; // Return code. + + arma_extra_debug_print("superlu::gstrf()"); + superlu::get_permutation_c(options.ColPerm, x.get_ptr(), perm_c.get_ptr()); + superlu::sp_preorder_mat(&options, x.get_ptr(), perm_c.get_ptr(), etree.get_ptr(), xC.get_ptr()); + superlu::gstrf<T>(&options, xC.get_ptr(), relax, panel_size, etree.get_ptr(), NULL, lwork, perm_c.get_ptr(), perm_r.get_ptr(), l.get_ptr(), u.get_ptr(), &Glu, stat.get_ptr(), &slu_info); + + if(slu_info != 0) + { + arma_debug_warn_level(2, "matrix is singular to working precision"); + info = blas_int(-1); + return; + } + + // NOTE: potential problem with inconsistent/mismatched use of eT and T types + eT x_norm_val = sp_auxlib::norm1<T>(x.get_ptr()); + eT x_rcond = sp_auxlib::lu_rcond<T>(l.get_ptr(), u.get_ptr(), x_norm_val); + + if( (x_rcond < std::numeric_limits<eT>::epsilon()) || arma_isnan(x_rcond) ) + { + arma_debug_warn_level(2, "matrix is singular to working precision (rcond: ", x_rcond, ")"); + info = blas_int(-1); + return; + } + + // All the parameters have been set or created. Time to loop a lot. + while(ido != 99) + { + // Call saupd() or naupd() with the current parameters. + if(sym) + { + arma_extra_debug_print("arpack::saupd()"); + arpack::saupd(&ido, &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, &info); + } + else + { + arma_extra_debug_print("arpack::naupd()"); + arpack::naupd(&ido, &bmat, &n, which, &nev, &tol, resid.memptr(), &ncv, v.memptr(), &ldv, iparam.memptr(), ipntr.memptr(), workd.memptr(), workl.memptr(), &lworkl, rwork.memptr(), &info); + } + + // What do we do now? + switch (ido) + { + case -1: + // fallthrough + case 1: + { + // We need to calculate the matrix-vector multiplication y = OP * x + // where x is of length n and starts at workd(ipntr(0)), and y is of + // length n and starts at workd(ipntr(1)). + + // Set the output to point at the right memory. We have to subtract + // one from FORTRAN pointers... + Col<T> out(workd.memptr() + ipntr(1) - 1, n, false /* don't copy */); + // Set the input to point at the right memory. + Col<T> in(workd.memptr() + ipntr(0) - 1, n, false /* don't copy */); + + // Consider getting the LU factorization from ZGSTRF, and then + // solve the system L*U*out = in (possibly with permutation matrix?) + // Instead of "spsolve(out,X,in)" we call gstrf above and gstrs below + + out = in; + superlu_supermatrix_wrangler out_slu; + + const bool status_out_slu = sp_auxlib::wrap_to_supermatrix(out_slu.get_ref(), out); + + if(status_out_slu == false) { arma_stop_runtime_error("run_aupd_shiftinvert(): could not construct SuperLU matrix"); return; } + + arma_extra_debug_print("superlu::gstrs()"); + superlu::gstrs<T>(trans, l.get_ptr(), u.get_ptr(), perm_c.get_ptr(), perm_r.get_ptr(), out_slu.get_ptr(), stat.get_ptr(), &info); + + // No need to modify memory further since it was all done in-place. + + break; + } + case 99: + // Nothing to do here, things have converged. + break; + default: + { + return; // Parent frame can look at the value of info. + } + } + } + + // The process has ended; check the return code. + if( (info != 0) && (info != 1) ) + { + // Print warnings if there was a failure. + + if(sym) + { + arma_debug_warn_level(2, "eigs_sym(): ARPACK error ", info, " in saupd()"); + } + else + { + arma_debug_warn_level(2, "eigs_gen(): ARPACK error ", info, " in naupd()"); + } + + return; // Parent frame can look at the value of info. + } + } + #else + { + arma_ignore(n_eigvals); + arma_ignore(sigma); + arma_ignore(X); + arma_ignore(sym); + arma_ignore(n); + arma_ignore(tol); + arma_ignore(maxiter); + arma_ignore(resid); + arma_ignore(ncv); + arma_ignore(v); + arma_ignore(ldv); + arma_ignore(iparam); + arma_ignore(ipntr); + arma_ignore(workd); + arma_ignore(workl); + arma_ignore(lworkl); + arma_ignore(rwork); + arma_ignore(info); + } + #endif + } + + + +template<typename eT> +inline +bool +sp_auxlib::rudimentary_sym_check(const SpMat<eT>& X) + { + arma_extra_debug_sigprint(); + + if(X.n_rows != X.n_cols) { return false; } + + const eT tol = eT(10000) * std::numeric_limits<eT>::epsilon(); // allow some leeway + + typename SpMat<eT>::const_iterator it = X.begin(); + typename SpMat<eT>::const_iterator it_end = X.end(); + + const uword n_check_limit = (std::max)( uword(2), uword(X.n_nonzero/100) ); + + uword n_check = 1; + + while( (it != it_end) && (n_check <= n_check_limit) ) + { + const uword it_row = it.row(); + const uword it_col = it.col(); + + if(it_row != it_col) + { + const eT A = (*it); + const eT B = X.at( it_col, it_row ); // deliberately swapped + + const eT C = (std::max)(std::abs(A), std::abs(B)); + + const eT delta = std::abs(A - B); + + if(( (delta <= tol) || (delta <= (C * tol)) ) == false) { return false; } + + ++n_check; + } + + ++it; + } + + return true; + } + + + +template<typename T> +inline +bool +sp_auxlib::rudimentary_sym_check(const SpMat< std::complex<T> >& X) + { + arma_extra_debug_sigprint(); + + // NOTE: the function name is a misnomer, as it checks for hermitian complex matrices; + // NOTE: for simplicity of use, the function name is the same as for real matrices + + typedef typename std::complex<T> eT; + + if(X.n_rows != X.n_cols) { return false; } + + const T tol = T(10000) * std::numeric_limits<T>::epsilon(); // allow some leeway + + typename SpMat<eT>::const_iterator it = X.begin(); + typename SpMat<eT>::const_iterator it_end = X.end(); + + const uword n_check_limit = (std::max)( uword(2), uword(X.n_nonzero/100) ); + + uword n_check = 1; + + while( (it != it_end) && (n_check <= n_check_limit) ) + { + const uword it_row = it.row(); + const uword it_col = it.col(); + + if(it_row != it_col) + { + const eT A = (*it); + const eT B = X.at( it_col, it_row ); // deliberately swapped + + const T C_real = (std::max)(std::abs(A.real()), std::abs(B.real())); + const T C_imag = (std::max)(std::abs(A.imag()), std::abs(B.imag())); + + const T delta_real = std::abs(A.real() - B.real()); + const T delta_imag = std::abs(A.imag() + B.imag()); // take into account the conjugate + + const bool okay_real = ( (delta_real <= tol) || (delta_real <= (C_real * tol)) ); + const bool okay_imag = ( (delta_imag <= tol) || (delta_imag <= (C_imag * tol)) ); + + if( (okay_real == false) || (okay_imag == false) ) { return false; } + + ++n_check; + } + else + { + const eT A = (*it); + + if(std::abs(A.imag()) > tol) { return false; } + } + + ++it; + } + + return true; + } + + + +// + + + +template<typename eT> +inline +eigs_randu_filler<eT>::eigs_randu_filler() + { + arma_extra_debug_sigprint(); + + typedef typename std::mt19937_64::result_type local_seed_type; + + local_engine.seed(local_seed_type(123)); + + typedef typename std::uniform_real_distribution<eT>::param_type local_param_type; + + local_u_distr.param(local_param_type(-1.0, +1.0)); + } + + +template<typename eT> +inline +void +eigs_randu_filler<eT>::fill(podarray<eT>& X, const uword N) + { + arma_extra_debug_sigprint(); + + X.set_size(N); + + eT* X_mem = X.memptr(); + + for(uword i=0; i<N; ++i) { X_mem[i] = eT( local_u_distr(local_engine) ); } + } + + +template<typename T> +inline +eigs_randu_filler< std::complex<T> >::eigs_randu_filler() + { + arma_extra_debug_sigprint(); + + typedef typename std::mt19937_64::result_type local_seed_type; + + local_engine.seed(local_seed_type(123)); + + typedef typename std::uniform_real_distribution<T>::param_type local_param_type; + + local_u_distr.param(local_param_type(-1.0, +1.0)); + } + + +template<typename T> +inline +void +eigs_randu_filler< std::complex<T> >::fill(podarray< std::complex<T> >& X, const uword N) + { + arma_extra_debug_sigprint(); + + typedef typename std::complex<T> eT; + + X.set_size(N); + + eT* X_mem = X.memptr(); + + for(uword i=0; i<N; ++i) + { + eT& X_mem_i = X_mem[i]; + + X_mem_i.real( T(local_u_distr(local_engine)) ); + X_mem_i.imag( T(local_u_distr(local_engine)) ); + } + } + + + +// + + + +#if defined(ARMA_USE_SUPERLU) + +inline +superlu_supermatrix_wrangler::~superlu_supermatrix_wrangler() + { + arma_extra_debug_sigprint_this(this); + + if(used == false) { return; } + + char* m_char = reinterpret_cast<char*>(&m); + bool all_zero = true; + + for(size_t i=0; i < sizeof(superlu::SuperMatrix); ++i) + { + if(m_char[i] != char(0)) { all_zero = false; break; } + } + + if(all_zero == false) { sp_auxlib::destroy_supermatrix(m); } + } + +inline +superlu_supermatrix_wrangler::superlu_supermatrix_wrangler() + { + arma_extra_debug_sigprint_this(this); + + arrayops::fill_zeros(reinterpret_cast<char*>(&m), sizeof(superlu::SuperMatrix)); + } + +inline +superlu::SuperMatrix& +superlu_supermatrix_wrangler::get_ref() + { + used = true; + + return m; + } + +inline +superlu::SuperMatrix* +superlu_supermatrix_wrangler::get_ptr() + { + used = true; + + return &m; + } + + +// + + +inline +superlu_stat_wrangler::~superlu_stat_wrangler() + { + arma_extra_debug_sigprint_this(this); + + superlu::free_stat(&stat); + } + +inline +superlu_stat_wrangler::superlu_stat_wrangler() + { + arma_extra_debug_sigprint_this(this); + + arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(superlu::SuperLUStat_t)); + + superlu::init_stat(&stat); + } + +inline +superlu::SuperLUStat_t* +superlu_stat_wrangler::get_ptr() + { + return &stat; + } + + +// + + +template<typename eT> +inline +superlu_array_wrangler<eT>::~superlu_array_wrangler() + { + arma_extra_debug_sigprint_this(this); + + (*this).reset(); + } + +template<typename eT> +inline +superlu_array_wrangler<eT>::superlu_array_wrangler() + : mem(nullptr) + { + arma_extra_debug_sigprint_this(this); + } + +template<typename eT> +inline +superlu_array_wrangler<eT>::superlu_array_wrangler(const uword n_elem) + : mem(nullptr) + { + arma_extra_debug_sigprint_this(this); + + (*this).set_size(n_elem); + } + +template<typename eT> +inline +void +superlu_array_wrangler<eT>::set_size(const uword n_elem) + { + arma_extra_debug_sigprint(); + + if(mem != nullptr) { (*this).reset(); } + + mem = (eT*)(superlu::malloc(n_elem * sizeof(eT))); + + arma_check_bad_alloc( (mem == nullptr), "superlu::malloc(): out of memory" ); + + arrayops::fill_zeros(mem, n_elem); + } + +template<typename eT> +inline +void +superlu_array_wrangler<eT>::reset() + { + arma_extra_debug_sigprint(); + + if(mem != nullptr) + { + superlu::free(mem); + mem = nullptr; + } + } + +template<typename eT> +inline +eT* +superlu_array_wrangler<eT>::get_ptr() + { + return mem; + } + + +// + + +template<typename eT> +inline +superlu_worker<eT>::~superlu_worker() + { + arma_extra_debug_sigprint_this(this); + + if(l != nullptr) { delete l; l = nullptr; } + if(u != nullptr) { delete u; u = nullptr; } + } + + +template<typename eT> +inline +superlu_worker<eT>::superlu_worker() + { + arma_extra_debug_sigprint_this(this); + } + + +template<typename eT> +inline +bool +superlu_worker<eT>::factorise(typename get_pod_type<eT>::result& out_rcond, const SpMat<eT>& A, const superlu_opts& user_opts) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + factorisation_valid = false; + + if(l != nullptr) { delete l; l = nullptr; } + if(u != nullptr) { delete u; u = nullptr; } + + l = new(std::nothrow) superlu_supermatrix_wrangler; + u = new(std::nothrow) superlu_supermatrix_wrangler; + + if( (l == nullptr) || (u == nullptr) ) + { + arma_debug_warn_level(3, "superlu_worker()::factorise(): could not construct SuperLU matrix"); + return false; + } + + superlu_supermatrix_wrangler& l_ref = (*l); + superlu_supermatrix_wrangler& u_ref = (*u); + + superlu::superlu_options_t options; + sp_auxlib::set_superlu_opts(options, user_opts); + + superlu_supermatrix_wrangler AA; + superlu_supermatrix_wrangler AAc; + + const bool status_AA = sp_auxlib::copy_to_supermatrix(AA.get_ref(), A); + + if(status_AA == false) + { + arma_debug_warn_level(3, "superlu_worker()::factorise(): could not construct SuperLU matrix"); + return false; + } + + (*this).perm_c.set_size(A.n_cols+1); // paranoia: increase array length by 1 + (*this).perm_r.set_size(A.n_rows+1); + + superlu_array_wrangler<int> etree(A.n_cols+1); + + superlu::GlobalLU_t Glu; + arrayops::fill_zeros(reinterpret_cast<char*>(&Glu), sizeof(superlu::GlobalLU_t)); + + int panel_size = superlu::sp_ispec_environ(1); + int relax = superlu::sp_ispec_environ(2); + int lwork = 0; + int info = 0; + + arma_extra_debug_print("superlu::superlu::get_permutation_c()"); + superlu::get_permutation_c(options.ColPerm, AA.get_ptr(), perm_c.get_ptr()); + + arma_extra_debug_print("superlu::superlu::sp_preorder_mat()"); + superlu::sp_preorder_mat(&options, AA.get_ptr(), perm_c.get_ptr(), etree.get_ptr(), AAc.get_ptr()); + + arma_extra_debug_print("superlu::gstrf()"); + superlu::gstrf<eT>(&options, AAc.get_ptr(), relax, panel_size, etree.get_ptr(), NULL, lwork, perm_c.get_ptr(), perm_r.get_ptr(), l_ref.get_ptr(), u_ref.get_ptr(), &Glu, stat.get_ptr(), &info); + + if(info != 0) + { + arma_debug_warn_level(3, "superlu_worker()::factorise(): LU factorisation failed"); + return false; + } + + const T AA_norm = sp_auxlib::norm1<T>(AA.get_ptr()); + const T AA_rcond = sp_auxlib::lu_rcond<eT>(l_ref.get_ptr(), u_ref.get_ptr(), AA_norm); + + out_rcond = AA_rcond; + + if(arma_isnan(AA_rcond)) { return false; } + // if(AA_rcond == T(0)) { return false; } + + factorisation_valid = true; + + return true; + } + + +template<typename eT> +inline +bool +superlu_worker<eT>::solve(Mat<eT>& X, const Mat<eT>& B) + { + arma_extra_debug_sigprint(); + + if(factorisation_valid == false) { return false; } + if( (l == nullptr) || (u == nullptr) ) { return false; } + + superlu_supermatrix_wrangler& l_ref = (*l); + superlu_supermatrix_wrangler& u_ref = (*u); + + X = B; + + superlu_supermatrix_wrangler XX; + + const bool status_XX = sp_auxlib::wrap_to_supermatrix(XX.get_ref(), X); + + if(status_XX == false) + { + arma_debug_warn_level(3, "superlu_worker()::solve(): could not construct SuperLU matrix"); + return false; + } + + superlu::trans_t trans = superlu::NOTRANS; + int info = 0; + + arma_extra_debug_print("superlu::gstrs()"); + superlu::gstrs<eT>(trans, l_ref.get_ptr(), u_ref.get_ptr(), perm_c.get_ptr(), perm_r.get_ptr(), XX.get_ptr(), stat.get_ptr(), &info); + + return (info == 0); + } + + +#endif + + +//! @} |