// SPDX-License-Identifier: Apache-2.0 // // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) // Copyright 2008-2016 National ICT Australia (NICTA) // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ------------------------------------------------------------------------ //! \addtogroup op_sum //! @{ template inline void op_sum::apply(Mat& out, const Op& in) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const uword dim = in.aux_uword_a; arma_debug_check( (dim > 1), "sum(): parameter 'dim' must be 0 or 1" ); const Proxy P(in.m); if(P.is_alias(out) == false) { op_sum::apply_noalias(out, P, dim); } else { Mat tmp; op_sum::apply_noalias(tmp, P, dim); out.steal_mem(tmp); } } template inline void op_sum::apply_noalias(Mat& out, const Proxy& P, const uword dim) { arma_extra_debug_sigprint(); if(is_Mat::stored_type>::value || (arma_config::openmp && Proxy::use_mp)) { op_sum::apply_noalias_unwrap(out, P, dim); } else { op_sum::apply_noalias_proxy(out, P, dim); } } template inline void op_sum::apply_noalias_unwrap(Mat& out, const Proxy& P, const uword dim) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; typedef typename Proxy::stored_type P_stored_type; const unwrap tmp(P.Q); const typename unwrap::stored_type& X = tmp.M; const uword X_n_rows = X.n_rows; const uword X_n_cols = X.n_cols; const uword out_n_rows = (dim == 0) ? uword(1) : X_n_rows; const uword out_n_cols = (dim == 0) ? X_n_cols : uword(1); out.set_size(out_n_rows, out_n_cols); if(X.n_elem == 0) { out.zeros(); return; } const eT* X_colptr = X.memptr(); eT* out_mem = out.memptr(); if(dim == 0) { for(uword col=0; col < X_n_cols; ++col) { out_mem[col] = arrayops::accumulate( X_colptr, X_n_rows ); X_colptr += X_n_rows; } } else { arrayops::copy(out_mem, X_colptr, X_n_rows); X_colptr += X_n_rows; for(uword col=1; col < X_n_cols; ++col) { arrayops::inplace_plus( out_mem, X_colptr, X_n_rows ); X_colptr += X_n_rows; } } } template inline void op_sum::apply_noalias_proxy(Mat& out, const Proxy& P, const uword dim) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const uword P_n_rows = P.get_n_rows(); const uword P_n_cols = P.get_n_cols(); const uword out_n_rows = (dim == 0) ? uword(1) : P_n_rows; const uword out_n_cols = (dim == 0) ? P_n_cols : uword(1); out.set_size(out_n_rows, out_n_cols); if(P.get_n_elem() == 0) { out.zeros(); return; } eT* out_mem = out.memptr(); if(Proxy::use_at == false) { if(dim == 0) { uword count = 0; for(uword col=0; col < P_n_cols; ++col) { eT val1 = eT(0); eT val2 = eT(0); uword j; for(j=1; j < P_n_rows; j+=2) { val1 += P[count]; ++count; val2 += P[count]; ++count; } if((j-1) < P_n_rows) { val1 += P[count]; ++count; } out_mem[col] = (val1 + val2); } } else { uword count = 0; for(uword row=0; row < P_n_rows; ++row) { out_mem[row] = P[count]; ++count; } for(uword col=1; col < P_n_cols; ++col) for(uword row=0; row < P_n_rows; ++row) { out_mem[row] += P[count]; ++count; } } } else { if(dim == 0) { for(uword col=0; col < P_n_cols; ++col) { eT val1 = eT(0); eT val2 = eT(0); uword i,j; for(i=0, j=1; j < P_n_rows; i+=2, j+=2) { val1 += P.at(i,col); val2 += P.at(j,col); } if(i < P_n_rows) { val1 += P.at(i,col); } out_mem[col] = (val1 + val2); } } else { for(uword row=0; row < P_n_rows; ++row) { out_mem[row] = P.at(row,0); } for(uword col=1; col < P_n_cols; ++col) for(uword row=0; row < P_n_rows; ++row) { out_mem[row] += P.at(row,col); } } } } // // cubes template inline void op_sum::apply(Cube& out, const OpCube& in) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const uword dim = in.aux_uword_a; arma_debug_check( (dim > 2), "sum(): parameter 'dim' must be 0 or 1 or 2" ); const ProxyCube P(in.m); if(P.is_alias(out) == false) { op_sum::apply_noalias(out, P, dim); } else { Cube tmp; op_sum::apply_noalias(tmp, P, dim); out.steal_mem(tmp); } } template inline void op_sum::apply_noalias(Cube& out, const ProxyCube& P, const uword dim) { arma_extra_debug_sigprint(); if(is_Cube::stored_type>::value || (arma_config::openmp && ProxyCube::use_mp)) { op_sum::apply_noalias_unwrap(out, P, dim); } else { op_sum::apply_noalias_proxy(out, P, dim); } } template inline void op_sum::apply_noalias_unwrap(Cube& out, const ProxyCube& P, const uword dim) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; typedef typename ProxyCube::stored_type P_stored_type; const unwrap_cube tmp(P.Q); const Cube& X = tmp.M; const uword X_n_rows = X.n_rows; const uword X_n_cols = X.n_cols; const uword X_n_slices = X.n_slices; if(dim == 0) { out.set_size(1, X_n_cols, X_n_slices); for(uword slice=0; slice < X_n_slices; ++slice) { eT* out_mem = out.slice_memptr(slice); for(uword col=0; col < X_n_cols; ++col) { out_mem[col] = arrayops::accumulate( X.slice_colptr(slice,col), X_n_rows ); } } } else if(dim == 1) { out.zeros(X_n_rows, 1, X_n_slices); for(uword slice=0; slice < X_n_slices; ++slice) { eT* out_mem = out.slice_memptr(slice); for(uword col=0; col < X_n_cols; ++col) { arrayops::inplace_plus( out_mem, X.slice_colptr(slice,col), X_n_rows ); } } } else if(dim == 2) { out.zeros(X_n_rows, X_n_cols, 1); eT* out_mem = out.memptr(); for(uword slice=0; slice < X_n_slices; ++slice) { arrayops::inplace_plus(out_mem, X.slice_memptr(slice), X.n_elem_slice ); } } } template inline void op_sum::apply_noalias_proxy(Cube& out, const ProxyCube& P, const uword dim) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; const uword P_n_rows = P.get_n_rows(); const uword P_n_cols = P.get_n_cols(); const uword P_n_slices = P.get_n_slices(); if(dim == 0) { out.set_size(1, P_n_cols, P_n_slices); for(uword slice=0; slice < P_n_slices; ++slice) { eT* out_mem = out.slice_memptr(slice); for(uword col=0; col < P_n_cols; ++col) { eT val1 = eT(0); eT val2 = eT(0); uword i,j; for(i=0, j=1; j < P_n_rows; i+=2, j+=2) { val1 += P.at(i,col,slice); val2 += P.at(j,col,slice); } if(i < P_n_rows) { val1 += P.at(i,col,slice); } out_mem[col] = (val1 + val2); } } } else if(dim == 1) { out.zeros(P_n_rows, 1, P_n_slices); for(uword slice=0; slice < P_n_slices; ++slice) { eT* out_mem = out.slice_memptr(slice); for(uword col=0; col < P_n_cols; ++col) for(uword row=0; row < P_n_rows; ++row) { out_mem[row] += P.at(row,col,slice); } } } else if(dim == 2) { out.zeros(P_n_rows, P_n_cols, 1); for(uword slice=0; slice < P_n_slices; ++slice) { for(uword col=0; col < P_n_cols; ++col) { eT* out_mem = out.slice_colptr(0,col); for(uword row=0; row < P_n_rows; ++row) { out_mem[row] += P.at(row,col,slice); } } } } } //! @}