From eda5bc26f44ee9a6f83dcf8c91f17296d7fc509d Mon Sep 17 00:00:00 2001 From: Nao Pross Date: Mon, 12 Feb 2024 14:52:43 +0100 Subject: Move into version control --- .../include/armadillo_bits/op_sum_meat.hpp | 430 +++++++++++++++++++++ 1 file changed, 430 insertions(+) create mode 100644 src/armadillo/include/armadillo_bits/op_sum_meat.hpp (limited to 'src/armadillo/include/armadillo_bits/op_sum_meat.hpp') diff --git a/src/armadillo/include/armadillo_bits/op_sum_meat.hpp b/src/armadillo/include/armadillo_bits/op_sum_meat.hpp new file mode 100644 index 0000000..f759daa --- /dev/null +++ b/src/armadillo/include/armadillo_bits/op_sum_meat.hpp @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup op_sum +//! @{ + + + +template +inline +void +op_sum::apply(Mat& out, const Op& in) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const uword dim = in.aux_uword_a; + arma_debug_check( (dim > 1), "sum(): parameter 'dim' must be 0 or 1" ); + + const Proxy P(in.m); + + if(P.is_alias(out) == false) + { + op_sum::apply_noalias(out, P, dim); + } + else + { + Mat tmp; + + op_sum::apply_noalias(tmp, P, dim); + + out.steal_mem(tmp); + } + } + + + +template +inline +void +op_sum::apply_noalias(Mat& out, const Proxy& P, const uword dim) + { + arma_extra_debug_sigprint(); + + if(is_Mat::stored_type>::value || (arma_config::openmp && Proxy::use_mp)) + { + op_sum::apply_noalias_unwrap(out, P, dim); + } + else + { + op_sum::apply_noalias_proxy(out, P, dim); + } + } + + + +template +inline +void +op_sum::apply_noalias_unwrap(Mat& out, const Proxy& P, const uword dim) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + typedef typename Proxy::stored_type P_stored_type; + + const unwrap tmp(P.Q); + + const typename unwrap::stored_type& X = tmp.M; + + const uword X_n_rows = X.n_rows; + const uword X_n_cols = X.n_cols; + + const uword out_n_rows = (dim == 0) ? uword(1) : X_n_rows; + const uword out_n_cols = (dim == 0) ? X_n_cols : uword(1); + + out.set_size(out_n_rows, out_n_cols); + + if(X.n_elem == 0) { out.zeros(); return; } + + const eT* X_colptr = X.memptr(); + eT* out_mem = out.memptr(); + + if(dim == 0) + { + for(uword col=0; col < X_n_cols; ++col) + { + out_mem[col] = arrayops::accumulate( X_colptr, X_n_rows ); + + X_colptr += X_n_rows; + } + } + else + { + arrayops::copy(out_mem, X_colptr, X_n_rows); + + X_colptr += X_n_rows; + + for(uword col=1; col < X_n_cols; ++col) + { + arrayops::inplace_plus( out_mem, X_colptr, X_n_rows ); + + X_colptr += X_n_rows; + } + } + } + + + +template +inline +void +op_sum::apply_noalias_proxy(Mat& out, const Proxy& P, const uword dim) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const uword P_n_rows = P.get_n_rows(); + const uword P_n_cols = P.get_n_cols(); + + const uword out_n_rows = (dim == 0) ? uword(1) : P_n_rows; + const uword out_n_cols = (dim == 0) ? P_n_cols : uword(1); + + out.set_size(out_n_rows, out_n_cols); + + if(P.get_n_elem() == 0) { out.zeros(); return; } + + eT* out_mem = out.memptr(); + + if(Proxy::use_at == false) + { + if(dim == 0) + { + uword count = 0; + + for(uword col=0; col < P_n_cols; ++col) + { + eT val1 = eT(0); + eT val2 = eT(0); + + uword j; + for(j=1; j < P_n_rows; j+=2) + { + val1 += P[count]; ++count; + val2 += P[count]; ++count; + } + + if((j-1) < P_n_rows) + { + val1 += P[count]; ++count; + } + + out_mem[col] = (val1 + val2); + } + } + else + { + uword count = 0; + + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] = P[count]; ++count; + } + + for(uword col=1; col < P_n_cols; ++col) + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P[count]; ++count; + } + } + } + else + { + if(dim == 0) + { + for(uword col=0; col < P_n_cols; ++col) + { + eT val1 = eT(0); + eT val2 = eT(0); + + uword i,j; + for(i=0, j=1; j < P_n_rows; i+=2, j+=2) + { + val1 += P.at(i,col); + val2 += P.at(j,col); + } + + if(i < P_n_rows) + { + val1 += P.at(i,col); + } + + out_mem[col] = (val1 + val2); + } + } + else + { + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] = P.at(row,0); + } + + for(uword col=1; col < P_n_cols; ++col) + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P.at(row,col); + } + } + } + } + + + +// +// cubes + + + +template +inline +void +op_sum::apply(Cube& out, const OpCube& in) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const uword dim = in.aux_uword_a; + arma_debug_check( (dim > 2), "sum(): parameter 'dim' must be 0 or 1 or 2" ); + + const ProxyCube P(in.m); + + if(P.is_alias(out) == false) + { + op_sum::apply_noalias(out, P, dim); + } + else + { + Cube tmp; + + op_sum::apply_noalias(tmp, P, dim); + + out.steal_mem(tmp); + } + } + + + +template +inline +void +op_sum::apply_noalias(Cube& out, const ProxyCube& P, const uword dim) + { + arma_extra_debug_sigprint(); + + if(is_Cube::stored_type>::value || (arma_config::openmp && ProxyCube::use_mp)) + { + op_sum::apply_noalias_unwrap(out, P, dim); + } + else + { + op_sum::apply_noalias_proxy(out, P, dim); + } + } + + + +template +inline +void +op_sum::apply_noalias_unwrap(Cube& out, const ProxyCube& P, const uword dim) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + typedef typename ProxyCube::stored_type P_stored_type; + + const unwrap_cube tmp(P.Q); + + const Cube& X = tmp.M; + + const uword X_n_rows = X.n_rows; + const uword X_n_cols = X.n_cols; + const uword X_n_slices = X.n_slices; + + if(dim == 0) + { + out.set_size(1, X_n_cols, X_n_slices); + + for(uword slice=0; slice < X_n_slices; ++slice) + { + eT* out_mem = out.slice_memptr(slice); + + for(uword col=0; col < X_n_cols; ++col) + { + out_mem[col] = arrayops::accumulate( X.slice_colptr(slice,col), X_n_rows ); + } + } + } + else + if(dim == 1) + { + out.zeros(X_n_rows, 1, X_n_slices); + + for(uword slice=0; slice < X_n_slices; ++slice) + { + eT* out_mem = out.slice_memptr(slice); + + for(uword col=0; col < X_n_cols; ++col) + { + arrayops::inplace_plus( out_mem, X.slice_colptr(slice,col), X_n_rows ); + } + } + } + else + if(dim == 2) + { + out.zeros(X_n_rows, X_n_cols, 1); + + eT* out_mem = out.memptr(); + + for(uword slice=0; slice < X_n_slices; ++slice) + { + arrayops::inplace_plus(out_mem, X.slice_memptr(slice), X.n_elem_slice ); + } + } + } + + + +template +inline +void +op_sum::apply_noalias_proxy(Cube& out, const ProxyCube& P, const uword dim) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const uword P_n_rows = P.get_n_rows(); + const uword P_n_cols = P.get_n_cols(); + const uword P_n_slices = P.get_n_slices(); + + if(dim == 0) + { + out.set_size(1, P_n_cols, P_n_slices); + + for(uword slice=0; slice < P_n_slices; ++slice) + { + eT* out_mem = out.slice_memptr(slice); + + for(uword col=0; col < P_n_cols; ++col) + { + eT val1 = eT(0); + eT val2 = eT(0); + + uword i,j; + for(i=0, j=1; j < P_n_rows; i+=2, j+=2) + { + val1 += P.at(i,col,slice); + val2 += P.at(j,col,slice); + } + + if(i < P_n_rows) + { + val1 += P.at(i,col,slice); + } + + out_mem[col] = (val1 + val2); + } + } + } + else + if(dim == 1) + { + out.zeros(P_n_rows, 1, P_n_slices); + + for(uword slice=0; slice < P_n_slices; ++slice) + { + eT* out_mem = out.slice_memptr(slice); + + for(uword col=0; col < P_n_cols; ++col) + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P.at(row,col,slice); + } + } + } + else + if(dim == 2) + { + out.zeros(P_n_rows, P_n_cols, 1); + + for(uword slice=0; slice < P_n_slices; ++slice) + { + for(uword col=0; col < P_n_cols; ++col) + { + eT* out_mem = out.slice_colptr(0,col); + + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P.at(row,col,slice); + } + } + } + } + } + + + +//! @} -- cgit v1.2.1