diff options
Diffstat (limited to 'src/armadillo/include/armadillo_bits/memory.hpp')
-rw-r--r-- | src/armadillo/include/armadillo_bits/memory.hpp | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/src/armadillo/include/armadillo_bits/memory.hpp b/src/armadillo/include/armadillo_bits/memory.hpp new file mode 100644 index 0000000..ffa4d2c --- /dev/null +++ b/src/armadillo/include/armadillo_bits/memory.hpp @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup memory +//! @{ + + +class memory + { + public: + + template<typename eT> arma_malloc inline static eT* acquire(const uword n_elem); + + template<typename eT> arma_inline static void release(eT* mem); + + template<typename eT> arma_inline static bool is_aligned(const eT* mem); + template<typename eT> arma_inline static void mark_as_aligned( eT*& mem); + template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem); + }; + + + +template<typename eT> +arma_malloc +inline +eT* +memory::acquire(const uword n_elem) + { + if(n_elem == 0) { return nullptr; } + + arma_debug_check + ( + ( size_t(n_elem) > (std::numeric_limits<size_t>::max() / sizeof(eT)) ), + "arma::memory::acquire(): requested size is too large" + ); + + eT* out_memptr; + + #if defined(ARMA_ALIEN_MEM_ALLOC_FUNCTION) + { + out_memptr = (eT *) ARMA_ALIEN_MEM_ALLOC_FUNCTION(sizeof(eT)*n_elem); + } + #elif defined(ARMA_USE_TBB_ALLOC) + { + out_memptr = (eT *) scalable_malloc(sizeof(eT)*n_elem); + } + #elif defined(ARMA_USE_MKL_ALLOC) + { + out_memptr = (eT *) mkl_malloc( sizeof(eT)*n_elem, 32 ); + } + #elif defined(ARMA_HAVE_POSIX_MEMALIGN) + { + eT* memptr = nullptr; + + const size_t n_bytes = sizeof(eT)*size_t(n_elem); + const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16); + + // TODO: investigate apparent memory leak when using alignment >= 64 (as shown on Fedora 28, glibc 2.27) + int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), n_bytes); + + out_memptr = (status == 0) ? memptr : nullptr; + } + #elif defined(_MSC_VER) + { + // Windoze is too primitive to handle C++17 std::aligned_alloc() + + //out_memptr = (eT *) malloc(sizeof(eT)*n_elem); + //out_memptr = (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ); // lives in malloc.h + + const size_t n_bytes = sizeof(eT)*size_t(n_elem); + const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16); + + out_memptr = (eT *) _aligned_malloc( n_bytes, alignment ); + } + #else + { + //return ( new(std::nothrow) eT[n_elem] ); + out_memptr = (eT *) malloc(sizeof(eT)*n_elem); + } + #endif + + // TODO: for mingw, use __mingw_aligned_malloc + + arma_check_bad_alloc( (out_memptr == nullptr), "arma::memory::acquire(): out of memory" ); + + return out_memptr; + } + + + +template<typename eT> +arma_inline +void +memory::release(eT* mem) + { + if(mem == nullptr) { return; } + + #if defined(ARMA_ALIEN_MEM_FREE_FUNCTION) + { + ARMA_ALIEN_MEM_FREE_FUNCTION( (void *)(mem) ); + } + #elif defined(ARMA_USE_TBB_ALLOC) + { + scalable_free( (void *)(mem) ); + } + #elif defined(ARMA_USE_MKL_ALLOC) + { + mkl_free( (void *)(mem) ); + } + #elif defined(ARMA_HAVE_POSIX_MEMALIGN) + { + free( (void *)(mem) ); + } + #elif defined(_MSC_VER) + { + //free( (void *)(mem) ); + _aligned_free( (void *)(mem) ); + } + #else + { + //delete [] mem; + free( (void *)(mem) ); + } + #endif + + // TODO: for mingw, use __mingw_aligned_free + } + + + +template<typename eT> +arma_inline +bool +memory::is_aligned(const eT* mem) + { + #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT) + { + return (sizeof(std::size_t) >= sizeof(eT*)) ? ((std::size_t(mem) & 0x0F) == 0) : false; + } + #else + { + arma_ignore(mem); + + return false; + } + #endif + } + + + +template<typename eT> +arma_inline +void +memory::mark_as_aligned(eT*& mem) + { + #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) + { + __assume_aligned(mem, 16); + } + #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED) + { + mem = (eT*)__builtin_assume_aligned(mem, 16); + } + #else + { + arma_ignore(mem); + } + #endif + + // TODO: look into C++20 std::assume_aligned() + // TODO: https://en.cppreference.com/w/cpp/memory/assume_aligned + + // TODO: MSVC? __assume( (mem & 0x0F) == 0 ); + // + // http://comments.gmane.org/gmane.comp.gcc.patches/239430 + // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned, + // so for lvalue first argument ICC's __assume_aligned can be emulated using + // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align) + // + // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf + // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm + // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf + } + + + +template<typename eT> +arma_inline +void +memory::mark_as_aligned(const eT*& mem) + { + #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) + { + __assume_aligned(mem, 16); + } + #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED) + { + mem = (const eT*)__builtin_assume_aligned(mem, 16); + } + #else + { + arma_ignore(mem); + } + #endif + } + + + +//! @} |