diff options
Diffstat (limited to 'src/armadillo/include/armadillo_bits/fft_engine_kissfft.hpp')
-rw-r--r-- | src/armadillo/include/armadillo_bits/fft_engine_kissfft.hpp | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/src/armadillo/include/armadillo_bits/fft_engine_kissfft.hpp b/src/armadillo/include/armadillo_bits/fft_engine_kissfft.hpp new file mode 100644 index 0000000..0c8c40c --- /dev/null +++ b/src/armadillo/include/armadillo_bits/fft_engine_kissfft.hpp @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ------------------------------------------------------------------------ +// +// This file includes portions of Kiss FFT software, +// licensed under the following conditions. +// +// Copyright (c) 2003-2010 Mark Borgerding +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the author nor the names of any contributors may be used to +// endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ------------------------------------------------------------------------ + + +//! \addtogroup fft_engine_kissfft +//! @{ + + +template<typename cx_type, bool inverse> +class fft_engine_kissfft + { + public: + + typedef typename get_pod_type<cx_type>::result T; + + const uword N; + + podarray<cx_type> coeffs_array; + podarray<cx_type> tmp_array; + + podarray<uword> residue; + podarray<uword> radix; + + + template<bool fill> + inline + uword + calc_radix() + { + uword i = 0; + + for(uword n = N, r=4; n >= 2; ++i) + { + while( (n % r) > 0 ) + { + switch(r) + { + case 2: r = 3; break; + case 4: r = 2; break; + default: r += 2; break; + } + + if(r*r > n) { r = n; } + } + + n /= r; + + if(fill) + { + residue[i] = n; + radix[i] = r; + } + } + + return i; + } + + + + inline + fft_engine_kissfft(const uword in_N) + : N(in_N) + { + arma_extra_debug_sigprint(); + + const uword len = calc_radix<false>(); + + residue.set_size(len); + radix.set_size(len); + + calc_radix<true>(); + + + // calculate the constant coefficients + + coeffs_array.set_size(N); + + cx_type* coeffs = coeffs_array.memptr(); + + const T k = T( (inverse) ? +2 : -2 ) * std::acos( T(-1) ) / T(N); + + for(uword i=0; i < N; ++i) { coeffs[i] = std::exp( cx_type(T(0), i*k) ); } + } + + + + arma_hot + inline + void + butterfly_2(cx_type* Y, const uword stride, const uword m) const + { + // arma_extra_debug_sigprint(); + + const cx_type* coeffs = coeffs_array.memptr(); + + for(uword i=0; i < m; ++i) + { + const cx_type t = Y[i+m] * coeffs[i*stride]; + + Y[i+m] = Y[i] - t; + Y[i ] += t; + } + } + + + + arma_hot + inline + void + butterfly_3(cx_type* Y, const uword stride, const uword m) const + { + // arma_extra_debug_sigprint(); + + arma_aligned cx_type tmp[5]; + + const cx_type* coeffs1 = coeffs_array.memptr(); + const cx_type* coeffs2 = coeffs1; + + const T coeff_sm_imag = coeffs1[stride*m].imag(); + + const uword n = m*2; + + // TODO: rearrange the indices within tmp[] into a more sane order + + for(uword i = m; i > 0; --i) + { + tmp[1] = Y[m] * (*coeffs1); + tmp[2] = Y[n] * (*coeffs2); + + tmp[0] = tmp[1] - tmp[2]; + tmp[0] *= coeff_sm_imag; + + tmp[3] = tmp[1] + tmp[2]; + + Y[m] = cx_type( (Y[0].real() - (T(0.5)*tmp[3].real())), (Y[0].imag() - (T(0.5)*tmp[3].imag())) ); + + Y[0] += tmp[3]; + + + Y[n] = cx_type( (Y[m].real() + tmp[0].imag()), (Y[m].imag() - tmp[0].real()) ); + + Y[m] += cx_type( -tmp[0].imag(), tmp[0].real() ); + + Y++; + + coeffs1 += stride; + coeffs2 += stride*2; + } + } + + + + arma_hot + inline + void + butterfly_4(cx_type* Y, const uword stride, const uword m) const + { + // arma_extra_debug_sigprint(); + + arma_aligned cx_type tmp[7]; + + const cx_type* coeffs = coeffs_array.memptr(); + + const uword m2 = m*2; + const uword m3 = m*3; + + // TODO: rearrange the indices within tmp[] into a more sane order + + for(uword i=0; i < m; ++i) + { + tmp[0] = Y[i + m ] * coeffs[i*stride ]; + tmp[2] = Y[i + m3] * coeffs[i*stride*3]; + tmp[3] = tmp[0] + tmp[2]; + + //tmp[4] = tmp[0] - tmp[2]; + //tmp[4] = (inverse) ? cx_type( -(tmp[4].imag()), tmp[4].real() ) : cx_type( tmp[4].imag(), -tmp[4].real() ); + + tmp[4] = (inverse) + ? cx_type( (tmp[2].imag() - tmp[0].imag()), (tmp[0].real() - tmp[2].real()) ) + : cx_type( (tmp[0].imag() - tmp[2].imag()), (tmp[2].real() - tmp[0].real()) ); + + tmp[1] = Y[i + m2] * coeffs[i*stride*2]; + tmp[5] = Y[i] - tmp[1]; + + + Y[i ] += tmp[1]; + Y[i + m2] = Y[i] - tmp[3]; + Y[i ] += tmp[3]; + Y[i + m ] = tmp[5] + tmp[4]; + Y[i + m3] = tmp[5] - tmp[4]; + } + } + + + + arma_hot + inline + void + butterfly_5(cx_type* Y, const uword stride, const uword m) const + { + // arma_extra_debug_sigprint(); + + arma_aligned cx_type tmp[13]; + + const cx_type* coeffs = coeffs_array.memptr(); + + const T a_real = coeffs[stride*1*m].real(); + const T a_imag = coeffs[stride*1*m].imag(); + + const T b_real = coeffs[stride*2*m].real(); + const T b_imag = coeffs[stride*2*m].imag(); + + cx_type* Y0 = Y; + cx_type* Y1 = Y + 1*m; + cx_type* Y2 = Y + 2*m; + cx_type* Y3 = Y + 3*m; + cx_type* Y4 = Y + 4*m; + + for(uword i=0; i < m; ++i) + { + tmp[0] = (*Y0); + + tmp[1] = (*Y1) * coeffs[stride*1*i]; + tmp[2] = (*Y2) * coeffs[stride*2*i]; + tmp[3] = (*Y3) * coeffs[stride*3*i]; + tmp[4] = (*Y4) * coeffs[stride*4*i]; + + tmp[7] = tmp[1] + tmp[4]; + tmp[8] = tmp[2] + tmp[3]; + tmp[9] = tmp[2] - tmp[3]; + tmp[10] = tmp[1] - tmp[4]; + + (*Y0) += tmp[7]; + (*Y0) += tmp[8]; + + tmp[5] = tmp[0] + cx_type( ( (tmp[7].real() * a_real) + (tmp[8].real() * b_real) ), ( (tmp[7].imag() * a_real) + (tmp[8].imag() * b_real) ) ); + + tmp[6] = cx_type( ( (tmp[10].imag() * a_imag) + (tmp[9].imag() * b_imag) ), ( -(tmp[10].real() * a_imag) - (tmp[9].real() * b_imag) ) ); + + (*Y1) = tmp[5] - tmp[6]; + (*Y4) = tmp[5] + tmp[6]; + + tmp[11] = tmp[0] + cx_type( ( (tmp[7].real() * b_real) + (tmp[8].real() * a_real) ), ( (tmp[7].imag() * b_real) + (tmp[8].imag() * a_real) ) ); + + tmp[12] = cx_type( ( -(tmp[10].imag() * b_imag) + (tmp[9].imag() * a_imag) ), ( (tmp[10].real() * b_imag) - (tmp[9].real() * a_imag) ) ); + + (*Y2) = tmp[11] + tmp[12]; + (*Y3) = tmp[11] - tmp[12]; + + Y0++; + Y1++; + Y2++; + Y3++; + Y4++; + } + } + + + + arma_hot + inline + void + butterfly_N(cx_type* Y, const uword stride, const uword m, const uword r) + { + // arma_extra_debug_sigprint(); + + const cx_type* coeffs = coeffs_array.memptr(); + + tmp_array.set_min_size(r); + cx_type* tmp = tmp_array.memptr(); + + for(uword u=0; u < m; ++u) + { + uword k = u; + + for(uword v=0; v < r; ++v) + { + tmp[v] = Y[k]; + k += m; + } + + k = u; + + for(uword v=0; v < r; ++v) + { + Y[k] = tmp[0]; + + uword j = 0; + + for(uword w=1; w < r; ++w) + { + j += stride * k; + + if(j >= N) { j -= N; } + + Y[k] += tmp[w] * coeffs[j]; + } + + k += m; + } + } + } + + + + inline + void + run(cx_type* Y, const cx_type* X, const uword stage = 0, const uword stride = 1) + { + arma_extra_debug_sigprint(); + + const uword m = residue[stage]; + const uword r = radix[stage]; + + const cx_type *Y_end = Y + r*m; + + if(m == 1) + { + for(cx_type* Yi = Y; Yi != Y_end; Yi++, X += stride) { (*Yi) = (*X); } + } + else + { + const uword next_stage = stage + 1; + const uword next_stride = stride * r; + + for(cx_type* Yi = Y; Yi != Y_end; Yi += m, X += stride) { run(Yi, X, next_stage, next_stride); } + } + + switch(r) + { + case 2: butterfly_2(Y, stride, m ); break; + case 3: butterfly_3(Y, stride, m ); break; + case 4: butterfly_4(Y, stride, m ); break; + case 5: butterfly_5(Y, stride, m ); break; + default: butterfly_N(Y, stride, m, r); break; + } + } + + + }; + + +//! @} |