blob: ec9295197c5ee4e50eec7e64da7a353b0478f247 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H
#define EIGEN_NEON_SPECIALFUNCTIONS_H
namespace Eigen {
namespace internal {
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \
template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { \
const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x))); \
const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); \
return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); \
} \
\
template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
Packet4hf METHOD<Packet4hf>(const Packet4hf& x) { \
return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x))); \
}
NEON_HALF_TO_FLOAT_FUNCTIONS(perf)
NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri)
#undef NEON_HALF_TO_FLOAT_FUNCTIONS
#endif
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf)
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri)
} // namespace internal
} // namespace Eigen
#endif // EIGEN_NEON_SPECIALFUNCTIONS_H
|