libstdc++
simd_math.h
1 // Math overloads for simd -*- C++ -*-
2 
3 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <utility>
31 #include <iomanip>
32 
33 _GLIBCXX_SIMD_BEGIN_NAMESPACE
// Alias: a fixed_size_simd of element type _Tp with the same element count as
// the simd (or simd_mask) type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36 
37 // _Math_return_type {{{
// Maps the return type of the scalar <cmath> function (instantiated with
// double arguments) to the matching simd return type:
//  - double -> simd<_Tp, _Abi>
//  - bool   -> simd_mask<_Tp, _Abi>
//  - anything else (e.g. int for ilogb) -> fixed_size_simd of that type with
//    the same element count
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t
    = typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

// scalar function returns double => vector function returns simd<_Tp, _Abi>
template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

// scalar function returns bool => vector function returns the mask type
template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

// fallback: any other scalar return type becomes a fixed_size_simd of it
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56 
57 //}}}
58 // _GLIBCXX_SIMD_MATH_CALL_ {{{
// Generates the one-argument math overload for simd<_Tp, _Abi>: restricted to
// floating-point _Tp, it forwards to the ABI-specific implementation
// _Abi::_SimdImpl::_S_<name> and wraps the result in the return type derived
// (via _Math_return_type_t) from the scalar std::<name>(double) overload.
// Note: no line comments may appear inside the macro body — a '//' would
// swallow the '\' line continuation.
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(simd<_Tp, _Abi> __x) \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
67 
68 // }}}
69 //_Extra_argument_type{{{
// Maps the extra scalar parameter type of a <cmath> function to the
// corresponding simd argument type for the vectorized overload:
//  - _Tp*  (e.g. modf's second arg)  -> simd<_Tp, _Abi>*
//  - _Up*  (integral, e.g. frexp's int*) -> fixed_size_simd<_Up, N>*
//  - _Tp   (e.g. atan2's second arg) -> simd<_Tp, _Abi>
//  - _Up   (integral, e.g. ldexp's int)  -> fixed_size_simd<_Up, N>
// declval() is declared but never defined; it is only used in unevaluated
// decltype contexts to name the scalar type passed to the std:: function.
// _S_data unwraps the simd argument to the internal data representation.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// pointer to the same floating-point type: output parameter of element type
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    // the scalar fallback needs a temporary scalar to pass to std::<name>
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// pointer to a (different) integral type: integer output parameter
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// by-value argument of the same floating-point type
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// by-value argument of a (different) integral type
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
120 
121 //}}}
122 // _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Generates the two-argument math overloads for simd.  The first overload
// takes (simd, _Arg2::type) where _Arg2 maps the scalar extra-argument type
// (__arg2) via _Extra_argument_type; it forwards to
// _Abi::_SimdImpl::_S_<name>.  The second overload accepts anything
// convertible to simd<_Tp, _Abi> as the FIRST argument (e.g. a scalar),
// broadcasts it, and delegates to the first overload; its elaborate trailing
// enable_if in the decltype keeps it out of overload resolution unless
// __arg2 == _Tp, _Up is not already the simd type, and _Tp is floating point.
// Note: no line comments inside the macro body — '//' would swallow the '\'.
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2) \
template < \
  typename _Tp, typename _Abi, typename..., \
  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
  typename _R = _Math_return_type_t< \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
  { \
    return {__private_init, \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  } \
template <typename _Up, typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
    decltype(std::__name( \
	       declval<double>(), \
	       declval<enable_if_t< \
			 conjunction_v< \
			   is_same<__arg2, _Tp>, \
			   negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
			   is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
			 double>>())), \
    _Tp, _Abi> \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
149 
150 // }}}
151 // _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Generates the three-argument math overloads for simd (e.g. fma, fmod with
// three operands).  The first overload takes (simd, _Arg2::type, _Arg3::type)
// and forwards to _Abi::_SimdImpl::_S_<name>.  The second overload handles
// mixed argument forms: it deduces the simd type from whichever of the second
// or third argument is a simd, requires all three to be convertible to it,
// converts, and delegates to the first overload.  The negation<...> clause
// excludes the case already handled by the first overload (a floating-point
// simd as first argument).
// Note: no line comments inside the macro body — '//' would swallow the '\'.
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
	  typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>, \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>(), _Arg2::declval(), \
				 _Arg3::declval())), \
	    _Tp, _Abi>> \
  _GLIBCXX_SIMD_ALWAYS_INLINE \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
	 const typename _Arg3::type& __z) \
  { \
    return {__private_init, \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
					 _Arg3::_S_data(__z))}; \
  } \
template < \
  typename _T0, typename _T1, typename _T2, typename..., \
  typename _U0 = __remove_cvref_t<_T0>, \
  typename _U1 = __remove_cvref_t<_T1>, \
  typename _U2 = __remove_cvref_t<_T2>, \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
  typename = enable_if_t<conjunction_v< \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
    negation<conjunction< \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
					  declval<const _Simd&>(), \
					  declval<const _Simd&>())) \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
  { \
    return __name(_Simd(static_cast<_T0&&>(__xx)), \
		  _Simd(static_cast<_T1&&>(__yy)), \
		  _Simd(static_cast<_T2&&>(__zz))); \
  }
189 
190 // }}}
191 // __cosSeries {{{
// Single-precision polynomial approximation of cos(x) for x folded into
// [-pi/4, pi/4].  Horner evaluation in x^2 with tuned coefficients close to
// the Taylor coefficients 1/8!, -1/6!, 1/4! (see trailing comments); the
// -x^2/2 + 1 terms are added last, outside the Horner chain, to keep the
// dominant terms at full precision.  Evaluation order is significant for
// accuracy — do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  // ~ 1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // ~ -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; // ~ 1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }
203 
// Double-precision polynomial approximation of cos(x) for x folded into
// [-pi/4, pi/4].  Horner evaluation in x^2 up to the 1/16! term; the final
// line folds in the -x^2/2 + 1 terms last for accuracy.  Evaluation order is
// significant — do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              // ~ 1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // ~ -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; // ~ 1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // ~ -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; // ~ 1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // ~ -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  // ~ 1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }
219 
220 // }}}
221 // __sinSeries {{{
// Single-precision polynomial approximation of sin(x) for x folded into
// [-pi/4, pi/4].  Horner evaluation in x^2; the final multiply by x^3 and
// addition of x (the dominant term) happen last for accuracy.  Evaluation
// order is significant — do not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // ~ -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; // ~ 1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // ~ -1/3!
    return __y * (__x2 * __x) + __x;
  }
233 
// Double-precision polynomial approximation of sin(x) for x folded into
// [-pi/4, pi/4].  Horner evaluation in x^2 up to the 1/15! term; the dominant
// x term is added last for accuracy.  Evaluation order is significant — do
// not reassociate.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // ~ -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; // ~ 1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // ~ -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; // ~ 1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // ~ -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  // ~ 1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // ~ -1/3!
    return __y * (__x2 * __x) + __x;
  }
251 
252 // }}}
253 // __zero_low_bits {{{
// Clears the _Bits least significant bits of each element's bit pattern by
// ANDing with (~0u << _Bits) reinterpreted as _Tp.  Used to split a value
// into a high part with trailing-zero mantissa bits (so products with it are
// exact) and a low remainder part.
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
	    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }
263 
264 // }}}
265 // __fold_input {{{
266 
267 /**@internal
268  * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
269  * quadrant 0: [-¼π, ¼π]
270  * quadrant 1: [ ¼π, ¾π]
271  * quadrant 2: [ ¾π, 1¼π]
272  * quadrant 3: [1¼π, 1¾π]
273  *
274  * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
275  * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
276  * ```
277  * y = trunc(x / ¼π);
278  * y += fmod(y, 2);
279  * ```
280  * This can be simplified by moving the (implicit) division by 2 into the
281  * truncation expression. The `+= fmod` effect can the be achieved by using
282  * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
283  * `2/π * x` is better (faster).
284  */
// Result of trig argument reduction (see __fold_input): _M_x is the input
// folded into [-pi/4, pi/4] and _M_quadrant (values 0-3) records which
// quarter-turn the input came from.
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;                            // folded value in [-pi/4, pi/4]
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; // quadrant index (mod 4)
  };
291 
// Constants for trig argument reduction, given as exact hex floats.
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
// π/2 rounded so its 5 least significant mantissa bits are zero; products
// with small integers are then exact, and the remainder is applied separately
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2
} // namespace __math_double
305 
// Folds the single-precision input into [-pi/4, pi/4] and computes the
// quadrant (see _Folded).  Three tiers, each gated on ALL elements:
//  1. all |x| < pi/4: already folded, quadrant 0;
//  2. all |x| < 6*pi/4: subtract y*pi/2 in two steps using the split
//     __pi_2_5bits0 + __pi_2_5bits0_rem (first product exact because y is a
//     small integer and the constant has 5 trailing zero mantissa bits);
//  3. otherwise: redo the reduction in double precision and cast back.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    // fold is symmetric: work on |x|; callers handle the sign via quadrant
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
	// y = round(x / (π/2)); nearbyint gives the nearest multiple
	const _V __y = nearbyint(__r._M_x * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
	// two-step subtraction: exact high part first, then the remainder
	__r._M_x -= __y * __pi_2_5bits0;
	__r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
	// large inputs: single precision loses too many bits — reduce in double
	using __math_double::__2_over_pi;
	using __math_double::__pi_2;
	using _VD = rebind_simd_t<double, _V>;
	_VD __xd = static_simd_cast<_VD>(__r._M_x);
	_VD __y = nearbyint(__xd * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
349 
// Folds the double-precision input into [-pi/4, pi/4] and computes the
// quadrant (see _Folded).  The multiple y = round(x / (π/2)) is subtracted
// using a split representation of π/2, with more split terms the larger y
// can be (so every partial product y*π_2_i stays exact or nearly exact):
//  - all |x| < 1025*π/4 (y fits in 11 mantissa bits): 3-term split;
//  - all y <= 2^30 (y fits in 29 mantissa bits): a different 3-term split;
//  - otherwise: split y itself into high/low halves (__zero_low_bits<26>)
//    and use a 4-term split of π/2.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    // fold is symmetric: work on |x|; callers handle the sign via quadrant
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
	// already folded — nothing to subtract
	__r._M_quadrant = 0;
	return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4)); // round(x / (π/2))
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;       // y mod 4

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
	// x - y * pi/2, y uses no more than 11 mantissa bits
	__r._M_x -= __y * 0x1.921FB54443000p0;
	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
	// x - y * pi/2, y uses no more than 29 mantissa bits
	__r._M_x -= __y * 0x1.921FB40000000p0;
	__r._M_x -= __y * 0x1.4442D00000000p-24;
	__r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
	// x - y * pi/2, y may require all mantissa bits
	const _V __y_hi = __zero_low_bits<26>(__y);
	const _V __y_lo = __y - __y_hi;
	const auto __pi_2_1 = 0x1.921FB50000000p0;
	const auto __pi_2_2 = 0x1.110B460000000p-26;
	const auto __pi_2_3 = 0x1.1A62630000000p-54;
	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
	// NOTE(review): the max/min pairs subtract each pair of same-scale
	// partial products in a fixed magnitude order — presumably to bound
	// the rounding error of the long subtraction chain; confirm before
	// touching the ordering.
	__r._M_x = __r._M_x - __y_hi * __pi_2_1
		     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
		     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
401 
402 // }}}
403 // __extract_exponent_as_int {{{
// Extracts the raw (still biased) exponent field of each element as an int
// simd: masks with the bit pattern of +infinity (all exponent bits set, all
// others clear), reinterprets as an unsigned integer, and shifts the mantissa
// bits out.  Note: the exponent bias is NOT removed here — callers (e.g.
// frexp) subtract it themselves.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    using namespace std::experimental::__proposed;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }
419 
420 // }}}
421 // __impl_or_fallback {{{
// Calls __impl_fun(args...) if that call expression is well-formed, otherwise
// __fallback_fun(args...).  Uses the classic int/float overload-ranking
// trick: __impl_or_fallback passes int{}, so the int overload (which prefers
// __impl_fun) wins — unless its trailing return type SFINAEs out, in which
// case the float overload (fallback) is chosen via int -> float conversion.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
			      _Args&&... __args)
  -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

// fallback overload: selected only when the impl call is ill-formed
template <typename ImplFun, typename FallbackFun, typename... _Args,
	  typename = __detail::__odr_helper>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
			      _Args&&... __args)
  -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

// entry point: args are (impl_fun, fallback_fun, call_args...)
template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
443 //}}}
444 
445 // trigonometric functions {{{
// one-argument inverse trig functions: forward to the per-ABI implementation
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
// atan2 takes a second argument of the same floating-point type
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
450 
451 /*
452  * algorithm for sine and cosine:
453  *
454  * The result can be calculated with sine or cosine depending on the π/4 section
 * the input is in. To lowest order, sine ≈ __x - __x³/6 and cosine ≈ 1 - __x²/2.
456  *
457  * sine:
458  * Map -__x to __x and invert the output
459  * Extend precision of __x - n * π/4 by calculating
460  * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
461  *
462  * Calculate Taylor series with tuned coefficients.
463  * Fix sign.
464  */
465 // cos{{{
// Vectorized cos for floating-point simd.  Scalar and fixed-size ABIs defer
// to the per-ABI implementation; otherwise the argument is folded into
// [-pi/4, pi/4] and a sin or cos polynomial is evaluated per quadrant.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
	if constexpr (is_same_v<_Tp, float>)
	  // beyond this magnitude the float reduction is not accurate enough;
	  // compute in double and cast back (threshold presumably chosen
	  // empirically — TODO confirm)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
	    return static_simd_cast<_V>(
	      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	// 0        | cosSeries, +
	// 1        | sinSeries, -
	// 2        | cosSeries, -
	// 3        | sinSeries, +
	using namespace std::experimental::__float_bitwise_operators;
	// (1 + quadrant) << 30 sets bit 31 exactly for quadrants 1 and 2;
	// ANDing with -0.f keeps only that sign bit, matching the table above
	const _V __sign_flip
	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

	// even quadrants use the cos polynomial, odd quadrants the sin one
	const auto __need_cos = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else // some_of(__need_cos): evaluate both, blend per element
	  {
	    _V __r = __sinSeries(__f._M_x);
	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
	    return __r ^ __sign_flip;
	  }
      }
  }
503 
504 template <typename _Tp>
505  _GLIBCXX_SIMD_ALWAYS_INLINE
506  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
507  cos(simd<_Tp, simd_abi::scalar> __x)
508  { return std::cos(__data(__x)); }
509 
510 //}}}
511 // sin{{{
// Vectorized sin for floating-point simd.  Scalar and fixed-size ABIs defer
// to the per-ABI implementation; otherwise the argument is folded into
// [-pi/4, pi/4] and a sin or cos polynomial is evaluated per quadrant.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
	if constexpr (is_same_v<_Tp, float>)
	  // beyond this magnitude the float reduction is not accurate enough;
	  // compute in double and cast back (threshold presumably chosen
	  // empirically — TODO confirm)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
	    return static_simd_cast<_V>(
	      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	// 0        | sinSeries
	// 1        | cosSeries
	// 2        | sinSeries, sign flip
	// 3        | cosSeries, sign flip
	using namespace std::experimental::__float_bitwise_operators;
	// sin is odd, so the input's sign is XORed in; 1 - quadrant is
	// negative (sign bit set) exactly for quadrants 2 and 3, matching the
	// table above
	const auto __sign_flip
	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

	// even quadrants use the sin polynomial, odd quadrants the cos one
	const auto __need_sin = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else // some_of(__need_sin): evaluate both, blend per element
	  {
	    _V __r = __cosSeries(__f._M_x);
	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
	    return __sign_flip ^ __r;
	  }
      }
  }
549 
550 template <typename _Tp>
551  _GLIBCXX_SIMD_ALWAYS_INLINE
552  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
553  sin(simd<_Tp, simd_abi::scalar> __x)
554  { return std::sin(__data(__x)); }
555 
556 //}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
// hyperbolic and inverse hyperbolic functions
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
564 // }}}
565 // exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)
569 
570 // }}}
571 // frexp {{{
572 #if _GLIBCXX_SIMD_X86INTRIN
// Extracts the unbiased exponent of each element as a floating-point value
// using the AVX512 vgetexp instruction, selecting the narrowest available
// form for the vector width.  With AVX512VL the native-width instruction is
// used; with only AVX512F the operand is widened to 512 bits and the result
// narrowed back.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }
601 
// Extracts the normalized mantissa of each element using the AVX512 vgetmant
// instruction with _MM_MANT_NORM_p5_1 (result magnitude in [0.5, 1)) and
// _MM_MANT_SIGN_src (the input's sign is preserved) — exactly the mantissa
// contract of frexp.  Same width-dispatch scheme as __getexp above.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
					   _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
					      _MM_MANT_NORM_p5_1,
					      _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
636 #endif // _GLIBCXX_SIMD_X86INTRIN
637 
/**
 * Splits @p __x into exponent and mantissa; the sign stays with the mantissa.
 *
 * The returned mantissa is in the range [0.5, 1.0[ (sign applied), and
 * @p __exp receives the integer power-of-two exponent, so that
 * mantissa * 2^exp == __x.
 *
 * Implementation paths: single-element -> std::frexp; SVE -> per-element
 * std::frexp; fixed-size -> per-ABI _S_frexp; AVX512F -> vgetexp/vgetmant;
 * otherwise a generic bit-manipulation fallback.
 */
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
	// single element: call the scalar function directly
	int __tmp;
	const auto __r = std::frexp(__x[0], &__tmp);
	(*__exp)[0] = __tmp;
	return __r;
      }
    else if constexpr (__is_sve_abi<_Abi>())
      {
	// SVE: no vector implementation available — loop over the elements
	simd<_Tp, _Abi> __r;
	__execute_n_times<simd_size_v<_Tp, _Abi>>(
	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	    int __tmp;
	    const auto __ri = std::frexp(__x[__i], &__tmp);
	    (*__exp)[__i] = __tmp;
	    __r[__i] = __ri;
	  });
	return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
	// vgetexp yields exponent - 1 relative to frexp's convention, hence
	// the +1 below; zero inputs are special-cased via the blend (frexp of
	// ±0 must produce exponent 0 and return the input unchanged)
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	constexpr size_t _NI = _Np < 4 ? 4 : _Np; // int vector needs >= 4 lanes
	const auto __v = __data(__x);
	const auto __isnonzero
	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
	const _SimdWrapper<int, _NI> __exp_plus1
	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
				      _SimdWrapper<int, _NI>(), __exp_plus1));
	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
	return {__private_init,
		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
					      __isnonzero),
					    __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
	// fallback implementation: operate on the IEEE-754 bit patterns
	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
	using _V = simd<_Tp, _Abi>;
	using _IV = rebind_simd_t<int, _V>;
	using namespace std::experimental::__proposed;
	using namespace std::experimental::__float_bitwise_operators;

	// bias - 1 (frexp's mantissa is in [.5, 1), one below the usual bias)
	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
	// extra exponent correction for inputs scaled up by __subnorm_scale
	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
	// 2^112 / 2^512: multiplying a subnormal by this makes it normal
	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

	// force the exponent field to the [.5, 1) pattern, keep sign+mantissa
	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
	const _IV __exponent_bits = __extract_exponent_as_int(__x);
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
	  {
	    // common case: all normal, just unbias the exponent field
	    *__exp
	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
	    return __mant;
	  }

#if __FINITE_MATH_ONLY__
	// at least one element of __x is 0 or subnormal, the rest is normal
	// (inf and NaN are excluded by -ffinite-math-only)
	const auto __iszero_inf_nan = __x == 0;
#else
	// detect 0, inf and NaN via the integer bit pattern of |x|:
	// 0 compares equal to 0; inf/NaN compare >= the bit pattern of inf
	using _Ip = __int_for_sizeof_t<_Tp>;
	const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
	const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
	  __as_int == 0 || __as_int >= __inf);
#endif

	// subnormals: scale into the normal range, redo the mantissa/exponent
	// extraction, and compensate with __exp_offset below
	const _V __scaled_subnormal = __x * __subnorm_scale;
	const _V __mant_subnormal
	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
	where(!isnormal(__x), __mant) = __mant_subnormal;
	// 0, inf, NaN: frexp returns the input itself as "mantissa"
	where(__iszero_inf_nan, __mant) = __x;
	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
	// choose a mask type with element size matching _IV for the blends
	using _MaskType =
	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
				 _V, _IV>::mask_type;
	const _MaskType __value_isnormal = isnormal(__x).__cvt();
	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
	// bias to subtract: plain bias for normals; bias + scale correction
	// for subnormals (exponent field 0 but value nonzero); 0 otherwise
	const _IV __offset
	  = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
	    | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
				  & static_simd_cast<_MaskType>(__x != 0))
	       & _IV(__exp_adjust + __exp_offset));
	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
	return __mant;
      }
  }
748 
749 // }}}
// ldexp takes an int (vector) second argument; ilogb returns an int simd
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)
752 
753 // logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
758 
759 //}}}
760 // logb{{{
// Vectorized logb: returns the unbiased exponent of each element as a
// floating-point value.  Uses vgetexp where AVX512 is available; otherwise a
// generic bit-pattern fallback with special handling for 0, inf, NaN and
// subnormals.
template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // vgetexp implements logb's contract directly (including the corner
    // cases); dispatch on width and on AVX512VL availability as in __getexp
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__proposed;
	auto __is_normal = isnormal(__x);

	// work on abs(__x) to reflect the return value on Linux for negative
	// inputs (domain-error => implementation-defined value is returned)
	const _V abs_x = abs(__x);

	// __exponent(__x) returns the exponent value (bias removed) as
	// simd<_Up> with integral _Up
	auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	  using namespace std::experimental::__proposed;
	  using _IV = rebind_simd_t<
	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
	  return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
		   - (__max_exponent_v<_Tp> - 1);
	};
	_V __r = static_simd_cast<_V>(__exponent(abs_x));
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
	  // without corner cases (nan, inf, subnormal, zero) we have our
	  // answer:
	  return __r;
	// corner cases per the logb contract:
	// logb(±0) = -inf, logb(NaN) = NaN, logb(±inf) = +inf
	const auto __is_zero = __x == 0;
	const auto __is_nan = isnan(__x);
	const auto __is_inf = isinf(__x);
	where(__is_zero, __r) = -__infinity_v<_Tp>;
	where(__is_nan, __r) = __x;
	where(__is_inf, __r) = __infinity_v<_Tp>;
	__is_normal |= __is_zero || __is_nan || __is_inf;
	if (all_of(__is_normal))
	  // at this point everything but subnormals is handled
	  return __r;
	// subnormals repeat the exponent extraction after multiplication of the
	// input with __a floating point value that has 112 (0x70) in its exponent
	// (not too big for sp and large enough for dp)
	const _V __scaled = abs_x * _Tp(0x1p112);
	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
	where(__is_normal, __scaled_exp) = __r;
	return __scaled_exp;
      }
  }
834 
835 //}}}
836 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
837  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
838  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
839  {
840  if constexpr (simd_size_v<_Tp, _Abi> == 1)
841  {
842  _Tp __tmp;
843  _Tp __r = std::modf(__x[0], &__tmp);
844  __iptr[0] = __tmp;
845  return __r;
846  }
847  else
848  {
849  const auto __integral = trunc(__x);
850  *__iptr = __integral;
851  auto __r = __x - __integral;
852 #if !__FINITE_MATH_ONLY__
853  where(isinf(__x), __r) = _Tp();
854 #endif
855  return copysign(__r, __x);
856  }
857  }
858 
// scalbn/scalbln: per-element x * 2^exp with an int/long exponent (the macro,
// defined earlier in this header and #undef'ed at the end, generates the simd
// overload).
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

// element-wise cube root
_GLIBCXX_SIMD_MATH_CALL_(cbrt)

// element-wise absolute value for floating-point simd; the signed-integral
// overload follows below
_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
866 
867 // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
868 // allow signed integral _Tp
// Element-wise absolute value for signed integral simd, dispatching to the
// ABI-specific implementation. NOTE(review): presumably, like scalar
// std::abs, the most negative value has no representable result — confirm
// against _SimdImpl::_S_abs.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
874 
// Generates two additional overloads of _NAME taking one simd argument and
// one argument that is merely convertible to the same simd type (e.g. a
// scalar). __type_identity_t blocks template argument deduction on that
// parameter, so deduction succeeds from the plain simd parameter alone; the
// body then forwards to the two-simd overload of _NAME.
#define _GLIBCXX_SIMD_CVTING2(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }
889 
// Three-argument analogue of _GLIBCXX_SIMD_CVTING2: generates the six
// overloads where at least one argument is a simd and the remaining ones are
// convertible to the same simd type. __type_identity_t suppresses deduction
// on the converted parameters; each overload forwards to the all-simd
// overload of _NAME.
#define _GLIBCXX_SIMD_CVTING3(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }
938 
// Applies __apply chunk-wise over fixed_size simd arguments: each native
// chunk of the fixed-size ABI is rewrapped as its native simd type, passed to
// __apply, and the per-chunk results are recombined into an _R.
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
		     const _Tps&... __args)
  {
    return {__private_init,
	    __data(__arg0)._M_apply_per_chunk(
	      [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		// __impl identifies the chunk's native ABI; rebuild simd
		// objects of that type for the user-visible callable
		using _V = typename decltype(__impl)::simd_type;
		return __data(__apply(_V(__private_init, __inner)...));
	      },
	      __data(__args)...)};
  }
952 
// Overflow/underflow-safe sqrt(x² + y²). Strategy: derive an exact
// power-of-two scale from the larger magnitude, compute the sqrt on the
// scaled values, and scale back — sqrt(x²+y²) = e·sqrt((x/e)²+(y/e)²).
// Annex F special-value fixups (inf/NaN/zero) are applied where required.
template <typename _VV, typename = __detail::__odr_helper>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
	// fixed_size: recurse per native chunk
	return __fixed_size_apply<_V>([](auto __a,
					 auto __b) { return hypot(__a, __b); },
				      __x, __y);
      }
    else
      {
	// A simple solution for _Tp == float would be to cast to double and
	// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
	// dp. It still needs the Annex F fixups though and isn't faster on
	// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
	// AVX-512).
	using namespace __float_bitwise_operators;
	using namespace __proposed;
	_V __absx = abs(__x);          // no error
	_V __absy = abs(__y);          // no error
	_V __hi = max(__absx, __absy); // no error
	_V __lo = min(__absy, __absx); // no error

	// round __hi down to the next power-of-2:
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	if constexpr (__have_neon && !__have_neon_a32)
	  { // With ARMv7 NEON, we have no subnormals and must use slightly
	    // different strategy
	    const _V __hi_exp = __hi & __inf;
	    _V __scale_back = __hi_exp;
	    // For large exponents (max & max/2) the inversion comes too close
	    // to subnormals. Subtract 3 from the exponent:
	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
	    // Invert and adjust for the off-by-one error of inversion via xor:
	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
	    const _V __h1 = __hi * __scale;
	    const _V __l1 = __lo * __scale;
	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
	    // Fix up hypot(0, 0) to not be NaN:
	    where(__hi == 0, __r) = 0;
	    return __r;
	  }
#endif

#ifdef __FAST_MATH__
	// With fast-math, ignore precision of subnormals and inputs from
	// __finite_max_v/2 to __finite_max_v. This removes all
	// branching/masking.
	if constexpr (true)
#else
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
				      && all_of(isnormal(__y))))
#endif
	  {
	    const _V __hi_exp = __hi & __inf;
	    //((__hi + __hi) & __inf) ^ __inf almost works for computing
	    //__scale,
	    // except when (__hi + __hi) & __inf == __inf, in which case __scale
	    // becomes 0 (should be min/2 instead) and thus loses the
	    // information from __lo.
#ifdef __FAST_MATH__
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    using _IV = rebind_simd_t<_Ip, _V>;
	    const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
	    const _V __scale
	      = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
	    const _V __h1 = (__hi & __mant_mask) | _V(1);
	    const _V __l1 = __lo * __scale;
	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
	  }
	else
	  {
	    // slower path to support subnormals
	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
	    // (which yields NaN) by using min()
	    _V __scale = _V(1 / __norm_min_v<_Tp>);
	    // invert exponent w/o error and w/o using the slow divider unit:
	    // xor inverts the exponent but off by 1. Multiplication with .5
	    // adjusts for the discrepancy.
	    where(__hi >= __norm_min_v<_Tp>, __scale)
	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
	    // adjust final exponent for subnormal inputs
	    _V __hi_exp = __norm_min_v<_Tp>;
	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
	      = __hi & __inf;          // no error
	    _V __h1 = __hi * __scale;  // no error
	    _V __l1 = __lo * __scale;  // no error

	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
	    // this ensures no overflow in the argument to sqrt
	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    // the naive fixup goes like this:
	    //
	    // where(__l1 == 0, __r) = __hi;
	    // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
	    //
	    // The fixup can be prepared in parallel with the sqrt, requiring a
	    // single blend step after hi_exp * sqrt, reducing latency and
	    // throughput:
	    _V __fixup = __hi; // __lo == 0
	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
	    where(!(__lo == 0 || isunordered(__x, __y)
		      || (isinf(__absx) || isinf(__absy))),
		  __fixup)
	      = __r;
	    __r = __fixup;
#endif
	    return __r;
	  }
      }
  }
1080 
1081 template <typename _Tp, typename _Abi>
1082  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1083  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1084  {
1085  return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1086  const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1087  __y);
1088  }
1089 
1090 _GLIBCXX_SIMD_CVTING2(hypot)
1091 
  // Overflow/underflow-safe sqrt(x² + y² + z²), same scaling strategy as the
  // 2-argument __hypot above: scale by a power of two derived from the
  // largest magnitude, sqrt, scale back, then apply Annex F fixups.
  template <typename _VV, typename = __detail::__odr_helper>
    __remove_cvref_t<_VV>
    __hypot(_VV __x, _VV __y, _VV __z)
    {
      using _V = __remove_cvref_t<_VV>;
      using _Abi = typename _V::abi_type;
      using _Tp = typename _V::value_type;
      /* FIXME: enable after PR77776 is resolved
      if constexpr (_V::size() == 1)
	return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
      else
      */
      if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
	{
	  // fixed_size: recurse per native chunk
	  return __fixed_size_apply<simd<_Tp, _Abi>>(
	    [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
	      return hypot(__a, __b, __c);
	    }, __x, __y, __z);
	}
      else
	{
	  using namespace __float_bitwise_operators;
	  using namespace __proposed;
	  const _V __absx = abs(__x);                 // no error
	  const _V __absy = abs(__y);                 // no error
	  const _V __absz = abs(__z);                 // no error
	  // __hi is the largest magnitude, __l0/__l1 the two smaller ones
	  _V __hi = max(max(__absx, __absy), __absz); // no error
	  _V __l0 = min(__absz, max(__absx, __absy)); // no error
	  _V __l1 = min(__absy, __absx);              // no error
	  if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
			  && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
	    { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
	      // NaN. In this case the bit-tricks don't work, they require IEC559
	      // binary32 or binary64 format.
#ifdef __STDC_IEC_559__
	      // fixup for Annex F requirements
	      if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
		return __infinity_v<_Tp>;
	      else if (isunordered(__absx[0], __absy[0] + __absz[0]))
		return __quiet_NaN_v<_Tp>;
	      else if (__l0[0] == 0 && __l1[0] == 0)
		return __hi;
#endif
	      // overwrite __hi_exp's significand with 1.0, keeping its exponent
	      _V __hi_exp = __hi;
	      const _ULLong __tmp = 0x8000'0000'0000'0000ull;
	      __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
	      const _V __scale = 1 / __hi_exp;
	      __hi *= __scale;
	      __l0 *= __scale;
	      __l1 *= __scale;
	      return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
	    }
	  else
	    {
	      // round __hi down to the next power-of-2:
	      _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	      if constexpr (_V::size() > 1
			      && __is_neon_abi<_Abi>() && __have_neon
			      && !__have_neon_a32)
		{ // With ARMv7 NEON, we have no subnormals and must use slightly
		  // different strategy
		  const _V __hi_exp = __hi & __inf;
		  _V __scale_back = __hi_exp;
		  // For large exponents (max & max/2) the inversion comes too
		  // close to subnormals. Subtract 3 from the exponent:
		  where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
		  // Invert and adjust for the off-by-one error of inversion via
		  // xor:
		  const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
		  const _V __h1 = __hi * __scale;
		  __l0 *= __scale;
		  __l1 *= __scale;
		  _V __lo = __l0 * __l0
			      + __l1 * __l1; // add the two smaller values first
		  asm("" : "+m"(__lo));
		  _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
		  // Fix up hypot(0, 0, 0) to not be NaN:
		  where(__hi == 0, __r) = 0;
		  return __r;
		}
#endif

#ifdef __FAST_MATH__
	      // With fast-math, ignore precision of subnormals and inputs from
	      // __finite_max_v/2 to __finite_max_v. This removes all
	      // branching/masking.
	      if constexpr (true)
#else
	      if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
					    && all_of(isnormal(__y))
					    && all_of(isnormal(__z))))
#endif
		{
		  const _V __hi_exp = __hi & __inf;
		  //((__hi + __hi) & __inf) ^ __inf almost works for computing
		  //__scale, except when (__hi + __hi) & __inf == __inf, in which
		  // case __scale
		  // becomes 0 (should be min/2 instead) and thus loses the
		  // information from __lo.
#ifdef __FAST_MATH__
		  using _Ip = __int_for_sizeof_t<_Tp>;
		  using _IV = rebind_simd_t<_Ip, _V>;
		  const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
		  const _V __scale
		    = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
		  const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
		  constexpr _Tp __mant_mask
		    = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
		  const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
		  __l0 *= __scale;
		  __l1 *= __scale;
		  const _V __lo
		    = __l0 * __l0
			+ __l1 * __l1; // add the two smaller values first
		  return __hi_exp * sqrt(__lo + __h1 * __h1);
		}
	      else
		{
		  // slower path to support subnormals
		  // if __hi is subnormal, avoid scaling by inf & final mul by 0
		  // (which yields NaN) by using min()
		  _V __scale = _V(1 / __norm_min_v<_Tp>);
		  // invert exponent w/o error and w/o using the slow divider
		  // unit: xor inverts the exponent but off by 1. Multiplication
		  // with .5 adjusts for the discrepancy.
		  where(__hi >= __norm_min_v<_Tp>, __scale)
		    = ((__hi & __inf) ^ __inf) * _Tp(.5);
		  // adjust final exponent for subnormal inputs
		  _V __hi_exp = __norm_min_v<_Tp>;
		  where(__hi >= __norm_min_v<_Tp>, __hi_exp)
		    = __hi & __inf;         // no error
		  _V __h1 = __hi * __scale; // no error
		  __l0 *= __scale;          // no error
		  __l1 *= __scale;          // no error
		  _V __lo = __l0 * __l0
			      + __l1 * __l1; // add the two smaller values first
		  _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
		  // fixup for Annex F requirements
		  _V __fixup = __hi; // __lo == 0
		  // where(__lo == 0, __fixup) = __hi;
		  where(isunordered(__x, __y + __z), __fixup)
		    = __quiet_NaN_v<_Tp>;
		  where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
		    = __inf;
		  // Instead of __lo == 0, the following could depend on __h1² ==
		  // __h1² + __lo (i.e. __hi is so much larger than the other two
		  // inputs that the result is exactly __hi). While this may
		  // improve precision, it is likely to reduce efficiency if the
		  // ISA has FMAs (because __h1² + __lo is an FMA, but the
		  // intermediate
		  // __h1² must be kept)
		  where(!(__lo == 0 || isunordered(__x, __y + __z)
			    || isinf(__absx) || isinf(__absy) || isinf(__absz)),
			__fixup)
		    = __r;
		  __r = __fixup;
#endif
		  return __r;
		}
	    }
	}
    }
1258 
1259  template <typename _Tp, typename _Abi>
1260  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1261  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1262  const simd<_Tp, _Abi>& __z)
1263  {
1264  return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1265  const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1266  __y,
1267  __z);
1268  }
1269 
// also accept mixed simd/convertible arguments for 3-argument hypot
_GLIBCXX_SIMD_CVTING3(hypot)

// element-wise x raised to the power y
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)

// element-wise rounding to integral values (result stays floating-point,
// except for the l/ll variants which return integral simd)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

// element-wise remainder operations
_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1295 
1296 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1297  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1298  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1299  {
1300  if constexpr (simd_size_v<_Tp, _Abi> == 1)
1301  return std::copysign(__x[0], __y[0]);
1302  else if constexpr (__is_fixed_size_abi_v<_Abi>)
1303  return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
1304  else
1305  {
1306  using _V = simd<_Tp, _Abi>;
1307  using namespace std::experimental::__float_bitwise_operators;
1308  _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
1309  return (__x & ~__signmask) | (__y & __signmask);
1310  }
1311  }
1312 
// element-wise next representable value towards the second argument
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
// positive difference, maximum, minimum — element-wise
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

// fused multiply-add and classification
_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1323 
1324 // isnan and isinf require special treatment because old glibc may declare
1325 // `int isinf(double)`.
// Mask overload of isinf. __x is taken by value and the return type is a
// computed template parameter so that this overload does not clash with a C
// library declaration of `int isinf(double)` (see comment above).
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1332 
// Mask overload of isnan; same shape as isinf above to avoid clashing with a
// C library declaration of `int isnan(double)`.
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
1339 
1340 _GLIBCXX_SIMD_MATH_CALL_(isnormal)
1341 
1342 template <typename..., typename _Tp, typename _Abi>
1343  _GLIBCXX_SIMD_ALWAYS_INLINE
1344  simd_mask<_Tp, _Abi>
1345  signbit(simd<_Tp, _Abi> __x)
1346  {
1347  if constexpr (is_integral_v<_Tp>)
1348  {
1349  if constexpr (is_unsigned_v<_Tp>)
1350  return simd_mask<_Tp, _Abi>{}; // false
1351  else
1352  return __x < 0;
1353  }
1354  else
1355  return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1356  }
1357 
// element-wise quiet (non-signaling for quiet NaNs) comparisons, returning
// masks
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1364 
1365 /* not covered in [parallel.simd.math]
1366 template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1367 template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1368 template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1369 
1370 template <typename _V> struct simd_div_t {
1371  _V quot, rem;
1372 };
1373 
1374 template <typename _Abi>
1375 simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1376  _SCharv<_Abi> denom);
1377 template <typename _Abi>
1378 simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1379  __shortv<_Abi> denom);
1380 template <typename _Abi>
1381 simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1382 template <typename _Abi>
1383 simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1384  __longv<_Abi> denom);
1385 template <typename _Abi>
1386 simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1387  __llongv<_Abi> denom);
1388 */
1389 
1390 // special math {{{
1391 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1392  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1393  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1394  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1395  const simd<_Tp, _Abi>& __x)
1396  {
1397  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1398  return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1399  });
1400  }
1401 
1402 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1403  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1404  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1405  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1406  const simd<_Tp, _Abi>& __x)
1407  {
1408  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1409  return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1410  });
1411  }
1412 
// special math functions whose parameters are all simd (no unsigned-order
// argument); element-wise scalar fallback generated by the macros
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1425 
1426 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1427  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1428  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1429  const simd<_Tp, _Abi>& __x)
1430  {
1431  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1432  return std::hermite(__n[__i], __x[__i]);
1433  });
1434  }
1435 
1436 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1437  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1438  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1439  const simd<_Tp, _Abi>& __x)
1440  {
1441  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1442  return std::laguerre(__n[__i], __x[__i]);
1443  });
1444  }
1445 
1446 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1447  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1448  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1449  const simd<_Tp, _Abi>& __x)
1450  {
1451  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1452  return std::legendre(__n[__i], __x[__i]);
1453  });
1454  }
1455 
1456 _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1457 
1458 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1459  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1460  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1461  const simd<_Tp, _Abi>& __x)
1462  {
1463  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1464  return std::sph_bessel(__n[__i], __x[__i]);
1465  });
1466  }
1467 
1468 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1469  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1470  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1471  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1472  const simd<_Tp, _Abi>& theta)
1473  {
1474  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1475  return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1476  });
1477  }
1478 
1479 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1480  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1481  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1482  const simd<_Tp, _Abi>& __x)
1483  {
1484  return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1485  return std::sph_neumann(__n[__i], __x[__i]);
1486  });
1487  }
1488 // }}}
1489 
1490 #undef _GLIBCXX_SIMD_CVTING2
1491 #undef _GLIBCXX_SIMD_CVTING3
1492 #undef _GLIBCXX_SIMD_MATH_CALL_
1493 #undef _GLIBCXX_SIMD_MATH_CALL2_
1494 #undef _GLIBCXX_SIMD_MATH_CALL3_
1495 
1496 _GLIBCXX_SIMD_END_NAMESPACE
1497 
1498 #endif // __cplusplus >= 201703L
1499 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1500 
1501 // vim: foldmethod=marker sw=2 ts=8 noet sts=2
ISO C++ entities toplevel namespace is std.
complex< _Tp > tan(const complex< _Tp > &)
Return complex tangent of z.
Definition: complex:1226
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:257
complex< _Tp > exp(const complex< _Tp > &)
Return complex base e exponential of z.
Definition: complex:1063
complex< _Tp > cos(const complex< _Tp > &)
Return complex cosine of z.
Definition: complex:1007
complex< _Tp > log10(const complex< _Tp > &)
Return complex base 10 logarithm of z.
Definition: complex:1095
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:233
complex< _Tp > cosh(const complex< _Tp > &)
Return complex hyperbolic cosine of z.
Definition: complex:1037
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition: type_traits:2470
complex< _Tp > sinh(const complex< _Tp > &)
Return complex hyperbolic sine of z.
Definition: complex:1155
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition: type_traits:2700
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:262
complex< _Tp > sqrt(const complex< _Tp > &)
Return complex square root of z.
Definition: complex:1199
_Tp fabs(const std::complex< _Tp > &)
fabs(__z) [8.1.8].
Definition: complex:2429
_Tp abs(const complex< _Tp > &)
Return magnitude of z.
Definition: complex:896
complex< _Tp > log(const complex< _Tp > &)
Return complex natural logarithm of z.
Definition: complex:1090
complex< _Tp > tanh(const complex< _Tp > &)
Return complex hyperbolic tangent of z.
Definition: complex:1254
complex< _Tp > sin(const complex< _Tp > &)
Return complex sine of z.
Definition: complex:1125