Move __HAVE_FAST_FMA to math_config.h
Define it as HAVE_FAST_FMA, consistently with the other HAVE_* macros that only affect code using math_config.h. This also brings the code closer to the Arm Optimized Routines sources.
This commit is contained in:
parent
cbe50607fb
commit
393a1cb4ea
|
@ -65,17 +65,6 @@
|
||||||
double and single precision arithmetic have similar latency and it
|
double and single precision arithmetic have similar latency and it
|
||||||
has no legacy SVID matherr support, only POSIX errno and fenv
|
has no legacy SVID matherr support, only POSIX errno and fenv
|
||||||
exception based error handling.
|
exception based error handling.
|
||||||
|
|
||||||
__HAVE_FAST_FMA_DEFAULT
|
|
||||||
|
|
||||||
Default value for __HAVE_FAST_FMA if that's not set by the user.
|
|
||||||
It should be set here based on predefined feature macros.
|
|
||||||
|
|
||||||
__HAVE_FAST_FMA
|
|
||||||
|
|
||||||
It should be set to 1 if the compiler can inline an fma call as a
|
|
||||||
single instruction. Some math code has a separate faster code
|
|
||||||
path assuming the target has single instruction fma.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__)
|
#if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__)
|
||||||
|
@ -91,9 +80,6 @@
|
||||||
# endif
|
# endif
|
||||||
# if __ARM_FP & 0x8
|
# if __ARM_FP & 0x8
|
||||||
# define __OBSOLETE_MATH_DEFAULT 0
|
# define __OBSOLETE_MATH_DEFAULT 0
|
||||||
# if __ARM_FEATURE_FMA
|
|
||||||
# define __HAVE_FAST_FMA_DEFAULT 1
|
|
||||||
# endif
|
|
||||||
# endif
|
# endif
|
||||||
#else
|
#else
|
||||||
# define __IEEE_BIG_ENDIAN
|
# define __IEEE_BIG_ENDIAN
|
||||||
|
@ -110,7 +96,6 @@
|
||||||
#define __IEEE_BIG_ENDIAN
|
#define __IEEE_BIG_ENDIAN
|
||||||
#endif
|
#endif
|
||||||
#define __OBSOLETE_MATH_DEFAULT 0
|
#define __OBSOLETE_MATH_DEFAULT 0
|
||||||
#define __HAVE_FAST_FMA_DEFAULT 1
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __epiphany__
|
#ifdef __epiphany__
|
||||||
|
@ -479,14 +464,6 @@
|
||||||
#define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT
|
#define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __HAVE_FAST_FMA_DEFAULT
|
|
||||||
/* Assume slow fma by default. */
|
|
||||||
#define __HAVE_FAST_FMA_DEFAULT 0
|
|
||||||
#endif
|
|
||||||
#ifndef __HAVE_FAST_FMA
|
|
||||||
#define __HAVE_FAST_FMA __HAVE_FAST_FMA_DEFAULT
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __IEEE_BIG_ENDIAN
|
#ifndef __IEEE_BIG_ENDIAN
|
||||||
#ifndef __IEEE_LITTLE_ENDIAN
|
#ifndef __IEEE_LITTLE_ENDIAN
|
||||||
#error Endianess not declared!!
|
#error Endianess not declared!!
|
||||||
|
|
|
@ -146,7 +146,7 @@ log (double x)
|
||||||
|
|
||||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
||||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
/* rounding error: 0x1p-55/N. */
|
/* rounding error: 0x1p-55/N. */
|
||||||
r = fma (z, invc, -1.0);
|
r = fma (z, invc, -1.0);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -72,7 +72,7 @@ double
|
||||||
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
|
||||||
return 0;
|
return 0;
|
||||||
r = x - 1.0;
|
r = x - 1.0;
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
hi = r * InvLn2hi;
|
hi = r * InvLn2hi;
|
||||||
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
|
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
|
||||||
#else
|
#else
|
||||||
|
@ -123,7 +123,7 @@ double
|
||||||
|
|
||||||
/* log2(x) = log2(z/c) + log2(c) + k. */
|
/* log2(x) = log2(z/c) + log2(c) + k. */
|
||||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
/* rounding error: 0x1p-55/N. */
|
/* rounding error: 0x1p-55/N. */
|
||||||
r = fma (z, invc, -1.0);
|
r = fma (z, invc, -1.0);
|
||||||
t1 = r * InvLn2hi;
|
t1 = r * InvLn2hi;
|
||||||
|
|
|
@ -134,7 +134,7 @@ const struct log2_data __log2_data = {
|
||||||
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
#if !__HAVE_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
.tab2 = {
|
.tab2 = {
|
||||||
# if N == 64
|
# if N == 64
|
||||||
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
||||||
|
@ -203,6 +203,6 @@ const struct log2_data __log2_data = {
|
||||||
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
||||||
# endif
|
# endif
|
||||||
},
|
},
|
||||||
#endif /* !__HAVE_FAST_FMA */
|
#endif /* !HAVE_FAST_FMA */
|
||||||
};
|
};
|
||||||
#endif /* __OBSOLETE_MATH */
|
#endif /* __OBSOLETE_MATH */
|
||||||
|
|
|
@ -307,7 +307,7 @@ const struct log_data __log_data = {
|
||||||
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
#if !__HAVE_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
.tab2 = {
|
.tab2 = {
|
||||||
# if N == 64
|
# if N == 64
|
||||||
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
|
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
|
||||||
|
@ -505,6 +505,6 @@ const struct log_data __log_data = {
|
||||||
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
#endif /* !__HAVE_FAST_FMA */
|
#endif /* !HAVE_FAST_FMA */
|
||||||
};
|
};
|
||||||
#endif /* __OBSOLETE_MATH */
|
#endif /* __OBSOLETE_MATH */
|
||||||
|
|
|
@ -61,6 +61,15 @@
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Set to 1 if the compiler can inline an fma call as a single instruction. */
|
||||||
|
#ifndef HAVE_FAST_FMA
|
||||||
|
# if __aarch64__ || __ARM_FEATURE_FMA
|
||||||
|
# define HAVE_FAST_FMA 1
|
||||||
|
# else
|
||||||
|
# define HAVE_FAST_FMA 0
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if HAVE_FAST_ROUND
|
#if HAVE_FAST_ROUND
|
||||||
# define TOINT_INTRINSICS 1
|
# define TOINT_INTRINSICS 1
|
||||||
|
|
||||||
|
@ -366,7 +375,7 @@ extern const struct log_data
|
||||||
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||||
double poly1[LOG_POLY1_ORDER - 1];
|
double poly1[LOG_POLY1_ORDER - 1];
|
||||||
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
|
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
|
||||||
#if !__HAVE_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
|
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
|
||||||
#endif
|
#endif
|
||||||
} __log_data HIDDEN;
|
} __log_data HIDDEN;
|
||||||
|
@ -381,7 +390,7 @@ extern const struct log2_data
|
||||||
double poly[LOG2_POLY_ORDER - 1];
|
double poly[LOG2_POLY_ORDER - 1];
|
||||||
double poly1[LOG2_POLY1_ORDER - 1];
|
double poly1[LOG2_POLY1_ORDER - 1];
|
||||||
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
|
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
|
||||||
#if !__HAVE_FAST_FMA
|
#if !HAVE_FAST_FMA
|
||||||
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
|
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
|
||||||
#endif
|
#endif
|
||||||
} __log2_data HIDDEN;
|
} __log2_data HIDDEN;
|
||||||
|
|
|
@ -80,7 +80,7 @@ log_inline (uint64_t ix, double_t *tail)
|
||||||
logctail = T[i].logctail;
|
logctail = T[i].logctail;
|
||||||
|
|
||||||
/* r = z/c - 1, arranged to be exact. */
|
/* r = z/c - 1, arranged to be exact. */
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
r = fma (z, invc, -1.0);
|
r = fma (z, invc, -1.0);
|
||||||
#else
|
#else
|
||||||
double_t zhi = asdouble (iz & (-1ULL << 32));
|
double_t zhi = asdouble (iz & (-1ULL << 32));
|
||||||
|
@ -102,7 +102,7 @@ log_inline (uint64_t ix, double_t *tail)
|
||||||
ar2 = r * ar;
|
ar2 = r * ar;
|
||||||
ar3 = r * ar2;
|
ar3 = r * ar2;
|
||||||
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
hi = t2 + ar2;
|
hi = t2 + ar2;
|
||||||
lo3 = fma (ar, r, -ar2);
|
lo3 = fma (ar, r, -ar2);
|
||||||
lo4 = t2 - hi + ar2;
|
lo4 = t2 - hi + ar2;
|
||||||
|
@ -376,7 +376,7 @@ pow (double x, double y)
|
||||||
double_t lo;
|
double_t lo;
|
||||||
double_t hi = log_inline (ix, &lo);
|
double_t hi = log_inline (ix, &lo);
|
||||||
double_t ehi, elo;
|
double_t ehi, elo;
|
||||||
#if __HAVE_FAST_FMA
|
#if HAVE_FAST_FMA
|
||||||
ehi = y * hi;
|
ehi = y * hi;
|
||||||
elo = y * lo + fma (y, hi, -ehi);
|
elo = y * lo + fma (y, hi, -ehi);
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Reference in New Issue