Move __HAVE_FAST_FMA to math_config.h

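Define it as HAVE_FAST_FMA (without the leading underscores), consistently
with other HAVE_* macros that only affect code using math_config.h.  This
replaces the __HAVE_FAST_FMA / __HAVE_FAST_FMA_DEFAULT pair, and all users
of the old name are updated.  This is also closer to the Arm Optimized
Routines code.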
This commit is contained in:
Szabolcs Nagy 2018-07-05 12:37:25 +01:00 committed by Corinna Vinschen
parent cbe50607fb
commit 393a1cb4ea
7 changed files with 21 additions and 35 deletions

View File

@ -65,17 +65,6 @@
double and single precision arithmetics has similar latency and it double and single precision arithmetics has similar latency and it
has no legacy SVID matherr support, only POSIX errno and fenv has no legacy SVID matherr support, only POSIX errno and fenv
exception based error handling. exception based error handling.
__HAVE_FAST_FMA_DEFAULT
Default value for __HAVE_FAST_FMA if that's not set by the user.
It should be set here based on predefined feature macros.
__HAVE_FAST_FMA
It should be set to 1 if the compiler can inline an fma call as a
single instruction. Some math code has a separate faster code
path assuming the target has single instruction fma.
*/ */
#if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__) #if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__)
@ -91,9 +80,6 @@
# endif # endif
# if __ARM_FP & 0x8 # if __ARM_FP & 0x8
# define __OBSOLETE_MATH_DEFAULT 0 # define __OBSOLETE_MATH_DEFAULT 0
# if __ARM_FEATURE_FMA
# define __HAVE_FAST_FMA_DEFAULT 1
# endif
# endif # endif
#else #else
# define __IEEE_BIG_ENDIAN # define __IEEE_BIG_ENDIAN
@ -110,7 +96,6 @@
#define __IEEE_BIG_ENDIAN #define __IEEE_BIG_ENDIAN
#endif #endif
#define __OBSOLETE_MATH_DEFAULT 0 #define __OBSOLETE_MATH_DEFAULT 0
#define __HAVE_FAST_FMA_DEFAULT 1
#endif #endif
#ifdef __epiphany__ #ifdef __epiphany__
@ -479,14 +464,6 @@
#define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT #define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT
#endif #endif
#ifndef __HAVE_FAST_FMA_DEFAULT
/* Assume slow fma by default. */
#define __HAVE_FAST_FMA_DEFAULT 0
#endif
#ifndef __HAVE_FAST_FMA
#define __HAVE_FAST_FMA __HAVE_FAST_FMA_DEFAULT
#endif
#ifndef __IEEE_BIG_ENDIAN #ifndef __IEEE_BIG_ENDIAN
#ifndef __IEEE_LITTLE_ENDIAN #ifndef __IEEE_LITTLE_ENDIAN
#error Endianess not declared!! #error Endianess not declared!!

View File

@ -146,7 +146,7 @@ log (double x)
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */ /* r ~= z/c - 1, |r| < 1/(2*N). */
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */ /* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0); r = fma (z, invc, -1.0);
#else #else

View File

@ -72,7 +72,7 @@ double
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0; return 0;
r = x - 1.0; r = x - 1.0;
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
hi = r * InvLn2hi; hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi); lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else #else
@ -123,7 +123,7 @@ double
/* log2(x) = log2(z/c) + log2(c) + k. */ /* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */ /* r ~= z/c - 1, |r| < 1/(2*N). */
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */ /* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0); r = fma (z, invc, -1.0);
t1 = r * InvLn2hi; t1 = r * InvLn2hi;

View File

@ -134,7 +134,7 @@ const struct log2_data __log2_data = {
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif #endif
}, },
#if !__HAVE_FAST_FMA #if !HAVE_FAST_FMA
.tab2 = { .tab2 = {
# if N == 64 # if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
@ -203,6 +203,6 @@ const struct log2_data __log2_data = {
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif # endif
}, },
#endif /* !__HAVE_FAST_FMA */ #endif /* !HAVE_FAST_FMA */
}; };
#endif /* __OBSOLETE_MATH */ #endif /* __OBSOLETE_MATH */

View File

@ -307,7 +307,7 @@ const struct log_data __log_data = {
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}, {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif #endif
}, },
#if !__HAVE_FAST_FMA #if !HAVE_FAST_FMA
.tab2 = { .tab2 = {
# if N == 64 # if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56}, {0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
@ -505,6 +505,6 @@ const struct log_data __log_data = {
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}, {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
#endif #endif
}, },
#endif /* !__HAVE_FAST_FMA */ #endif /* !HAVE_FAST_FMA */
}; };
#endif /* __OBSOLETE_MATH */ #endif /* __OBSOLETE_MATH */

View File

@ -61,6 +61,15 @@
# endif # endif
#endif #endif
/* HAVE_FAST_FMA: nonzero when the compiler can inline an fma call as a
   single instruction.  Math code tests this macro to choose between an
   fma-based code path and a fallback that does not rely on fma.

   A definition made before this point is left untouched.  Otherwise the
   value is 1 when either __aarch64__ or __ARM_FEATURE_FMA evaluates to
   nonzero in the preprocessor, and 0 in every other case.  */
#if !defined (HAVE_FAST_FMA)
# if !(__aarch64__ || __ARM_FEATURE_FMA)
#  define HAVE_FAST_FMA 0
# else
#  define HAVE_FAST_FMA 1
# endif
#endif
#if HAVE_FAST_ROUND #if HAVE_FAST_ROUND
# define TOINT_INTRINSICS 1 # define TOINT_INTRINSICS 1
@ -366,7 +375,7 @@ extern const struct log_data
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1]; double poly1[LOG_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS]; struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
#if !__HAVE_FAST_FMA #if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS]; struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif #endif
} __log_data HIDDEN; } __log_data HIDDEN;
@ -381,7 +390,7 @@ extern const struct log2_data
double poly[LOG2_POLY_ORDER - 1]; double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1]; double poly1[LOG2_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
#if !__HAVE_FAST_FMA #if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif #endif
} __log2_data HIDDEN; } __log2_data HIDDEN;

View File

@ -80,7 +80,7 @@ log_inline (uint64_t ix, double_t *tail)
logctail = T[i].logctail; logctail = T[i].logctail;
/* r = z/c - 1, arranged to be exact. */ /* r = z/c - 1, arranged to be exact. */
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
r = fma (z, invc, -1.0); r = fma (z, invc, -1.0);
#else #else
double_t zhi = asdouble (iz & (-1ULL << 32)); double_t zhi = asdouble (iz & (-1ULL << 32));
@ -102,7 +102,7 @@ log_inline (uint64_t ix, double_t *tail)
ar2 = r * ar; ar2 = r * ar;
ar3 = r * ar2; ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */ /* k*Ln2 + log(c) + r + A[0]*r*r. */
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
hi = t2 + ar2; hi = t2 + ar2;
lo3 = fma (ar, r, -ar2); lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2; lo4 = t2 - hi + ar2;
@ -376,7 +376,7 @@ pow (double x, double y)
double_t lo; double_t lo;
double_t hi = log_inline (ix, &lo); double_t hi = log_inline (ix, &lo);
double_t ehi, elo; double_t ehi, elo;
#if __HAVE_FAST_FMA #if HAVE_FAST_FMA
ehi = y * hi; ehi = y * hi;
elo = y * lo + fma (y, hi, -ehi); elo = y * lo + fma (y, hi, -ehi);
#else #else