Support SSE float environment in fenv.h functions.
* cpu_features.c: New file. * cpu_features.h: New file. * crt1.c: Include "cpu_features.h". (__mingw_CRTStartup): Call __cpu_features_init(). * Makefile.in (MINGW_OBJS): Add cpu_features.o. (SRCDIST_FILES): Add cpu_features.c, cpu_features.h. * include/fenv.h (fenv_t): Append __mxcsr field. (__MXCSR_EXCEPT_FLAG_SHIFT): New define. (__MXCSR_EXCEPT_MASK_SHIFT): New define. (__MXCSR_ROUND_FLAG_SHIFT): New define. * mingwex/feclearexcept.c: Include "cpu_features.h". Handle SSE environment. * mingwex/fegetenv.c: Likewise. * mingwex/feholdexcept.c: Likewise. * mingwex/fesetenv.c: Likewise. * mingwex/fesetexceptflag.c: Likewise. * mingwex/fesetround.c: Likewise. * mingwex/fetestexcept.c: Likewise. * mingwex/feupdateenv.c: Likewise. * mingwex/fegetround.c: Add comment.
This commit is contained in:
parent
69d5f3329f
commit
f34428eb35
|
@ -1,3 +1,6 @@
|
|||
2006-07-03 Danny Smith <dannysmith@users.sourceforge.net>
|
||||
|
||||
|
||||
2006-06-25 Chris Sutcliffe <ir0nh34d@users.sourceforge.net>
|
||||
|
||||
* Include/_mingw.h: Increment version to 3.10.
|
||||
|
|
|
@ -156,7 +156,7 @@ CRT0S = crt1.o dllcrt1.o crt2.o dllcrt2.o CRT_noglob.o crtmt.o crtst.o \
|
|||
CRT_fp8.o CRT_fp10.o txtmode.o binmode.o
|
||||
MINGW_OBJS = CRTglob.o CRTfmode.o CRTinit.o dllmain.o gccmain.o \
|
||||
main.o crtst.o mthr_stub.o CRT_fp10.o txtmode.o \
|
||||
pseudo-reloc.o pseudo-reloc-list.o
|
||||
pseudo-reloc.o pseudo-reloc-list.o cpu_features.o
|
||||
|
||||
MOLD_OBJS = isascii.o iscsym.o iscsymf.o toascii.o \
|
||||
strcasecmp.o strncasecmp.o wcscmpi.o
|
||||
|
@ -187,7 +187,7 @@ mthr.c mthr_init.c mthr_stub.c readme.txt \
|
|||
isascii.c iscsym.c iscsymf.c toascii.c \
|
||||
strcasecmp.c strncasecmp.c wcscmpi.c \
|
||||
CRT_fp8.c CRT_fp10.c test_headers.c txtmode.c binmode.c pseudo-reloc.c \
|
||||
pseudo-reloc-list.c \
|
||||
pseudo-reloc-list.c cpu_features.c cpu_features.h\
|
||||
DISCLAIMER CONTRIBUTORS
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
#include <stdbool.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* CPUID level 1, EDX feature bits.  The masks are unsigned so that
   bit 31 can be formed without shifting into the sign bit of an int.  */
#define EDX_CX8    (1U << 8)	/* CMPXCHG8B */
#define EDX_CMOV   (1U << 15)
#define EDX_MMX    (1U << 23)
#define EDX_FXSR   (1U << 24)	/* FXSAVE and FXRSTOR */
#define EDX_SSE    (1U << 25)
#define EDX_SSE2   (1U << 26)

/* CPUID level 1, ECX feature bits.  */
#define ECX_SSE3   (1U << 0)
#define ECX_CX16   (1U << 13)	/* CMPXCHG16B */

/* CPUID extended level 0x80000001, EDX feature bits.  */
#define EDX_3DNOW  (1U << 31)
#define EDX_3DNOWP (1U << 30)
#define EDX_LM     (1U << 29)	/* long mode */

/* Execute CPUID with LEVEL in EAX and store the resulting EAX, EBX,
   ECX and EDX values in the lvalues A, B, C and D.  */
#define __cpuid(level,a,b,c,d)					\
  __asm__ __volatile__ ("cpuid;"				\
			: "=a" (a), "=b" (b), "=c" (c), "=d" (d)\
			: "0" (level))

/* Combine the different cpuid flags into a single bitmap of _CRT_*
   bits.  Zero until __cpu_features_init has run.  */
unsigned int __cpu_features = 0;
|
||||
|
||||
void __cpu_features_init (void)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
/* Try to change the value of CPUID bit (bit 21) in EFLAGS.
|
||||
If the bit can be toggled, CPUID is supported. */
|
||||
asm volatile ("pushfl; pushfl; popl %0;"
|
||||
"movl %0,%1; xorl %2,%0;"
|
||||
"pushl %0; popfl; pushfl; popl %0; popfl"
|
||||
: "=&r" (eax), "=&r" (ebx)
|
||||
: "i" (0x00200000));
|
||||
|
||||
if (((eax ^ ebx) & 0x00200000) == 0)
|
||||
return;
|
||||
|
||||
__cpuid (0, eax, ebx, ecx, edx);
|
||||
if (eax == 0)
|
||||
return;
|
||||
|
||||
__cpuid (1, eax, ebx, ecx, edx);
|
||||
|
||||
if (edx & EDX_CX8)
|
||||
__cpu_features |= _CRT_CMPXCHG8B;
|
||||
if (edx & EDX_CMOV)
|
||||
__cpu_features |= _CRT_CMOV;
|
||||
|
||||
if (edx & EDX_MMX)
|
||||
__cpu_features |= _CRT_MMX;
|
||||
if (edx & EDX_FXSR)
|
||||
__cpu_features |= _CRT_FXSR;
|
||||
if (edx & EDX_SSE)
|
||||
__cpu_features |= _CRT_SSE;
|
||||
if (edx & EDX_SSE2)
|
||||
__cpu_features |= _CRT_SSE2;
|
||||
|
||||
|
||||
if (ecx & ECX_SSE3)
|
||||
__cpu_features |= _CRT_SSE3;
|
||||
if (ecx & ECX_CX16)
|
||||
__cpu_features |= _CRT_CMPXCHG16B;
|
||||
|
||||
__cpuid (0x80000000, eax, ebx, ecx, edx);
|
||||
if (eax < 0x80000001)
|
||||
return;
|
||||
__cpuid (0x80000001, eax, ebx, ecx, edx);
|
||||
if (edx & EDX_3DNOW);
|
||||
__cpu_features |= _CRT_3DNOW;
|
||||
if (edx & EDX_3DNOWP)
|
||||
__cpu_features |= _CRT_3DNOWP;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef TEST

#include <stdio.h>

/* Print "<feature> found" when the FEATURE bit is set in
   __cpu_features.  Wrapped in do/while (0) so the expansion is a single
   statement and cannot capture a following `else'.  */
#define report(feature) \
  do \
    { \
      if ((feature) & __cpu_features) \
	printf (#feature " found\n"); \
    } \
  while (0)

/* Stand-alone self test, compiled only when TEST is defined: runs the
   detection and lists every feature bit that was set.  */
int main (void)
{
  __cpu_features_init ();

  report (_CRT_CMPXCHG8B);
  report (_CRT_CMOV);
  report (_CRT_MMX);
  report (_CRT_FXSR);
  report (_CRT_SSE);
  report (_CRT_SSE2);
  report (_CRT_SSE3);
  report (_CRT_CMPXCHG16B);
  report (_CRT_3DNOW);
  report (_CRT_3DNOWP);
  return 0;
}

#endif
|
|
@ -0,0 +1,23 @@
|
|||
#ifndef _CPU_FEATURES_H
#define _CPU_FEATURES_H

#include <stdbool.h>

/* Feature bits stored in __cpu_features.  */
#define  _CRT_CMPXCHG8B		0x0001
#define  _CRT_CMOV		0x0002
#define  _CRT_MMX		0x0004
#define  _CRT_FXSR		0x0008
#define  _CRT_SSE		0x0010
#define  _CRT_SSE2		0x0020
#define  _CRT_SSE3		0x0040
#define  _CRT_CMPXCHG16B	0x0080
#define  _CRT_3DNOW		0x0100
#define  _CRT_3DNOWP		0x0200

/* Bitmap of the _CRT_* flags above.  */
extern unsigned int __cpu_features;

/* Detect the processor features and record them in __cpu_features.
   Declared here so that callers get a prototype instead of an implicit
   declaration.  */
void __cpu_features_init (void);

/* Currently we use this in fpenv functions.  Non-zero when the SSE bit
   is set in __cpu_features.  The expansion is fully parenthesized so
   that it combines safely with ==, !, && and other operators.  */
#define __HAS_SSE  (__cpu_features & _CRT_SSE)

#endif
|
|
@ -27,6 +27,7 @@
|
|||
* be manually synchronized, but it does lead to this not-generally-
|
||||
* a-good-idea use of include. */
|
||||
#include "init.c"
|
||||
#include "cpu_features.h"
|
||||
|
||||
extern void _pei386_runtime_relocator (void);
|
||||
|
||||
|
@ -195,6 +196,7 @@ __mingw_CRTStartup (void)
|
|||
/*
|
||||
* Initialize floating point unit.
|
||||
*/
|
||||
__cpu_features_init (); /* Do we have SSE, etc.*/
|
||||
_fpreset (); /* Supplied by the runtime library. */
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef _FENV_H_
|
||||
#define _FENV_H_
|
||||
|
||||
|
||||
/* FPU status word exception flags */
|
||||
#define FE_INVALID 0x01
|
||||
#define FE_DENORMAL 0x02
|
||||
|
@ -18,6 +17,18 @@
|
|||
#define FE_UPWARD 0x0800
|
||||
#define FE_TOWARDZERO 0x0c00
|
||||
|
||||
/* The MXCSR exception flags are the same as the
|
||||
FE flags. */
|
||||
#define __MXCSR_EXCEPT_FLAG_SHIFT 0
|
||||
|
||||
/* How much to shift FE status word exception flags
|
||||
to get the MXCSR exception masks.  */
|
||||
#define __MXCSR_EXCEPT_MASK_SHIFT 7
|
||||
|
||||
/* How much to shift FE control word rounding flags
|
||||
to get the MXCSR rounding flags.  */
|
||||
#define __MXCSR_ROUND_FLAG_SHIFT 3
|
||||
|
||||
#ifndef RC_INVOKED
|
||||
/*
|
||||
For now, support only for the basic abstraction of flags that are
|
||||
|
@ -26,8 +37,10 @@
|
|||
*/
|
||||
typedef unsigned short fexcept_t;
|
||||
|
||||
/* This 28-byte struct represents the entire floating point
|
||||
environment as stored by fnstenv or fstenv */
|
||||
/* This 32-byte struct represents the entire floating point
|
||||
environment as stored by fnstenv or fstenv, augmented by
|
||||
the contents of the MXCSR register, as stored by stmxcsr
|
||||
(if CPU supports it). */
|
||||
typedef struct
|
||||
{
|
||||
unsigned short __control_word;
|
||||
|
@ -40,8 +53,9 @@ typedef struct
|
|||
unsigned short __ip_selector;
|
||||
unsigned short __opcode;
|
||||
unsigned int __data_offset;
|
||||
unsigned short __data_selector;
|
||||
unsigned short __unused3;
|
||||
unsigned short __data_selector;
|
||||
unsigned short __unused3;
|
||||
unsigned int __mxcsr; /* contents of the MXCSR register */
|
||||
} fenv_t;
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.2.1
|
||||
The feclearexcept function clears the supported exceptions
|
||||
|
@ -7,9 +8,17 @@
|
|||
int feclearexcept (int excepts)
|
||||
{
|
||||
fenv_t _env;
|
||||
excepts &= FE_ALL_EXCEPT;
|
||||
__asm__ volatile ("fnstenv %0;" : "=m" (_env)); /* get the env */
|
||||
_env.__status_word &= ~(excepts & FE_ALL_EXCEPT); /* clear the except */
|
||||
_env.__status_word &= ~excepts; /* clear the except */
|
||||
__asm__ volatile ("fldenv %0;" :: "m" (_env)); /*set the env */
|
||||
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
unsigned _csr;
|
||||
__asm__ volatile("stmxcsr %0" : "=m" (_csr)); /* get the register */
|
||||
_csr &= ~excepts; /* clear the except */
|
||||
__asm__ volatile("ldmxcsr %0" : : "m" (_csr)); /* set the register */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.4.1
|
||||
The fegetenv function stores the current floating-point environment
|
||||
|
@ -10,5 +11,10 @@ int fegetenv (fenv_t * envp)
|
|||
/* fnstenv sets control word to non-stop for all exceptions, so we
|
||||
need to reload our env to restore the original mask. */
|
||||
__asm__ ("fldenv %0" : : "m" (*envp));
|
||||
|
||||
/* And the SSE environment. */
|
||||
if (__HAS_SSE)
|
||||
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.3.1
|
||||
The fegetround function returns the value of the rounding direction
|
||||
|
@ -9,6 +10,10 @@ fegetround (void)
|
|||
{
|
||||
unsigned short _cw;
|
||||
__asm__ ("fnstcw %0;" : "=m" (_cw));
|
||||
|
||||
/* If the MXCSR flag is different, there is no way to indicate, so just
|
||||
report the FPU flag. */
|
||||
return _cw
|
||||
& (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
|
||||
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.4.2
|
||||
The feholdexcept function saves the current floating-point
|
||||
|
@ -12,5 +13,18 @@ int feholdexcept (fenv_t * envp)
|
|||
/* fnstenv sets control word to non-stop for all exceptions, so all we
|
||||
need to do is clear the exception flags. */
|
||||
__asm__ ("fnclex");
|
||||
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
unsigned int _csr;
|
||||
/* Save the SSE MXCSR register. */
|
||||
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
|
||||
/* Clear the exception flags. */
|
||||
_csr = envp->__mxcsr & ~FE_ALL_EXCEPT;
|
||||
/* Set exception mask to non-stop */
|
||||
_csr |= (FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT) /*= 0x1f80 */;
|
||||
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include <fenv.h>
|
||||
#include <float.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.4.3
|
||||
The fesetenv function establishes the floating-point environment
|
||||
|
@ -15,6 +16,11 @@ extern void (*_imp___fpreset)( void ) ;
|
|||
|
||||
int fesetenv (const fenv_t * envp)
|
||||
{
|
||||
/* Default mxcsr status is to mask all exceptions. All other bits
|
||||
are zero. */
|
||||
|
||||
unsigned int _csr = FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT /*= 0x1f80 */;
|
||||
|
||||
if (envp == FE_PC64_ENV)
|
||||
/*
|
||||
* fninit initializes the control register to 0x37f,
|
||||
|
@ -37,7 +43,15 @@ int fesetenv (const fenv_t * envp)
|
|||
_fpreset();
|
||||
|
||||
else
|
||||
__asm__ ("fldenv %0;" : : "m" (*envp));
|
||||
{
|
||||
__asm__ ("fldenv %0;" : : "m" (*envp));
|
||||
/* Setting the reserved high order bits of MXCSR causes a segfault */
|
||||
_csr = envp ->__mxcsr & 0xffff;
|
||||
}
|
||||
|
||||
/* Set MXCSR */
|
||||
if (__HAS_SSE)
|
||||
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.2.4
|
||||
The fesetexceptflag function sets the complete status for those
|
||||
|
@ -18,5 +19,15 @@ int fesetexceptflag (const fexcept_t * flagp, int excepts)
|
|||
_env.__status_word &= ~excepts;
|
||||
_env.__status_word |= (*flagp & excepts);
|
||||
__asm__ volatile ("fldenv %0;" : : "m" (_env));
|
||||
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
unsigned int _csr;
|
||||
__asm__ __volatile__("stmxcsr %0" : "=m" (_csr));
|
||||
_csr &= ~excepts;
|
||||
_csr |= *flagp & excepts;
|
||||
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.3.2
|
||||
The fesetround function establishes the rounding direction
|
||||
represented by its argument round. If the argument is not equal
|
||||
|
@ -15,5 +17,14 @@ int fesetround (int mode)
|
|||
_cw &= ~(FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
|
||||
_cw |= mode;
|
||||
__asm__ volatile ("fldcw %0;" : : "m" (_cw));
|
||||
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
__asm__ volatile ("stmxcsr %0" : "=m" (_cw));
|
||||
_cw &= ~ 0x6000;
|
||||
_cw |= (mode << __MXCSR_ROUND_FLAG_SHIFT);
|
||||
__asm__ volatile ("ldmxcsr %0" : : "m" (_cw));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
/* 7.6.2.5
|
||||
The fetestexcept function determines which of a specified subset of
|
||||
the exception flags are currently set. The excepts argument
|
||||
|
@ -9,7 +10,18 @@
|
|||
|
||||
int fetestexcept (int excepts)
|
||||
{
|
||||
unsigned short _sw;
|
||||
__asm__ ("fnstsw %%ax" : "=a" (_sw));
|
||||
return _sw & excepts & FE_ALL_EXCEPT;
|
||||
|
||||
unsigned int _res;
|
||||
__asm__ ("fnstsw %%ax" : "=a" (_res));
|
||||
|
||||
|
||||
/* If SSE supported, return the union of the FPU and SSE flags. */
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
unsigned int _csr;
|
||||
__asm__ volatile("stmxcsr %0" : "=m" (_csr));
|
||||
_res |= _csr;
|
||||
}
|
||||
|
||||
return (_res & excepts & FE_ALL_EXCEPT);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fenv.h>
|
||||
#include "cpu_features.h"
|
||||
|
||||
/* 7.6.4.4
|
||||
The feupdateenv function saves the currently raised exceptions in
|
||||
|
@ -8,13 +9,18 @@
|
|||
set by a call to feholdexcept or fegetenv, or equal the macro
|
||||
FE_DFL_ENV or an implementation-defined environment macro. */
|
||||
|
||||
/* FIXME: this works but surely there must be a better way. */
|
||||
|
||||
int feupdateenv (const fenv_t * envp)
|
||||
{
|
||||
unsigned int _fexcept = fetestexcept (FE_ALL_EXCEPT); /*save excepts */
|
||||
unsigned int _fexcept;
|
||||
__asm__ ("fnstsw %%ax" : "=a" (_fexcept)); /*save excepts */
|
||||
if (__HAS_SSE)
|
||||
{
|
||||
unsigned int _csr;
|
||||
__asm__ ("stmxcsr %0" : "=m" (_csr));
|
||||
_fexcept |= _csr;
|
||||
}
|
||||
fesetenv (envp); /* install the env */
|
||||
feraiseexcept (_fexcept); /* raise the execept */
|
||||
feraiseexcept (_fexcept & FE_ALL_EXCEPT); /* raise the execeptions */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue