[libc-commits] [libc] 805899e - [libc] Change FEnv to use MXCSR as source of truth

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Mar 23 16:08:04 PDT 2022


Author: Michael Jones
Date: 2022-03-23T16:08:00-07:00
New Revision: 805899e68a83916211a164600c3278f3a65b5292

URL: https://github.com/llvm/llvm-project/commit/805899e68a83916211a164600c3278f3a65b5292
DIFF: https://github.com/llvm/llvm-project/commit/805899e68a83916211a164600c3278f3a65b5292.diff

LOG: [libc] Change FEnv to use MXCSR as source of truth

This patch primarily fixes the fenv implementation on Windows, since
Windows uses the MXCSR in place of the x87 status registers for storing
information about the floating point environment. This allows FEnv to
work correctly on Windows, and successfully build.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D121839

Added: 
    

Modified: 
    libc/config/windows/entrypoints.txt
    libc/src/__support/FPUtil/x86_64/FEnvImpl.h
    libc/test/src/fenv/getenv_and_setenv_test.cpp

Removed: 
    


################################################################################
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index acb5f80daf516..ac561a08a615f 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -17,9 +17,6 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.ctype.tolower
     libc.src.ctype.toupper
 
-    # errno.h entrypoints
-    libc.src.errno.__errno_location
-
     # string.h entrypoints
     libc.src.string.bcmp
     libc.src.string.bzero

diff --git a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h
index 731c617b52b09..846996ec93afd 100644
--- a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h
@@ -63,6 +63,7 @@ struct ExceptionFlags {
 // In the x87 control word, they occupy the first 6 bits. In the MXCSR
 // register, they occupy bits 7 to 12.
 static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION = 0;
+static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION_HIGH = 24;
 static constexpr uint16_t MXCSR_EXCEPTION_CONTOL_BIT_POISTION = 7;
 
 // Exception flags are individual bits in the corresponding registers.
@@ -194,8 +195,8 @@ static inline int disable_except(int excepts) {
 }
 
 static inline int get_except() {
-  uint16_t x87_cw = internal::get_x87_control_word();
-  uint16_t enabled_excepts = ~x87_cw & 0x3F;
+  uint16_t mxcsr = internal::get_mxcsr();
+  uint16_t enabled_excepts = ~(mxcsr >> 7) & 0x3F;
   return internal::exception_status_to_macro(enabled_excepts);
 }
 
@@ -214,9 +215,8 @@ static inline int clear_except(int excepts) {
 static inline int test_except(int excepts) {
   uint16_t status_value = internal::get_status_value_for_except(excepts);
   // Check both x87 status word and MXCSR.
-  return internal::exception_status_to_macro(
-      (status_value & internal::get_x87_status_word()) |
-      (status_value & internal::get_mxcsr()));
+  return internal::exception_status_to_macro(status_value &
+                                             internal::get_mxcsr());
 }
 
 // Sets the exception flags but does not trigger the exception handler.
@@ -252,9 +252,13 @@ static inline int raise_except(int excepts) {
 
   auto raise_helper = [](uint16_t singleExceptFlag) {
     internal::X87StateDescriptor state;
+    uint32_t mxcsr = 0;
     internal::get_x87_state_descriptor(state);
+    mxcsr = internal::get_mxcsr();
     state.status_word |= singleExceptFlag;
+    mxcsr |= singleExceptFlag;
     internal::write_x87_state_descriptor(state);
+    internal::write_mxcsr(mxcsr);
     internal::fwait();
   };
 
@@ -345,8 +349,8 @@ namespace internal {
 // MSVC fenv.h defines a very simple representation of the floating point state
 // which just consists of control and status words of the x87 unit.
 struct FPState {
-  uint32_t ControlWord;
-  uint32_t StatusWord;
+  uint32_t control_word;
+  uint32_t status_word;
 };
 #else
 struct FPState {
@@ -362,22 +366,191 @@ static_assert(
     "Internal floating point state does not match the public fenv_t type.");
 
 #ifdef _WIN32
+
+// The exception flags in the Windows FEnv struct and the MXCSR have almost
+// reversed bit positions.
+struct WinExceptionFlags {
+  static constexpr uint32_t INEXACT = 0x01;
+  static constexpr uint32_t UNDERFLOW = 0x02;
+  static constexpr uint32_t OVERFLOW = 0x04;
+  static constexpr uint32_t DIV_BY_ZERO = 0x08;
+  static constexpr uint32_t INVALID = 0x10;
+  static constexpr uint32_t DENORMAL = 0x20;
+
+  // The Windows FEnv struct has a second copy of all of these bits in the high
+  // byte of the 32 bit control word. These are used as the source of truth when
+  // calling fesetenv.
+  static constexpr uint32_t HIGH_OFFSET = 24;
+
+  static constexpr uint32_t HIGH_INEXACT = INEXACT << HIGH_OFFSET;
+  static constexpr uint32_t HIGH_UNDERFLOW = UNDERFLOW << HIGH_OFFSET;
+  static constexpr uint32_t HIGH_OVERFLOW = OVERFLOW << HIGH_OFFSET;
+  static constexpr uint32_t HIGH_DIV_BY_ZERO = DIV_BY_ZERO << HIGH_OFFSET;
+  static constexpr uint32_t HIGH_INVALID = INVALID << HIGH_OFFSET;
+  static constexpr uint32_t HIGH_DENORMAL = DENORMAL << HIGH_OFFSET;
+};
+
+/*
+    fenv_t control word format:
+
+    Windows (at least for x64) uses a 4 byte control fenv control word stored in
+    a 32 bit integer. The first byte contains just the rounding mode and the
+    exception masks, while the last two bytes contain that same information as
+    well as the flush-to-zero and denormals-are-zero flags. The flags are
+    represented with a truth table:
+
+    00 - No flags set
+    01 - Flush-to-zero and Denormals-are-zero set
+    11 - Flush-to-zero set
+    10 - Denormals-are-zero set
+
+    U represents unused.
+
+     +-----Rounding Mode-----+
+     |                       |
+    ++                      ++
+    ||                      ||
+    RRMMMMMM UUUUUUUU UUUUFFRR UUMMMMMM
+      |    |              ||     |    |
+      +----+      flags---++     +----+
+           |                          |
+           +------Exception Masks-----+
+
+
+    fenv_t status word format:
+
+    The status word is a lot simpler for this conversion, since only the
+    exception flags are used in the MXCSR.
+
+      +----+---Exception Flags---+----+
+      |    |                     |    |
+    UUEEEEEE UUUUUUUU UUUUUUUU UUEEEEEE
+
+
+
+    MXCSR Format:
+
+    The MXCSR format is the same information, just organized differently. Since
+    the fenv_t struct for windows doesn't include the mxcsr bits, they must be
+    generated from the control word bits.
+
+      Exception Masks---+           +---Exception Flags
+                        |           |
+     Flush-to-zero---+  +----+ +----+
+                     |  |    | |    |
+                     FRRMMMMMMDEEEEEE
+                      ||      |
+                      ++      +---Denormals-are-zero
+                      |
+                      +---Rounding Mode
+
+
+    The mask and flag order is as follows:
+
+    fenv_t      mxcsr
+
+    denormal    inexact
+    invalid     underflow
+    div by 0    overflow
+    overflow    div by 0
+    underflow   denormal
+    inexact     invalid
+
+    This is almost reverse, except for denormal and invalid which are in the
+    same order in both.
+  */
+
 static inline int get_env(fenv_t *envp) {
   internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);
-  internal::X87StateDescriptor X87Status;
-  internal::getX87StateDescriptor(X87Status);
-  state->ControlWord = X87Status.ControlWord;
-  state->StatusWord = X87Status.StatusWord;
+
+  uint32_t status_word = 0;
+  uint32_t control_word = 0;
+
+  uint32_t mxcsr = internal::get_mxcsr();
+
+  // Set exception flags in the status word
+  status_word |= (mxcsr & (internal::ExceptionFlags::INVALID |
+                           internal::ExceptionFlags::DENORMAL))
+                 << 4;
+  status_word |= (mxcsr & internal::ExceptionFlags::DIV_BY_ZERO) << 1;
+  status_word |= (mxcsr & internal::ExceptionFlags::OVERFLOW) >> 1;
+  status_word |= (mxcsr & internal::ExceptionFlags::UNDERFLOW) >> 3;
+  status_word |= (mxcsr & internal::ExceptionFlags::INEXACT) >> 5;
+  status_word |= status_word << WinExceptionFlags::HIGH_OFFSET;
+
+  // Set exception masks in bits 0-5 and 24-29
+  control_word |=
+      (mxcsr &
+       ((internal::ExceptionFlags::INVALID | internal::ExceptionFlags::DENORMAL)
+        << 7)) >>
+      3;
+  control_word |= (mxcsr & (internal::ExceptionFlags::DIV_BY_ZERO << 7)) >> 6;
+  control_word |= (mxcsr & (internal::ExceptionFlags::OVERFLOW << 7)) >> 8;
+  control_word |= (mxcsr & (internal::ExceptionFlags::UNDERFLOW << 7)) >> 10;
+  control_word |= (mxcsr & (internal::ExceptionFlags::INEXACT << 7)) >> 12;
+  control_word |= control_word << WinExceptionFlags::HIGH_OFFSET;
+
+  // Set rounding in bits 8-9 and 30-31
+  control_word |= (mxcsr & 0x6000) >> 5;
+  control_word |= (mxcsr & 0x6000) << 17;
+
+  // Set flush-to-zero in bit 10
+  control_word |= (mxcsr & 0x8000) >> 5;
+
+  // Set denormals-are-zero xor flush-to-zero in bit 11
+  control_word |= (((mxcsr & 0x8000) >> 9) ^ (mxcsr & 0x0040)) << 5;
+
+  state->control_word = control_word;
+  state->status_word = status_word;
   return 0;
 }
 
 static inline int set_env(const fenv_t *envp) {
   const internal::FPState *state =
       reinterpret_cast<const internal::FPState *>(envp);
-  internal::X87StateDescriptor X87Status;
-  X87Status.ControlWord = state->ControlWord;
-  X87Status.StatusWord = state->StatusWord;
-  internal::writeX87StateDescriptor(X87Status);
+
+  uint32_t mxcsr = 0;
+
+  // Set exception flags from the status word
+  mxcsr |= static_cast<uint16_t>(
+      (state->status_word &
+       (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >>
+      28);
+  mxcsr |= static_cast<uint16_t>(
+      (state->status_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 25);
+  mxcsr |= static_cast<uint16_t>(
+      (state->status_word & WinExceptionFlags::HIGH_OVERFLOW) >> 23);
+  mxcsr |= static_cast<uint16_t>(
+      (state->status_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 21);
+  mxcsr |= static_cast<uint16_t>(
+      (state->status_word & WinExceptionFlags::HIGH_INEXACT) >> 19);
+
+  // Set denormals-are-zero from bit 10 xor bit 11
+  mxcsr |= static_cast<uint16_t>(
+      (((state->control_word & 0x800) >> 1) ^ (state->control_word & 0x400)) >>
+      4);
+
+  // Set exception masks from bits 24-29
+  mxcsr |= static_cast<uint16_t>(
+      (state->control_word &
+       (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >>
+      21);
+  mxcsr |= static_cast<uint16_t>(
+      (state->control_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 18);
+  mxcsr |= static_cast<uint16_t>(
+      (state->control_word & WinExceptionFlags::HIGH_OVERFLOW) >> 16);
+  mxcsr |= static_cast<uint16_t>(
+      (state->control_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 14);
+  mxcsr |= static_cast<uint16_t>(
+      (state->control_word & WinExceptionFlags::HIGH_INEXACT) >> 12);
+
+  // Set rounding from bits 30-31
+  mxcsr |= static_cast<uint16_t>((state->control_word & 0xc0000000) >> 17);
+
+  // Set flush-to-zero from bit 10
+  mxcsr |= static_cast<uint16_t>((state->control_word & 0x400) << 5);
+
+  internal::write_mxcsr(mxcsr);
   return 0;
 }
 #else

diff --git a/libc/test/src/fenv/getenv_and_setenv_test.cpp b/libc/test/src/fenv/getenv_and_setenv_test.cpp
index 583753c813d68..ffd731e1abd71 100644
--- a/libc/test/src/fenv/getenv_and_setenv_test.cpp
+++ b/libc/test/src/fenv/getenv_and_setenv_test.cpp
@@ -70,3 +70,16 @@ TEST(LlvmLibcFenvTest, Set_FE_DFL_ENV) {
   int rm = __llvm_libc::fegetround();
   EXPECT_EQ(rm, FE_TONEAREST);
 }
+
+#ifdef _WIN32
+TEST(LlvmLibcFenvTest, Windows_Set_Get_Test) {
+  // If a valid fenv_t is written, then reading it back out should be identical.
+  fenv_t setEnv = {0x7e00053e, 0x0f00000f};
+  fenv_t getEnv;
+  ASSERT_EQ(__llvm_libc::fesetenv(&setEnv), 0);
+  ASSERT_EQ(__llvm_libc::fegetenv(&getEnv), 0);
+
+  ASSERT_EQ(setEnv._Fe_ctl, getEnv._Fe_ctl);
+  ASSERT_EQ(setEnv._Fe_stat, getEnv._Fe_stat);
+}
+#endif


        


More information about the libc-commits mailing list