[llvm] be794e3 - [X86][FPEnv] Lowering of {get,set,reset}_fpenv

Serge Pavlov via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 14 08:12:51 PDT 2023


Author: Serge Pavlov
Date: 2023-07-14T22:10:53+07:00
New Revision: be794e3d921b5305fc7849761de0722bbcf6c916

URL: https://github.com/llvm/llvm-project/commit/be794e3d921b5305fc7849761de0722bbcf6c916
DIFF: https://github.com/llvm/llvm-project/commit/be794e3d921b5305fc7849761de0722bbcf6c916.diff

LOG: [X86][FPEnv] Lowering of {get,set,reset}_fpenv

The change implements lowering of `get_fpenv`, `set_fpenv` and
`reset_fpenv`.

Differential Revision: https://reviews.llvm.org/D81833

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/lib/Target/X86/X86InstrFPStack.td
    llvm/test/CodeGen/X86/fpenv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b20826b4e62334..91506bed01d5ee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -396,6 +396,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
     setOperationAction(ISD::GET_ROUNDING   , MVT::i32  , Custom);
     setOperationAction(ISD::SET_ROUNDING   , MVT::Other, Custom);
+    setOperationAction(ISD::GET_FPENV_MEM  , MVT::Other, Custom);
+    setOperationAction(ISD::SET_FPENV_MEM  , MVT::Other, Custom);
+    setOperationAction(ISD::RESET_FPENV    , MVT::Other, Custom);
   }
 
   // Promote the i8 variants and force them on up to i32 which has a shorter
@@ -30068,6 +30071,122 @@ SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
   return Chain;
 }
 
+const unsigned X87StateSize = 28;
+const unsigned FPStateSize = 32;
+const unsigned FPStateSizeInBits = FPStateSize * 8;
+
+SDValue X86TargetLowering::LowerGET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+  MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();
+
+  // Get x87 state, if it presents.
+  if (Subtarget.hasX87()) {
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FNSTENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+
+    // FNSTENV changes the exception mask, so load back the stored environment.
+    MachineMemOperand::Flags NewFlags =
+        MachineMemOperand::MOLoad |
+        (MMO->getFlags() & ~MachineMemOperand::MOStore);
+    MMO = MF.getMachineMemOperand(MMO, NewFlags);
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  }
+
+  // If target supports SSE, get MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Store MXCSR into memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, SDLoc DL,
+                                   EVT MemVT, MachineMemOperand *MMO,
+                                   SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  // Set x87 state, if it presents.
+  if (Subtarget.hasX87())
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  // If target supports SSE, set MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Load MXCSR from memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+  return Chain;
+}
+
+SDValue X86TargetLowering::LowerSET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+  MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();
+  return createSetFPEnvNodes(Ptr, Chain, DL, MemVT, MMO, DAG, Subtarget);
+}
+
+SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op.getNode()->getOperand(0);
+
+  IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+  ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+  SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to
+  // nearest. FPU precision is set to 53 bits on Windows and 64 bits otherwise
+  // for compatibility with glibc.
+  unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+  Constant *Zero = ConstantInt::get(ItemTy, 0);
+  for (unsigned I = 0; I < 6; ++I)
+    FPEnvVals.push_back(Zero);
+
+  // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear
+  // all exceptions, sets DAZ and FTZ to 0.
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+  Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOStore, X87StateSize, Align(4));
+
+  return createSetFPEnvNodes(Env, Chain, DL, MVT::i32, MMO, DAG, Subtarget);
+}
+
 /// Lower a vector CTLZ using native supported vector CTLZ instruction.
 //
 // i8/i16 vector implemented using dword LZCNT vector instruction
@@ -34320,6 +34439,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
   case ISD::SET_ROUNDING:       return LowerSET_ROUNDING(Op, DAG);
+  case ISD::GET_FPENV_MEM:      return LowerGET_FPENV_MEM(Op, DAG);
+  case ISD::SET_FPENV_MEM:      return LowerSET_FPENV_MEM(Op, DAG);
+  case ISD::RESET_FPENV:        return LowerRESET_FPENV(Op, DAG);
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ(Op, Subtarget, DAG);
   case ISD::CTTZ:
@@ -35562,6 +35684,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(FNSTCW16m)
   NODE_NAME_CASE(FLDCW16m)
+  NODE_NAME_CASE(FNSTENVm)
+  NODE_NAME_CASE(FLDENVm)
   NODE_NAME_CASE(LCMPXCHG_DAG)
   NODE_NAME_CASE(LCMPXCHG8_DAG)
   NODE_NAME_CASE(LCMPXCHG16_DAG)

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a06502206e169..250df82a30c2f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -833,6 +833,12 @@ namespace llvm {
     // Load FP control word from i16 memory.
     FLDCW16m,
 
+    // Store x87 FPU environment into memory.
+    FNSTENVm,
+
+    // Load x87 FPU environment from memory.
+    FLDENVm,
+
     /// This instruction implements FP_TO_SINT with the
     /// integer destination in memory and a FP reg source.  This corresponds
     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
@@ -1663,6 +1669,9 @@ namespace llvm {
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                     SDValue &Chain) const;

diff  --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index fbbd3c83dc5c9e..66a2d27abf86b4 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -25,6 +25,7 @@ def SDTX86Fist      : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def SDTX86CwdLoad   : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv     : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fp80_add     : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
 def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
@@ -49,6 +50,12 @@ def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m",          SDTX86CwdStore,
 def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m",           SDTX86CwdLoad,
                              [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
                               SDNPMemOperand]>;
+def X86fpenv_get    : SDNode<"X86ISD::FNSTENVm",           SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
+def X86fpenv_set    : SDNode<"X86ISD::FLDENVm",            SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fst node:$val, node:$ptr), [{
@@ -418,13 +425,17 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
 
 let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm  : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">;
 def FRSTORm  : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">;
+let Predicates = [HasX87] in
+def FLDENVm  : I<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src",
+                 [(X86fpenv_set addr:$src)]>;
 }
 
 let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm  : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">;
 def FSAVEm   : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">;
+let Predicates = [HasX87] in
+def FSTENVm  : I<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst",
+                 [(X86fpenv_get addr:$dst)]>;
 }
 
 let Uses = [FPSW], mayStore = 1 in

diff  --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
index d190f4c8f7fd1a..8f6b5b65e47baf 100644
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -280,27 +280,28 @@ entry:
 define void @get_fpenv_01_native(ptr %ptr) nounwind {
 ; X86-NOSSE-LABEL: get_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $44, %esp
+; X86-NOSSE-NEXT:    subl $36, %esp
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    calll fegetenv
-; X86-NOSSE-NEXT:    addl $44, %esp
+; X86-NOSSE-NEXT:    fnstenv (%eax)
+; X86-NOSSE-NEXT:    fldenv (%eax)
+; X86-NOSSE-NEXT:    addl $36, %esp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: get_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $44, %esp
+; X86-SSE-NEXT:    subl $36, %esp
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, (%esp)
-; X86-SSE-NEXT:    calll fegetenv
-; X86-SSE-NEXT:    addl $44, %esp
+; X86-SSE-NEXT:    fnstenv (%eax)
+; X86-SSE-NEXT:    fldenv (%eax)
+; X86-SSE-NEXT:    stmxcsr 28(%eax)
+; X86-SSE-NEXT:    addl $36, %esp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: get_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    subq $40, %rsp
-; X64-NEXT:    callq fegetenv at PLT
-; X64-NEXT:    addq $40, %rsp
+; X64-NEXT:    fnstenv (%rdi)
+; X64-NEXT:    fldenv (%rdi)
+; X64-NEXT:    stmxcsr 28(%rdi)
 ; X64-NEXT:    retq
 entry:
   %env = call i256 @llvm.get.fpenv.i256()
@@ -342,27 +343,25 @@ entry:
 define void @set_fpenv_01_native(ptr %ptr) nounwind {
 ; X86-NOSSE-LABEL: set_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $44, %esp
+; X86-NOSSE-NEXT:    subl $36, %esp
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    calll fesetenv
-; X86-NOSSE-NEXT:    addl $44, %esp
+; X86-NOSSE-NEXT:    fldenv (%eax)
+; X86-NOSSE-NEXT:    addl $36, %esp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: set_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $44, %esp
+; X86-SSE-NEXT:    subl $36, %esp
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, (%esp)
-; X86-SSE-NEXT:    calll fesetenv
-; X86-SSE-NEXT:    addl $44, %esp
+; X86-SSE-NEXT:    fldenv (%eax)
+; X86-SSE-NEXT:    ldmxcsr 28(%eax)
+; X86-SSE-NEXT:    addl $36, %esp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: set_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    subq $40, %rsp
-; X64-NEXT:    callq fesetenv at PLT
-; X64-NEXT:    addq $40, %rsp
+; X64-NEXT:    fldenv (%rdi)
+; X64-NEXT:    ldmxcsr 28(%rdi)
 ; X64-NEXT:    retq
 entry:
   %env = load i256, ptr %ptr
@@ -402,26 +401,19 @@ entry:
 define void @reset_fpenv_01_native() nounwind {
 ; X86-NOSSE-LABEL: reset_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $12, %esp
-; X86-NOSSE-NEXT:    movl $-1, (%esp)
-; X86-NOSSE-NEXT:    calll fesetenv
-; X86-NOSSE-NEXT:    addl $12, %esp
+; X86-NOSSE-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: reset_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $12, %esp
-; X86-SSE-NEXT:    movl $-1, (%esp)
-; X86-SSE-NEXT:    calll fesetenv
-; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-SSE-NEXT:    ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: reset_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    movq $-1, %rdi
-; X64-NEXT:    callq fesetenv at PLT
-; X64-NEXT:    popq %rax
+; X64-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; X64-NEXT:    ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28(%rip)
 ; X64-NEXT:    retq
 entry:
   call void @llvm.reset.fpenv()


        


More information about the llvm-commits mailing list