[llvm] be794e3 - [X86][FPEnv] Lowering of {get,set,reset}_fpenv
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 14 08:12:51 PDT 2023
Author: Serge Pavlov
Date: 2023-07-14T22:10:53+07:00
New Revision: be794e3d921b5305fc7849761de0722bbcf6c916
URL: https://github.com/llvm/llvm-project/commit/be794e3d921b5305fc7849761de0722bbcf6c916
DIFF: https://github.com/llvm/llvm-project/commit/be794e3d921b5305fc7849761de0722bbcf6c916.diff
LOG: [X86][FPEnv] Lowering of {get,set,reset}_fpenv
The change implements lowering of `get_fpenv`, `set_fpenv` and
`reset_fpenv`.
Differential Revision: https://reviews.llvm.org/D81833
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrFPStack.td
llvm/test/CodeGen/X86/fpenv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b20826b4e62334..91506bed01d5ee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -396,6 +396,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV_MEM , MVT::Other, Custom);
+ setOperationAction(ISD::SET_FPENV_MEM , MVT::Other, Custom);
+ setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
@@ -30068,6 +30071,122 @@ SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
return Chain;
}
+const unsigned X87StateSize = 28; // Byte offset of MXCSR in the saved image.
+const unsigned FPStateSize = 32; // Byte size of the whole saved image.
+const unsigned FPStateSizeInBits = FPStateSize * 8; // Checked against MemVT.
+
+/// Lower ISD::GET_FPENV_MEM: store the current floating-point environment to
+/// the memory addressed by the node's pointer operand.
+///
+/// When x87 is available the environment is written with FNSTENV at offset 0
+/// and immediately reloaded with FLDENV; when SSE1 is available MXCSR is
+/// stored at byte offset X87StateSize. Returns the resulting output chain.
+SDValue X86TargetLowering::LowerGET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits &&
+         "Unexpected size of the FP environment memory");
+  MachineMemOperand *MMO = Node->getMemOperand();
+
+  // Get x87 state, if present.
+  if (Subtarget.hasX87()) {
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FNSTENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+
+    // FNSTENV changes the exception mask, so load back the stored environment.
+    // The reload reads the same location, so reuse the memory operand with
+    // the store flag replaced by the load flag.
+    MachineMemOperand::Flags NewFlags =
+        MachineMemOperand::MOLoad |
+        (MMO->getFlags() & ~MachineMemOperand::MOStore);
+    MMO = MF.getMachineMemOperand(MMO, NewFlags);
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  }
+
+  // If target supports SSE, get MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Store MXCSR into memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+/// Emit the nodes that load a floating-point environment from memory.
+///
+/// \param Ptr    Address of the environment image.
+/// \param Chain  Input chain; every emitted node is chained after it.
+/// \param DL     Debug location attached to the emitted nodes.
+/// \param MemVT  Memory type recorded on the FLDENV node.
+/// \param MMO    Memory operand recorded on the FLDENV node.
+/// \returns the output chain. FLDENV is emitted when x87 is available and an
+/// LDMXCSR from byte offset X87StateSize when SSE1 is available.
+static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, const SDLoc &DL,
+                                   EVT MemVT, MachineMemOperand *MMO,
+                                   SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  // Set x87 state, if present.
+  if (Subtarget.hasX87())
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  // If target supports SSE, set MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Load MXCSR from memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+  return Chain;
+}
+
+/// Lower ISD::SET_FPENV_MEM: load the floating-point environment from the
+/// memory addressed by the node's pointer operand.
+///
+/// The node's memory type and memory operand are forwarded unchanged to
+/// createSetFPEnvNodes, whose output chain is returned.
+SDValue X86TargetLowering::LowerSET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits &&
+         "Unexpected size of the FP environment memory");
+  MachineMemOperand *MMO = Node->getMemOperand();
+  return createSetFPEnvNodes(Ptr, Chain, DL, MemVT, MMO, DAG, Subtarget);
+}
+
+/// Lower ISD::RESET_FPENV: load a default floating-point environment from a
+/// constant-pool image.
+///
+/// The image is an array of eight i32 words: word 0 holds the x87 control
+/// word, words 1-6 are zero, and word 7 (byte offset X87StateSize) holds the
+/// MXCSR value. Returns the output chain produced by createSetFPEnvNodes.
+SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op.getNode()->getOperand(0);
+
+  IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+  ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+  SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to
+  // nearest. FPU precision is set to 53 bits on Windows and 64 bits otherwise
+  // for compatibility with glibc.
+  unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+  Constant *Zero = ConstantInt::get(ItemTy, 0);
+  for (unsigned I = 0; I < 6; ++I)
+    FPEnvVals.push_back(Zero);
+
+  // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear
+  // all exceptions, sets DAZ and FTZ to 0.
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+  Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
+  MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
+  // The constant-pool image is only read (by FLDENV), so the memory operand
+  // must describe a load, not a store.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOLoad, X87StateSize, Align(4));
+
+  return createSetFPEnvNodes(Env, Chain, DL, MVT::i32, MMO, DAG, Subtarget);
+}
+
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
//
// i8/i16 vector implemented using dword LZCNT vector instruction
@@ -34320,6 +34439,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
+ case ISD::GET_FPENV_MEM: return LowerGET_FPENV_MEM(Op, DAG);
+ case ISD::SET_FPENV_MEM: return LowerSET_FPENV_MEM(Op, DAG);
+ case ISD::RESET_FPENV: return LowerRESET_FPENV(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
@@ -35562,6 +35684,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
NODE_NAME_CASE(FLDCW16m)
+ NODE_NAME_CASE(FNSTENVm)
+ NODE_NAME_CASE(FLDENVm)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a06502206e169..250df82a30c2f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -833,6 +833,12 @@ namespace llvm {
// Load FP control word from i16 memory.
FLDCW16m,
+ // Store x87 FPU environment into memory.
+ FNSTENVm,
+
+ // Load x87 FPU environment from memory.
+ FLDENVm,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
@@ -1663,6 +1669,9 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
SDValue &Chain) const;
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index fbbd3c83dc5c9e..66a2d27abf86b4 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -25,6 +25,7 @@ def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
@@ -49,6 +50,12 @@ def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
+def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
(X86fst node:$val, node:$ptr), [{
@@ -418,13 +425,17 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">;
+let Predicates = [HasX87] in
+def FLDENVm : I<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src",
+ [(X86fpenv_set addr:$src)]>;
}
let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">;
+let Predicates = [HasX87] in
+def FSTENVm : I<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst",
+ [(X86fpenv_get addr:$dst)]>;
}
let Uses = [FPSW], mayStore = 1 in
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
index d190f4c8f7fd1a..8f6b5b65e47baf 100644
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -280,27 +280,28 @@ entry:
define void @get_fpenv_01_native(ptr %ptr) nounwind {
; X86-NOSSE-LABEL: get_fpenv_01_native:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $44, %esp
+; X86-NOSSE-NEXT: subl $36, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: calll fegetenv
-; X86-NOSSE-NEXT: addl $44, %esp
+; X86-NOSSE-NEXT: fnstenv (%eax)
+; X86-NOSSE-NEXT: fldenv (%eax)
+; X86-NOSSE-NEXT: addl $36, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: get_fpenv_01_native:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $44, %esp
+; X86-SSE-NEXT: subl $36, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl %eax, (%esp)
-; X86-SSE-NEXT: calll fegetenv
-; X86-SSE-NEXT: addl $44, %esp
+; X86-SSE-NEXT: fnstenv (%eax)
+; X86-SSE-NEXT: fldenv (%eax)
+; X86-SSE-NEXT: stmxcsr 28(%eax)
+; X86-SSE-NEXT: addl $36, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: get_fpenv_01_native:
; X64: # %bb.0: # %entry
-; X64-NEXT: subq $40, %rsp
-; X64-NEXT: callq fegetenv@PLT
-; X64-NEXT: addq $40, %rsp
+; X64-NEXT: fnstenv (%rdi)
+; X64-NEXT: fldenv (%rdi)
+; X64-NEXT: stmxcsr 28(%rdi)
; X64-NEXT: retq
entry:
%env = call i256 @llvm.get.fpenv.i256()
@@ -342,27 +343,25 @@ entry:
define void @set_fpenv_01_native(ptr %ptr) nounwind {
; X86-NOSSE-LABEL: set_fpenv_01_native:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $44, %esp
+; X86-NOSSE-NEXT: subl $36, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl %eax, (%esp)
-; X86-NOSSE-NEXT: calll fesetenv
-; X86-NOSSE-NEXT: addl $44, %esp
+; X86-NOSSE-NEXT: fldenv (%eax)
+; X86-NOSSE-NEXT: addl $36, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: set_fpenv_01_native:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $44, %esp
+; X86-SSE-NEXT: subl $36, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl %eax, (%esp)
-; X86-SSE-NEXT: calll fesetenv
-; X86-SSE-NEXT: addl $44, %esp
+; X86-SSE-NEXT: fldenv (%eax)
+; X86-SSE-NEXT: ldmxcsr 28(%eax)
+; X86-SSE-NEXT: addl $36, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: set_fpenv_01_native:
; X64: # %bb.0: # %entry
-; X64-NEXT: subq $40, %rsp
-; X64-NEXT: callq fesetenv@PLT
-; X64-NEXT: addq $40, %rsp
+; X64-NEXT: fldenv (%rdi)
+; X64-NEXT: ldmxcsr 28(%rdi)
; X64-NEXT: retq
entry:
%env = load i256, ptr %ptr
@@ -402,26 +401,19 @@ entry:
define void @reset_fpenv_01_native() nounwind {
; X86-NOSSE-LABEL: reset_fpenv_01_native:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $12, %esp
-; X86-NOSSE-NEXT: movl $-1, (%esp)
-; X86-NOSSE-NEXT: calll fesetenv
-; X86-NOSSE-NEXT: addl $12, %esp
+; X86-NOSSE-NEXT: fldenv {{\.?LCPI[0-9]+_[0-9]+}}
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: reset_fpenv_01_native:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $12, %esp
-; X86-SSE-NEXT: movl $-1, (%esp)
-; X86-SSE-NEXT: calll fesetenv
-; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: fldenv {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-SSE-NEXT: ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28
; X86-SSE-NEXT: retl
;
; X64-LABEL: reset_fpenv_01_native:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rax
-; X64-NEXT: movq $-1, %rdi
-; X64-NEXT: callq fesetenv@PLT
-; X64-NEXT: popq %rax
+; X64-NEXT: fldenv {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; X64-NEXT: ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28(%rip)
; X64-NEXT: retq
entry:
call void @llvm.reset.fpenv()
More information about the llvm-commits
mailing list