[llvm] 12537ab - [FPEnv][X86] Implement lowering of llvm.set.rounding
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Thu May 13 00:33:45 PDT 2021
Author: Serge Pavlov
Date: 2021-05-13T14:30:38+07:00
New Revision: 12537ab77227db8f2b42e6172b24313d8f442e97
URL: https://github.com/llvm/llvm-project/commit/12537ab77227db8f2b42e6172b24313d8f442e97
DIFF: https://github.com/llvm/llvm-project/commit/12537ab77227db8f2b42e6172b24313d8f442e97.diff
LOG: [FPEnv][X86] Implement lowering of llvm.set.rounding
Differential Revision: https://reviews.llvm.org/D74730
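The intrinsic takes the new rounding mode in the same encoding as llvm.flt.rounds
(0 = TowardZero, 1 = NearestTiesToEven, 2 = Upward, 3 = Downward); see the tests
added below. As a rough standalone C++ sketch of the constant-argument mapping the
lowering computes (illustrative only, not the patch's code):

    #include <cstdint>
    #include <optional>

    // Map an llvm.set.rounding argument to the x87 RC field (CW bits 11:10).
    std::optional<uint16_t> x87RCBits(uint32_t Arg) {
      switch (Arg) {
      case 0: return 3u << 10;      // TowardZero        -> RC = 11
      case 1: return 0u << 10;      // NearestTiesToEven -> RC = 00
      case 2: return 2u << 10;      // Upward            -> RC = 10
      case 3: return 1u << 10;      // Downward          -> RC = 01
      default: return std::nullopt; // not representable on x86 hardware
      }
    }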
Added:
llvm/test/CodeGen/X86/fpenv.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrFPStack.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 27de4fa57869f..1adab3b363bde 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -358,7 +358,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FREM , MVT::f128 , Expand);
- setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
+ }
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
@@ -26937,6 +26941,118 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
return DAG.getMergeValues({RetVal, Chain}, DL);
}
+SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
+ SDValue Chain = Op.getNode()->getOperand(0);
+
+ // FP control word may be set only from data in memory. So we need to allocate
+ // stack space to save/load FP control word.
+ int OldCWFrameIdx = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(OldCWFrameIdx, getPointerTy(DAG.getDataLayout()));
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, OldCWFrameIdx);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 2, Align(2));
+
+ // Store FP control word into memory.
+ SDValue Ops[] = {Chain, StackSlot};
+ Chain = DAG.getMemIntrinsicNode(
+ X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);
+
+ // Load FP Control Word from stack slot and clear RM field (bits 11:10).
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI);
+ Chain = CWD.getValue(1);
+ CWD = DAG.getNode(ISD::AND, DL, MVT::i16, CWD.getValue(0),
+ DAG.getConstant(0xf3ff, DL, MVT::i16));
+
+ // Calculate new rounding mode.
+ SDValue NewRM = Op.getNode()->getOperand(1);
+ SDValue RMBits;
+ if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
+ uint64_t RM = CVal->getZExtValue();
+ int FieldVal;
+ switch (static_cast<RoundingMode>(RM)) {
+ case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break;
+ case RoundingMode::TowardNegative: FieldVal = X86::rmDownward; break;
+ case RoundingMode::TowardPositive: FieldVal = X86::rmUpward; break;
+ case RoundingMode::TowardZero: FieldVal = X86::rmTowardZero; break;
+ default:
+ llvm_unreachable("rounding mode is not supported by X86 hardware");
+ }
+ RMBits = DAG.getConstant(FieldVal, DL, MVT::i16);
+ } else {
+ // Need to convert argument into bits of control word:
+ // 0 Round to 0 -> 11
+ // 1 Round to nearest -> 00
+ // 2 Round to +inf -> 10
+ // 3 Round to -inf -> 01
+ // The 2-bit value then needs to be shifted so that it occupies bits 11:10.
+ // To make the conversion, put all these values into a value 0xc9 and shift
+ // it left depending on the rounding mode:
+ // (0xc9 << 4) & 0xc00 = X86::rmTowardZero
+ // (0xc9 << 6) & 0xc00 = X86::rmToNearest
+ // ...
+ // (0xc9 << (2 * NewRM + 4)) & 0xc00
+ SDValue ShiftValue =
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
+ DAG.getNode(ISD::ADD, DL, MVT::i32,
+ DAG.getNode(ISD::SHL, DL, MVT::i32, NewRM,
+ DAG.getConstant(1, DL, MVT::i8)),
+ DAG.getConstant(4, DL, MVT::i32)));
+ SDValue Shifted =
+ DAG.getNode(ISD::SHL, DL, MVT::i16, DAG.getConstant(0xc9, DL, MVT::i16),
+ ShiftValue);
+ RMBits = DAG.getNode(ISD::AND, DL, MVT::i16, Shifted,
+ DAG.getConstant(0xc00, DL, MVT::i16));
+ }
+
+ // Update rounding mode bits and store the new FP Control Word into stack.
+ CWD = DAG.getNode(ISD::OR, DL, MVT::i16, CWD, RMBits);
+ Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, /* Alignment = */ 2);
+
+ // Load FP control word from the slot.
+ SDValue OpsLD[] = {Chain, StackSlot};
+ MachineMemOperand *MMOL =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 2, Align(2));
+ Chain = DAG.getMemIntrinsicNode(
+ X86ISD::FLDCW16m, DL, DAG.getVTList(MVT::Other), OpsLD, MVT::i16, MMOL);
+
+ // If target supports SSE, set MXCSR as well. Rounding mode is encoded in the
+ // same way but in bits 14:13.
+ if (Subtarget.hasSSE1()) {
+ // Store MXCSR into memory.
+ Chain = DAG.getNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+ DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+ StackSlot);
+
+ // Load MXCSR from stack slot and clear RM field (bits 14:13).
+ SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI);
+ Chain = CWD.getValue(1);
+ CWD = DAG.getNode(ISD::AND, DL, MVT::i32, CWD.getValue(0),
+ DAG.getConstant(0xffff9fff, DL, MVT::i32));
+
+ // Shift X87 RM bits from 11:10 to 14:13.
+ RMBits = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, RMBits);
+ RMBits = DAG.getNode(ISD::SHL, DL, MVT::i32, RMBits,
+ DAG.getConstant(3, DL, MVT::i8));
+
+ // Update rounding mode bits and store the new FP Control Word into stack.
+ CWD = DAG.getNode(ISD::OR, DL, MVT::i32, CWD, RMBits);
+ Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, /* Alignment = */ 4);
+
+ // Load MXCSR from the slot.
+ Chain = DAG.getNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+ DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+ StackSlot);
+ }
+
+ return Chain;
+}
+
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
//
// i8/i16 vector implemented using dword LZCNT vector instruction
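When the rounding-mode argument is not a constant, the lowering above derives the
RC bits with the 0xc9 shift trick described in the comment. A minimal standalone
C++ check of that arithmetic (illustrative only, not the DAG code):

    #include <cassert>
    #include <cstdint>

    // (0xc9 << (2*RM + 4)) & 0xc00 produces the x87 RC field for each encoding.
    uint16_t rcBitsFromDynamicRM(uint32_t RM) {
      unsigned Shift = 2 * RM + 4; // computed as (RM << 1) + 4 in the DAG
      return static_cast<uint16_t>((0xc9u << Shift) & 0xc00u);
    }

    int main() {
      assert(rcBitsFromDynamicRM(0) == 0xc00); // TowardZero        -> 11
      assert(rcBitsFromDynamicRM(1) == 0x000); // NearestTiesToEven -> 00
      assert(rcBitsFromDynamicRM(2) == 0x800); // Upward            -> 10
      assert(rcBitsFromDynamicRM(3) == 0x400); // Downward          -> 01
      return 0;
    }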
@@ -30167,6 +30283,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
@@ -31187,6 +31304,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(EH_RETURN)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
+ NODE_NAME_CASE(FLDCW16m)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
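On targets with SSE the rounding control also has to be written into MXCSR, where
the same two-bit field sits in bits 14:13 instead of 11:10, so the code above
reuses the already-computed x87 bits and shifts them left by three. A small
constexpr sketch of that relationship (illustrative only):

    #include <cstdint>

    // x87 CW keeps RC in bits 11:10; MXCSR keeps the same field in bits 14:13.
    constexpr uint32_t mxcsrRCBits(uint16_t X87RCBits) {
      return static_cast<uint32_t>(X87RCBits) << 3;
    }
    static_assert(mxcsrRCBits(0xc00) == 0x6000, "RC moves from bits 11:10 to 14:13");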
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9f0f108cb12c7..e48272518225c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -780,9 +780,12 @@ namespace llvm {
// subvector broadcast from memory.
SUBV_BROADCAST_LOAD,
- // Store FP control world into i16 memory.
+ // Store FP control word into i16 memory.
FNSTCW16m,
+ // Load FP control word from i16 memory.
+ FLDCW16m,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
@@ -847,6 +850,19 @@ namespace llvm {
};
} // end namespace X86ISD
+ namespace X86 {
+ /// Current rounding mode is represented in bits 11:10 of the FP Control
+ /// Word (FPCW). These values are the same as the corresponding constants
+ /// used for rounding mode in glibc.
+ enum RoundingMode {
+ rmToNearest = 0, // FE_TONEAREST
+ rmDownward = 1 << 10, // FE_DOWNWARD
+ rmUpward = 2 << 10, // FE_UPWARD
+ rmTowardZero = 3 << 10, // FE_TOWARDZERO
+ rmMask = 3 << 10 // Bit mask selecting rounding mode
+ };
+ }
+
/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
@@ -1518,6 +1534,7 @@ namespace llvm {
SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
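As the comment on the new X86::RoundingMode enum notes, its values coincide with
the glibc <fenv.h> rounding-mode macros; the equalities below hold for the x86
definitions of FE_* (an assumption about the host fenv.h, not something the patch
checks at compile time):

    #include <cfenv>

    static_assert(FE_TONEAREST  == 0,         "X86::rmToNearest");
    static_assert(FE_DOWNWARD   == (1 << 10), "X86::rmDownward");
    static_assert(FE_UPWARD     == (2 << 10), "X86::rmUpward");
    static_assert(FE_TOWARDZERO == (3 << 10), "X86::rmTowardZero");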
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index 682bdda20a1af..b538da0d48536 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -24,6 +24,7 @@ def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -38,6 +39,9 @@ def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
+def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
(X86fst node:$val, node:$ptr), [{
@@ -705,7 +709,8 @@ def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
} // SchedRW
let Defs = [FPSW,FPCW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
- (outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
+ (outs), (ins i16mem:$dst), "fldcw\t$dst",
+ [(X86fp_cwd_set16 addr:$dst)]>,
Sched<[WriteLoad]>;
// FPU control instructions
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
new file mode 100644
index 0000000000000..9b033765f1596
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s -check-prefix=X86-NOSSE
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse -verify-machineinstrs < %s | FileCheck %s -check-prefix=X86-SSE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefix=X64
+
+declare void @llvm.set.rounding(i32 %x)
+
+define void @func_01() nounwind {
+; X86-NOSSE-LABEL: func_01:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: orb $12, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE-LABEL: func_01:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: fnstcw (%esp)
+; X86-SSE-NEXT: orb $12, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fldcw (%esp)
+; X86-SSE-NEXT: stmxcsr (%esp)
+; X86-SSE-NEXT: orb $96, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: ldmxcsr (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: func_01:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: orb $12, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: orb $96, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 0) ; TowardZero (CW[11-10] = 11)
+ ret void
+}
+
+define void @func_02() nounwind {
+; X86-NOSSE-LABEL: func_02:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: andb $-13, {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE-LABEL: func_02:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: fnstcw (%esp)
+; X86-SSE-NEXT: andb $-13, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: fldcw (%esp)
+; X86-SSE-NEXT: stmxcsr (%esp)
+; X86-SSE-NEXT: andb $-97, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: ldmxcsr (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: func_02:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: andb $-13, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: andb $-97, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 1) ; ToNearestTiesToEven (CW[11-10] = 00)
+ ret void
+}
+
+define void @func_03() nounwind {
+; X86-NOSSE-LABEL: func_03:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %eax
+; X86-NOSSE-NEXT: orl $2048, %eax # imm = 0x800
+; X86-NOSSE-NEXT: movw %ax, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE-LABEL: func_03:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: fnstcw (%esp)
+; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-SSE-NEXT: andl (%esp), %eax
+; X86-SSE-NEXT: orl $2048, %eax # imm = 0x800
+; X86-SSE-NEXT: movw %ax, (%esp)
+; X86-SSE-NEXT: fldcw (%esp)
+; X86-SSE-NEXT: stmxcsr (%esp)
+; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X86-SSE-NEXT: andl (%esp), %eax
+; X86-SSE-NEXT: orl $16384, %eax # imm = 0x4000
+; X86-SSE-NEXT: movl %eax, (%esp)
+; X86-SSE-NEXT: ldmxcsr (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: func_03:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $2048, %eax # imm = 0x800
+; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $16384, %eax # imm = 0x4000
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 2) ; Upward (CW[11-10] = 10)
+ ret void
+}
+
+define void @func_04() nounwind {
+; X86-NOSSE-LABEL: func_04:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %eax
+; X86-NOSSE-NEXT: orl $1024, %eax # imm = 0x400
+; X86-NOSSE-NEXT: movw %ax, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE-LABEL: func_04:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: fnstcw (%esp)
+; X86-SSE-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X86-SSE-NEXT: andl (%esp), %eax
+; X86-SSE-NEXT: orl $1024, %eax # imm = 0x400
+; X86-SSE-NEXT: movw %ax, (%esp)
+; X86-SSE-NEXT: fldcw (%esp)
+; X86-SSE-NEXT: stmxcsr (%esp)
+; X86-SSE-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X86-SSE-NEXT: andl (%esp), %eax
+; X86-SSE-NEXT: orl $8192, %eax # imm = 0x2000
+; X86-SSE-NEXT: movl %eax, (%esp)
+; X86-SSE-NEXT: ldmxcsr (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: func_04:
+; X64: # %bb.0:
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %eax # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $1024, %eax # imm = 0x400
+; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %eax # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT: orl $8192, %eax # imm = 0x2000
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 3) ; Downward (CW[11-10] = 01)
+ ret void
+}
+
+define void @func_05(i32 %x) nounwind {
+; X86-NOSSE-LABEL: func_05:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: leal 4(%eax,%eax), %ecx
+; X86-NOSSE-NEXT: movl $201, %eax
+; X86-NOSSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOSSE-NEXT: shll %cl, %eax
+; X86-NOSSE-NEXT: andl $3072, %eax # imm = 0xC00
+; X86-NOSSE-NEXT: fnstcw (%esp)
+; X86-NOSSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X86-NOSSE-NEXT: andl (%esp), %ecx
+; X86-NOSSE-NEXT: orl %eax, %ecx
+; X86-NOSSE-NEXT: movw %cx, (%esp)
+; X86-NOSSE-NEXT: fldcw (%esp)
+; X86-NOSSE-NEXT: popl %eax
+; X86-NOSSE-NEXT: retl
+;
+; X86-SSE-LABEL: func_05:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: leal 4(%eax,%eax), %ecx
+; X86-SSE-NEXT: movl $201, %eax
+; X86-SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SSE-NEXT: shll %cl, %eax
+; X86-SSE-NEXT: andl $3072, %eax # imm = 0xC00
+; X86-SSE-NEXT: fnstcw (%esp)
+; X86-SSE-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X86-SSE-NEXT: andl (%esp), %ecx
+; X86-SSE-NEXT: orl %eax, %ecx
+; X86-SSE-NEXT: movw %cx, (%esp)
+; X86-SSE-NEXT: fldcw (%esp)
+; X86-SSE-NEXT: stmxcsr (%esp)
+; X86-SSE-NEXT: movl $-24577, %ecx # imm = 0x9FFF
+; X86-SSE-NEXT: andl (%esp), %ecx
+; X86-SSE-NEXT: leal (%ecx,%eax,8), %eax
+; X86-SSE-NEXT: movl %eax, (%esp)
+; X86-SSE-NEXT: ldmxcsr (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: retl
+;
+; X64-LABEL: func_05:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 4(%rdi,%rdi), %ecx
+; X64-NEXT: movl $201, %eax
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: andl $3072, %eax # imm = 0xC00
+; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-3073, %ecx # imm = 0xF3FF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
+; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $-24577, %ecx # imm = 0x9FFF
+; X64-NEXT: andl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: leal (%rcx,%rax,8), %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: ldmxcsr -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
+ call void @llvm.set.rounding(i32 %x) ; Downward
+ ret void
+}
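The byte-sized immediates in func_01 and func_02 above come from the compiler
narrowing the 16/32-bit RC update to the single byte that holds the field. A
short sketch of the arithmetic (illustrative, not part of the test):

    // CW RC field (bits 11:10) lives in the high byte of the 16-bit word.
    static_assert((0x0c00 >> 8) == 12, "orb $12 sets CW RC to 11 (TowardZero)");
    // MXCSR RC field (bits 14:13) lives in byte 1 of the 32-bit register image.
    static_assert((0x6000 >> 8) == 96, "orb $96 sets MXCSR RC to 11 (TowardZero)");
    // andb $-13 is a byte AND with 0xf3, clearing CW bits 11:10 (NearestTiesToEven);
    // andb $-97 is a byte AND with 0x9f, clearing MXCSR bits 14:13.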