[llvm] 4175d7e - [X86] Custom isel floating point X86ISD::CMP on pre-CMOV targets. Eliminate ConvertCmpIfNecessary
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 10:43:53 PST 2020
Author: Craig Topper
Date: 2020-02-06T10:43:06-08:00
New Revision: 4175d7e22e1c70c0d2c8ff4a70235e8ec3461e5e
URL: https://github.com/llvm/llvm-project/commit/4175d7e22e1c70c0d2c8ff4a70235e8ec3461e5e
DIFF: https://github.com/llvm/llvm-project/commit/4175d7e22e1c70c0d2c8ff4a70235e8ec3461e5e.diff
LOG: [X86] Custom isel floating point X86ISD::CMP on pre-CMOV targets. Eliminate ConvertCmpIfNecessary
If we don't have cmov, X87 compares write to FPSW and we need to
move the bits to EFLAGS to use as JCC/SETCC/CMOV conditions.
Previously this was done by calling ConvertCmpIfNecessary in
multiple places which would emit the extra code for the FNSTSW,
a shift, a truncate, and a SAHF instruction. Isel would then
select trunc+X86ISD::CMP to a FUCOM instruction that produces FPSW.
This patch centralizes all of the handling into a single custom
isel handler. This allows us to remove ConvertCmpIfNecessary and
a couple of target-specific ISD opcodes.
Differential Revision: https://reviews.llvm.org/D73863
Added:
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrFPStack.td
llvm/lib/Target/X86/X86InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9240fd24f31d..53e9f3482d69 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5037,17 +5037,83 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
- case X86ISD::CMP: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
+ case X86ISD::CMP:
+ case X86ISD::STRICT_FCMP:
+ case X86ISD::STRICT_FCMPS: {
+ bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
+ Node->getOpcode() == X86ISD::STRICT_FCMPS;
+ SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
+ SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
+
+ // Save the original VT of the compare.
+ MVT CmpVT = N0.getSimpleValueType();
+
+ // Floating point needs special handling if we don't have FCOMI.
+ // FIXME: Should we have a X86ISD::FCMP to avoid mixing int and fp?
+ if (CmpVT.isFloatingPoint()) {
+ if (Subtarget->hasCMov())
+ break;
+
+ bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
+
+ unsigned Opc;
+ switch (CmpVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected type!");
+ case MVT::f32:
+ Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
+ break;
+ case MVT::f64:
+ Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
+ break;
+ case MVT::f80:
+ Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
+ break;
+ }
+
+ SDValue Cmp;
+ SDValue Chain =
+ IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
+ if (IsStrictCmp) {
+ SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other);
+ Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
+ Chain = Cmp.getValue(1);
+ } else {
+ Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0);
+ }
+
+ // Move FPSW to AX.
+ SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue());
+ Chain = FPSW;
+ SDValue FNSTSW =
+ SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW,
+ FPSW.getValue(1)),
+ 0);
+
+ // Extract upper 8-bits of AX.
+ SDValue Extract =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
+
+ // Move AH into flags.
+ // Some 64-bit targets lack SAHF support, but they do support FCOMI.
+ assert(Subtarget->hasLAHFSAHF() &&
+ "Target doesn't support SAHF or FCOMI?");
+ SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
+ Chain = AH;
+ SDValue SAHF = SDValue(
+ CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
+
+ if (IsStrictCmp)
+ ReplaceUses(SDValue(Node, 1), Chain);
+
+ ReplaceUses(SDValue(Node, 0), SAHF);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
// Optimizations for TEST compares.
if (!isNullConstant(N1))
break;
- // Save the original VT of the compare.
- MVT CmpVT = N0.getSimpleValueType();
-
// If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed
// by a test instruction. The test should be removed later by
// analyzeCompare if we are using only the zero flag.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3242af4801f3..af86e20b3955 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20998,36 +20998,6 @@ static std::pair<SDValue, SDValue> EmitCmp(SDValue Op0, SDValue Op1,
return std::make_pair(Sub.getValue(1), SDValue());
}
-/// Convert a comparison if required by the subtarget.
-SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
- SelectionDAG &DAG) const {
- // If the subtarget does not support the FUCOMI instruction, floating-point
- // comparisons have to be converted.
- bool IsCmp = Cmp.getOpcode() == X86ISD::CMP;
- bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP ||
- Cmp.getOpcode() == X86ISD::STRICT_FCMPS;
-
- if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) ||
- !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() ||
- !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint())
- return Cmp;
-
- // The instruction selector will select an FUCOM instruction instead of
- // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
- // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
- // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8))))
- SDLoc dl(Cmp);
- SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
- SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
- SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
- DAG.getConstant(8, dl, MVT::i8));
- SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
-
- // Some 64-bit targets lack SAHF support, but they do support FCOMI.
- assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
- return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
-}
-
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -21991,7 +21961,6 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
SDValue EFLAGS = Tmp.first;
if (Chain)
Chain = Tmp.second;
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
}
@@ -22131,8 +22100,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
/// Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
- if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
- Opc == X86ISD::SAHF)
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
@@ -23085,7 +23053,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -23101,7 +23068,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// separate test.
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -23133,7 +23099,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG, Subtarget);
}
- Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
@@ -29788,7 +29753,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(EH_RETURN)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
- NODE_NAME_CASE(FNSTSW16r)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
@@ -29913,7 +29877,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
- NODE_NAME_CASE(SAHF)
NODE_NAME_CASE(RDRAND)
NODE_NAME_CASE(RDSEED)
NODE_NAME_CASE(RDPKRU)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 62b176679fd6..e77df791edd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -541,12 +541,6 @@ namespace llvm {
MEMBARRIER,
MFENCE,
- // Store FP status word into i16 register.
- FNSTSW16r,
-
- // Store contents of %ah into %eflags.
- SAHF,
-
// Get a random integer and indicate whether it is valid in CF.
RDRAND,
@@ -1494,9 +1488,6 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Convert a comparison if required by the subtarget.
- SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
-
/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index b675aeb36fbb..a4ed513d9253 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -22,7 +22,6 @@ def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>]>;
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
-def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -34,7 +33,6 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
@@ -638,19 +636,13 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
-let SchedRW = [WriteFCom], Uses = [FPCW] in {
-def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>;
-def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>;
-def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>;
-def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP32:$lhs, RFP32:$rhs)))]>;
-def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP64:$lhs, RFP64:$rhs)))]>;
-def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP80:$lhs, RFP80:$rhs)))]>;
+let SchedRW = [WriteFCom], Uses = [FPCW], hasSideEffects = 0 in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
+def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
+def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
+def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
} // SchedRW
} // mayRaiseFPException = 1
@@ -701,10 +693,9 @@ def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
// Floating point flag ops.
let SchedRW = [WriteALU] in {
-let Defs = [AX, FPSW], Uses = [FPSW] in
+let Defs = [AX, FPSW], Uses = [FPSW], hasSideEffects = 0 in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
- (outs), (ins), "fnstsw\t{%ax|ax}",
- [(set AX, (X86fp_stsw FPSW))]>;
+ (outs), (ins), "fnstsw\t{%ax|ax}", []>;
let Defs = [FPSW], Uses = [FPCW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ca5425e8b89f..131832b903ed 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -152,8 +152,6 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
-def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
-
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
@@ -1787,9 +1785,8 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
// Condition code ops, incl. set if equal/not equal/...
let SchedRW = [WriteLAHFSAHF] in {
-let Defs = [EFLAGS], Uses = [AH] in
-def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
- [(set EFLAGS, (X86sahf AH))]>,
+let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH
Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
More information about the llvm-commits
mailing list