[llvm-commits] [llvm] r155704 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFPStack.td lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86RegisterInfo.td test/CodeGen/X86/fp-stack-compare-cmov.ll test/CodeGen/X86/fp-stack-compare.ll
Benjamin Kramer
benny.kra at googlemail.com
Fri Apr 27 05:07:43 PDT 2012
Author: d0k
Date: Fri Apr 27 07:07:43 2012
New Revision: 155704
URL: http://llvm.org/viewvc/llvm-project?rev=155704&view=rev
Log:
X86: Don't emit conditional floating point moves when targeting pre-pentiumpro architectures.
* Model FPSW (the FPU status word) as a register.
* Add ISel patterns for the FUCOM*, FNSTSW and SAHF instructions.
* During Legalize/Lowering, build a node sequence to transfer the comparison
result from FPSW into EFLAGS. If you're wondering about the right-shift: That's
an implicit sub-register extraction (%ax -> %ah) which is handled later on by
the instruction selector.
Fixes PR6679. Patch by Christoph Erhardt!
Added:
llvm/trunk/test/CodeGen/X86/fp-stack-compare-cmov.ll
- copied, changed from r155703, llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86RegisterInfo.td
llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Apr 27 07:07:43 2012
@@ -8214,6 +8214,30 @@
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
+/// Convert a comparison if required by the subtarget.
+SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
+ SelectionDAG &DAG) const {
+ // If the subtarget does not support the FUCOMI instruction, floating-point
+ // comparisons have to be converted.
+ if (Subtarget->hasCMov() ||
+ Cmp.getOpcode() != X86ISD::CMP ||
+ !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
+ !Cmp.getOperand(1).getValueType().isFloatingPoint())
+ return Cmp;
+
+ // The instruction selector will select an FUCOM instruction instead of
+ // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
+ // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
+ // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...))), 8)))
+ DebugLoc dl = Cmp.getDebugLoc();
+ SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
+ SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
+ SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
+ DAG.getConstant(8, MVT::i8));
+ SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+ return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
+}
+
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
@@ -8335,6 +8359,7 @@
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
@@ -8503,7 +8528,8 @@
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
- if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
+ Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
@@ -8567,6 +8593,7 @@
SDValue CmpOp0 = Cmp.getOperand(0);
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
@@ -8673,6 +8700,7 @@
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
if (Cond.getOpcode() == X86ISD::CMP) {
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
@@ -8911,6 +8939,7 @@
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -8940,6 +8969,7 @@
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
@@ -8973,6 +9003,7 @@
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
+ Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
@@ -11111,6 +11142,7 @@
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
@@ -11179,6 +11211,7 @@
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
+ case X86ISD::SAHF: return "X86ISD::SAHF";
}
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Apr 27 07:07:43 2012
@@ -315,6 +315,12 @@
SFENCE,
LFENCE,
+ // FNSTSW16r - Store FP status word into i16 register.
+ FNSTSW16r,
+
+ // SAHF - Store contents of %ah into %eflags.
+ SAHF,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -909,6 +915,9 @@
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SelectionDAG &DAG) const;
+
+ /// Convert a comparison if required by the subtarget.
+ SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
};
namespace X86 {
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Fri Apr 27 07:07:43 2012
@@ -27,6 +27,7 @@
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
+def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -41,6 +42,7 @@
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
+def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
@@ -203,6 +205,7 @@
}
}
+let Defs = [FPSW] in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
@@ -213,6 +216,7 @@
defm MUL : FPBinary<fmul, MRM1m, "mul">;
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+}
class FPST0rInst<bits<8> o, string asm>
: FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
@@ -257,6 +261,7 @@
def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
}
+let Defs = [FPSW] in {
defm CHS : FPUnary<fneg, 0xE0, "fchs">;
defm ABS : FPUnary<fabs, 0xE1, "fabs">;
defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
@@ -269,6 +274,7 @@
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
}
def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
@@ -316,6 +322,7 @@
Requires<[HasCMov]>;
}
+let Defs = [FPSW] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
@@ -492,15 +499,16 @@
// Floating point compares.
-let Defs = [EFLAGS] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
+ [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
+ [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- []>; // FPSW = cmp ST(0) with ST(i)
-
+ [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // Defs = [FPSW]
+
// CC = ST(0) cmp ST(i)
+let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -509,7 +517,7 @@
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
-let Defs = [EFLAGS], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
"fucom\t$reg">, DD;
@@ -519,7 +527,9 @@
def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
(outs), (ins),
"fucompp">, DA;
+}
+let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
"fucomi\t$reg">, DB;
@@ -528,15 +538,18 @@
"fucompi\t$reg">, DF;
}
+let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
"fcomi\t$reg">, DB;
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
"fcompi\t$reg">, DF;
+}
// Floating point flag ops.
-let Defs = [AX] in
-def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
- (outs), (ins), "fnstsw %ax", []>, DF;
+let Defs = [AX], Uses = [FPSW] in
+def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw %ax",
+ [(set AX, (X86fp_stsw FPSW))]>, DF;
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
@@ -547,12 +560,14 @@
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
// FPU control instructions
+let Defs = [FPSW] in
def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg">, DD;
// Clear exceptions
+let Defs = [FPSW] in
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
// Operandless floating-point instructions for the disassembler.
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Apr 27 07:07:43 2012
@@ -63,6 +63,8 @@
[SDTCisInt<0>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
+
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -131,6 +133,8 @@
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
+def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -1016,8 +1020,9 @@
// Condition code ops, incl. set if equal/not equal/...
-let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
-def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [EFLAGS], Uses = [AH] in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
+ [(set EFLAGS, (X86sahf AH))]>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
@@ -1774,9 +1779,9 @@
def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
// We accept "fnstsw %eax" even though it only writes %ax.
-def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
-def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
-def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw %eax", (FNSTSW16r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW16r)>;
+def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
// this is compatible with what GAS does.
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Fri Apr 27 07:07:43 2012
@@ -223,6 +223,9 @@
def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
+ // Floating-point status word
+ def FPSW : Register<"fpsw">;
+
// Status flags register
def EFLAGS : Register<"flags">;
@@ -472,3 +475,7 @@
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
+def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
Copied: llvm/trunk/test/CodeGen/X86/fp-stack-compare-cmov.ll (from r155703, llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-compare-cmov.ll?p2=llvm/trunk/test/CodeGen/X86/fp-stack-compare-cmov.ll&p1=llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll&r1=155703&r2=155704&rev=155704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-stack-compare-cmov.ll Fri Apr 27 07:07:43 2012
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
; PR1012
define float @foo(float* %col.2.0) {
; CHECK: fucompi
+; CHECK: fcmov
%tmp = load float* %col.2.0
%tmp16 = fcmp olt float %tmp, 0.000000e+00
%tmp20 = fsub float -0.000000e+00, %tmp
Modified: llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll?rev=155704&r1=155703&r2=155704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-stack-compare.ll Fri Apr 27 07:07:43 2012
@@ -1,8 +1,11 @@
; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s
-; PR1012
+; PR6679
define float @foo(float* %col.2.0) {
-; CHECK: fucompi
+; CHECK: fucomp
+; CHECK-NOT: fucompi
+; CHECK: j
+; CHECK-NOT: fcmov
%tmp = load float* %col.2.0
%tmp16 = fcmp olt float %tmp, 0.000000e+00
%tmp20 = fsub float -0.000000e+00, %tmp
More information about the llvm-commits
mailing list