[llvm-commits] [llvm] r108258 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/ARM/ARMInstrInfo.td test/CodeGen/ARM/fpcmp-opt.ll
Evan Cheng
evan.cheng at apple.com
Tue Jul 13 12:27:43 PDT 2010
Author: evancheng
Date: Tue Jul 13 14:27:42 2010
New Revision: 108258
URL: http://llvm.org/viewvc/llvm-project?rev=108258&view=rev
Log:
Extend the r107852 optimization which turns some fp compare to code sequence using only i32 operations. It now optimize some f64 compares when fp compare is exceptionally slow (e.g. cortex-a8). It also catches comparison against 0.0.
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=108258&r1=108257&r2=108258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Jul 13 14:27:42 2010
@@ -565,6 +565,7 @@
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CNEG: return "ARMISD::CNEG";
@@ -2216,7 +2217,7 @@
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ SDValue &ARMcc, SelectionDAG &DAG,
DebugLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
@@ -2268,48 +2269,14 @@
CompareType = ARMISD::CMPZ;
break;
}
- ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
-static bool canBitcastToInt(SDNode *Op) {
- return Op->hasOneUse() &&
- ISD::isNormalLoad(Op) &&
- Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
- Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
-
- llvm_unreachable("Unknown VFP cmp argument!");
-}
-
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
DebugLoc dl) const {
- if (UnsafeFPMath && FiniteOnlyFPMath() &&
- (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
- CC == ISD::SETNE || CC == ISD::SETUNE) &&
- canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
- // If unsafe fp math optimization is enabled and there are no othter uses of
- // the CMP operands, and the condition code is EQ oe NE, we can optimize it
- // to an integer comparison.
- if (CC == ISD::SETOEQ)
- CC = ISD::SETEQ;
- else if (CC == ISD::SETUNE)
- CC = ISD::SETNE;
- LHS = bitcastToInt(LHS, DAG);
- RHS = bitcastToInt(RHS, DAG);
- return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- }
-
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2328,59 +2295,184 @@
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMcc, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
- SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMCC2, CCR, Cmp2);
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
}
return Result;
}
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+ // If one of the operand is zero, it's safe to ignore the NaN case.
+ (FiniteOnlyFPMath() || SeenZero)) {
+ // If unsafe fp math optimization is enabled and there are no othter uses of
+ // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
+ SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMCC, CCR,Cmp);
+ Chain, Dest, ARMcc, CCR, Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
if (CondCode2 != ARMCC::AL) {
- ARMCC = DAG.getConstant(CondCode2, MVT::i32);
- SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
}
return Res;
@@ -2469,12 +2561,11 @@
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0,
- ISD::SETLT, ARMCC, DAG, dl);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2611,7 +2702,7 @@
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -2627,9 +2718,9 @@
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -2647,7 +2738,7 @@
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -2661,9 +2752,9 @@
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -3825,6 +3916,15 @@
return BB;
}
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -3941,6 +4041,46 @@
return BB;
}
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
case ARM::tANDsp:
case ARM::tADDspr_:
case ARM::tSUBspi_:
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=108258&r1=108257&r2=108258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Jul 13 14:27:42 2010
@@ -53,6 +53,8 @@
CMOV, // ARM conditional move instructions.
CNEG, // ARM conditional negate instructions.
+ BCC_i64,
+
RBIT, // ARM bitreverse instruction
FTOSI, // FP to sint within a FP register.
@@ -363,9 +365,11 @@
DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=108258&r1=108257&r2=108258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue Jul 13 14:27:42 2010
@@ -38,6 +38,12 @@
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -90,6 +96,9 @@
def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
[SDNPHasChain]>;
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
@@ -2279,6 +2288,22 @@
def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
(CMNzri GPR:$src, so_imm_neg:$imm)>;
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
Modified: llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll?rev=108258&r1=108257&r2=108258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll Tue Jul 13 14:27:42 2010
@@ -1,16 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
; rdar://7461510
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
entry:
-; CHECK: t1:
-; CHECK-NOT: vldr
-; CHECK: ldr
-; CHECK: ldr
-; CHECK: cmp r0, r1
-; CHECK-NOT: vcmpe.f32
-; CHECK-NOT: vmrs
-; CHECK: beq
+; FINITE: t1:
+; FINITE-NOT: vldr
+; FINITE: ldr
+; FINITE: ldr
+; FINITE: cmp r0, r1
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: beq
+
+; NAN: t1:
+; NAN: vldr.32 s0,
+; NAN: vldr.32 s1,
+; NAN: vcmpe.f32 s1, s0
+; NAN: vmrs apsr_nzcv, fpscr
+; NAN: beq
%0 = load float* %a
%1 = load float* %b
%2 = fcmp une float %0, %1
@@ -25,5 +33,50 @@
ret i32 %4
}
+define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
+entry:
+; FINITE: t2:
+; FINITE-NOT: vldr
+; FINITE: ldrd r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE: cmpeq r1, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+ %0 = load double* %a
+ %1 = fcmp oeq double %0, 0.000000e+00
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ %2 = call i32 @bar()
+ ret i32 %2
+
+bb2:
+ %3 = call i32 @foo()
+ ret i32 %3
+}
+
+define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
+entry:
+; FINITE: t3:
+; FINITE-NOT: vldr
+; FINITE: ldr r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+ %0 = load float* %a
+ %1 = fcmp oeq float %0, 0.000000e+00
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ %2 = call i32 @bar()
+ ret i32 %2
+
+bb2:
+ %3 = call i32 @foo()
+ ret i32 %3
+}
+
declare i32 @bar()
declare i32 @foo()
More information about the llvm-commits
mailing list