[llvm-commits] [llvm] r107852 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/fpcmp-opt.ll
Evan Cheng
evan.cheng at apple.com
Wed Jul 7 23:21:25 PDT 2010
On Jul 7, 2010, at 11:03 PM, Kevin Enderby wrote:
> Also, what about NaNs compared to anything being required to return unordered (that is the way I recall the IEEE standard)?
I did check for those. :-)
Evan
>
> Kev
>
> On Jul 7, 2010, at 7:58 PM, Bob Wilson wrote:
>
>> This doesn't account for +0 == -0, does it?
>>
>> I think this is only safe when the UnsafeFPMath option is enabled.
>>
>> On Jul 7, 2010, at 7:08 PM, Evan Cheng wrote:
>>
>>> Author: evancheng
>>> Date: Wed Jul 7 21:08:50 2010
>>> New Revision: 107852
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=107852&view=rev
>>> Log:
>>> Optimize some vfp comparisons to integer ones. This patch implements the simplest case when the following conditions are met:
>>> 1. The arguments are f32.
>>> 2. The arguments are loads and they have no uses other than the comparison.
>>> 3. The comparison code is EQ or NE.
>>>
>>> e.g.
>>> vldr.32 s0, [r1]
>>> vldr.32 s1, [r0]
>>> vcmpe.f32 s1, s0
>>> vmrs apsr_nzcv, fpscr
>>> beq LBB0_2
>>> =>
>>> ldr r1, [r1]
>>> ldr r0, [r0]
>>> cmp r0, r1
>>> beq LBB0_2
>>>
>>> More complicated cases will be implemented in subsequent patches.
>>>
>>> Added:
>>> llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
>>> Modified:
>>> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>>>
>>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=107852&r1=107851&r2=107852&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Jul 7 21:08:50 2010
>>> @@ -59,7 +59,7 @@
>>>
>>> static cl::opt<bool>
>>> EnableARMLongCalls("arm-long-calls", cl::Hidden,
>>> - cl::desc("Generate calls via indirect call instructions."),
>>> + cl::desc("Generate calls via indirect call instructions"),
>>> cl::init(false));
>>>
>>> static cl::opt<bool>
>>> @@ -69,7 +69,7 @@
>>>
>>> static cl::opt<bool>
>>> EnableARMCodePlacement("arm-code-placement", cl::Hidden,
>>> - cl::desc("Enable code placement pass for ARM."),
>>> + cl::desc("Enable code placement pass for ARM"),
>>> cl::init(false));
>>>
>>> static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
>>> @@ -2273,9 +2273,42 @@
>>> return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
>>> }
>>>
>>> +static bool canBitcastToInt(SDNode *Op) {
>>> + return Op->hasOneUse() &&
>>> + ISD::isNormalLoad(Op) &&
>>> + Op->getValueType(0) == MVT::f32;
>>> +}
>>> +
>>> +static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
>>> + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
>>> + return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
>>> + Ld->getChain(), Ld->getBasePtr(),
>>> + Ld->getSrcValue(), Ld->getSrcValueOffset(),
>>> + Ld->isVolatile(), Ld->isNonTemporal(),
>>> + Ld->getAlignment());
>>> +
>>> + llvm_unreachable("Unknown VFP cmp argument!");
>>> +}
>>> +
>>> /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
>>> -static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
>>> - DebugLoc dl) {
>>> +SDValue
>>> +ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
>>> + SDValue &ARMCC, SelectionDAG &DAG,
>>> + DebugLoc dl) const {
>>> + if ((CC == ISD::SETEQ || CC == ISD::SETOEQ ||
>>> + CC == ISD::SETNE || CC == ISD::SETUNE) &&
>>> + canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
>>> + // If there are no other uses of the CMP operands, and the condition
>>> + // code is EQ or NE, we can optimize it to an integer comparison.
>>> + if (CC == ISD::SETOEQ)
>>> + CC = ISD::SETEQ;
>>> + else if (CC == ISD::SETUNE)
>>> + CC = ISD::SETNE;
>>> + LHS = bitcastToInt(LHS, DAG);
>>> + RHS = bitcastToInt(RHS, DAG);
>>> + return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>>> + }
>>> +
>>> SDValue Cmp;
>>> if (!isFloatingPointZero(RHS))
>>> Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
>>> @@ -2305,13 +2338,13 @@
>>>
>>> SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
>>> SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>>> - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
>>> + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>>> SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
>>> - ARMCC, CCR, Cmp);
>>> + ARMCC, CCR, Cmp);
>>> if (CondCode2 != ARMCC::AL) {
>>> SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
>>> // FIXME: Needs another CMP because flag can have but one use.
>>> - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
>>> + SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
>>> Result = DAG.getNode(ARMISD::CMOV, dl, VT,
>>> Result, TrueVal, ARMCC2, CCR, Cmp2);
>>> }
>>> @@ -2338,8 +2371,8 @@
>>> ARMCC::CondCodes CondCode, CondCode2;
>>> FPCCToARMCC(CC, CondCode, CondCode2);
>>>
>>> - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
>>> SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
>>> + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>>> SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>>> SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
>>> SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
>>> @@ -2427,7 +2460,7 @@
>>> return DAG.getNode(Opc, dl, VT, Op);
>>> }
>>>
>>> -static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
>>> +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
>>> // Implement fcopysign with a fabs and a conditional fneg.
>>> SDValue Tmp0 = Op.getOperand(0);
>>> SDValue Tmp1 = Op.getOperand(1);
>>> @@ -2435,8 +2468,10 @@
>>> EVT VT = Op.getValueType();
>>> EVT SrcVT = Tmp1.getValueType();
>>> SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
>>> - SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
>>> SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
>>> + SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
>>> + SDValue Cmp = getVFPCmp(Tmp1, FP0,
>>> + ISD::SETLT, ARMCC, DAG, dl);
>>> SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>>> return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
>>> }
>>>
>>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=107852&r1=107851&r2=107852&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
>>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Wed Jul 7 21:08:50 2010
>>> @@ -313,6 +313,7 @@
>>> SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
>>> SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
>>> SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
>>> + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
>>> SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
>>> SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
>>> SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
>>> @@ -363,6 +364,8 @@
>>>
>>> SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
>>> SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
>>> + SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
>>> + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
>>>
>>> MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
>>> MachineBasicBlock *BB,
>>>
>>> Added: llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll?rev=107852&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll (added)
>>> +++ llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll Wed Jul 7 21:08:50 2010
>>> @@ -0,0 +1,29 @@
>>> +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
>>> +; rdar://7461510
>>> +
>>> +define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
>>> +entry:
>>> +; CHECK: t1:
>>> +; CHECK-NOT: vldr
>>> +; CHECK: ldr
>>> +; CHECK: ldr
>>> +; CHECK: cmp r0, r1
>>> +; CHECK-NOT: vcmpe.f32
>>> +; CHECK-NOT: vmrs
>>> +; CHECK: beq
>>> + %0 = load float* %a
>>> + %1 = load float* %b
>>> + %2 = fcmp une float %0, %1
>>> + br i1 %2, label %bb1, label %bb2
>>> +
>>> +bb1:
>>> + %3 = call i32 @bar()
>>> + ret i32 %3
>>> +
>>> +bb2:
>>> + %4 = call i32 @foo()
>>> + ret i32 %4
>>> +}
>>> +
>>> +declare i32 @bar()
>>> +declare i32 @foo()
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list