[llvm-commits] [llvm] r107852 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/fpcmp-opt.ll

Wed Jul 7 23:03:57 PDT 2010

Also, what about NaNs? A NaN compared to anything is required to return
unordered (that is the way I recall the IEEE standard).

Kev

On Jul 7, 2010, at 7:58 PM, Bob Wilson wrote:

> This doesn't account for +0 == -0, does it?
>
> I think this is only safe when the UnsafeFPMath option is enabled.
>
> On Jul 7, 2010, at 7:08 PM, Evan Cheng wrote:
>
>> Author: evancheng
>> Date: Wed Jul  7 21:08:50 2010
>> New Revision: 107852
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=107852&view=rev
>> Log:
>> Optimize some vfp comparisons to integer ones. This patch  
>> implements the simplest case when the following conditions are met:
>> 1. The arguments are f32.
>> 2. The arguments are loads and they have no uses other than the  
>> comparison.
>> 3. The comparison code is EQ or NE.
>>
>> e.g.
>>       vldr.32 s0, [r1]
>>       vldr.32 s1, [r0]
>>       vcmpe.f32       s1, s0
>>       vmrs    apsr_nzcv, fpscr
>> 	beq     LBB0_2
>> =>
>>       ldr     r1, [r1]
>>       ldr     r0, [r0]
>>       cmp     r0, r1
>>       beq     LBB0_2
>>
>> More complicated cases will be implemented in subsequent patches.
>>
>> Added:
>>   llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
>> Modified:
>>   llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>   llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=107852&r1=107851&r2=107852&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Jul  7  
>> 21:08:50 2010
>> @@ -59,7 +59,7 @@
>>
>> static cl::opt<bool>
>> EnableARMLongCalls("arm-long-calls", cl::Hidden,
>> -  cl::desc("Generate calls via indirect call instructions."),
>> +  cl::desc("Generate calls via indirect call instructions"),
>>  cl::init(false));
>>
>> static cl::opt<bool>
>> @@ -69,7 +69,7 @@
>>
>> static cl::opt<bool>
>> EnableARMCodePlacement("arm-code-placement", cl::Hidden,
>> -  cl::desc("Enable code placement pass for ARM."),
>> +  cl::desc("Enable code placement pass for ARM"),
>>  cl::init(false));
>>
>> static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT  
>> &LocVT,
>> @@ -2273,9 +2273,42 @@
>>  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
>> }
>>
>> +static bool canBitcastToInt(SDNode *Op) {
>> +  return Op->hasOneUse() &&
>> +    ISD::isNormalLoad(Op) &&
>> +    Op->getValueType(0) == MVT::f32;
>> +}
>> +
>> +static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
>> +  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
>> +    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
>> +                       Ld->getChain(), Ld->getBasePtr(),
>> +                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
>> +                       Ld->isVolatile(), Ld->isNonTemporal(),
>> +                       Ld->getAlignment());
>> +
>> +  llvm_unreachable("Unknown VFP cmp argument!");
>> +}
>> +
>> /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given  
>> operands.
>> -static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG  
>> &DAG,
>> -                         DebugLoc dl) {
>> +SDValue
>> +ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS,  
>> ISD::CondCode CC,
>> +                             SDValue &ARMCC, SelectionDAG &DAG,
>> +                             DebugLoc dl) const {
>> +  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ ||
>> +       CC == ISD::SETNE || CC == ISD::SETUNE) &&
>> +      canBitcastToInt(LHS.getNode()) &&  
>> canBitcastToInt(RHS.getNode())) {
>> +    // If there are no other uses of the CMP operands, and the  
>> condition
>> +    // code is EQ or NE, we can optimize it to an integer  
>> comparison.
>> +    if (CC == ISD::SETOEQ)
>> +      CC = ISD::SETEQ;
>> +    else if (CC == ISD::SETUNE)
>> +      CC = ISD::SETNE;
>> +    LHS = bitcastToInt(LHS, DAG);
>> +    RHS = bitcastToInt(RHS, DAG);
>> +    return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>> +  }
>> +
>>  SDValue Cmp;
>>  if (!isFloatingPointZero(RHS))
>>    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
>> @@ -2305,13 +2338,13 @@
>>
>>  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
>>  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>> -  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
>> +  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>>  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal,  
>> TrueVal,
>> -                                 ARMCC, CCR, Cmp);
>> +                               ARMCC, CCR, Cmp);
>>  if (CondCode2 != ARMCC::AL) {
>>    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
>>    // FIXME: Needs another CMP because flag can have but one use.
>> -    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
>> +    SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
>>    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
>>                         Result, TrueVal, ARMCC2, CCR, Cmp2);
>>  }
>> @@ -2338,8 +2371,8 @@
>>  ARMCC::CondCodes CondCode, CondCode2;
>>  FPCCToARMCC(CC, CondCode, CondCode2);
>>
>> -  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
>>  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
>> +  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
>>  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>>  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
>>  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
>> @@ -2427,7 +2460,7 @@
>>  return DAG.getNode(Opc, dl, VT, Op);
>> }
>>
>> -static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
>> +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG  
>> &DAG) const {
>>  // Implement fcopysign with a fabs and a conditional fneg.
>>  SDValue Tmp0 = Op.getOperand(0);
>>  SDValue Tmp1 = Op.getOperand(1);
>> @@ -2435,8 +2468,10 @@
>>  EVT VT = Op.getValueType();
>>  EVT SrcVT = Tmp1.getValueType();
>>  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
>> -  SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT),  
>> DAG, dl);
>>  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
>> +  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
>> +  SDValue Cmp = getVFPCmp(Tmp1, FP0,
>> +                          ISD::SETLT, ARMCC, DAG, dl);
>>  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
>>  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC,  
>> CCR, Cmp);
>> }
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=107852&r1=107851&r2=107852&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Wed Jul  7 21:08:50  
>> 2010
>> @@ -313,6 +313,7 @@
>>    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
>> +    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)  
>> const;
>> @@ -363,6 +364,8 @@
>>
>>    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
>>                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc  
>> dl) const;
>> +    SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
>> +                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc  
>> dl) const;
>>
>>    MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
>>                                         MachineBasicBlock *BB,
>>
>> Added: llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll?rev=107852&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll (added)
>> +++ llvm/trunk/test/CodeGen/ARM/fpcmp-opt.ll Wed Jul  7 21:08:50 2010
>> @@ -0,0 +1,29 @@
>> +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
>> +; rdar://7461510
>> +
>> +define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
>> +entry:
>> +; CHECK: t1:
>> +; CHECK-NOT: vldr
>> +; CHECK: ldr
>> +; CHECK: ldr
>> +; CHECK: cmp r0, r1
>> +; CHECK-NOT: vcmpe.f32
>> +; CHECK-NOT: vmrs
>> +; CHECK: beq
>> +  %0 = load float* %a
>> +  %1 = load float* %b
>> +  %2 = fcmp une float %0, %1
>> +  br i1 %2, label %bb1, label %bb2
>> +
>> +bb1:
>> +  %3 = call i32 @bar()
>> +  ret i32 %3
>> +
>> +bb2:
>> +  %4 = call i32 @foo()
>> +  ret i32 %4
>> +}
>> +
>> +declare i32 @bar()
>> +declare i32 @foo()
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits