[llvm] r237247 - [AArch64] Codegen VMAX/VMIN for safe math cases

Wed May 13 07:14:06 PDT 2015

Hi Artyom,

Unfortunately I had to revert this as it was causing some failures in
spec2000/2006.

One example is 453.povray, where we get failures in
instruction selection:

clang++ -c -o camera.o -DSPEC_CPU -DNDEBUG    -O3 -ffast-math
-mcpu=cortex-a57   -DSPEC_CPU_LP64       camera.cpp
fatal error: error in backend: Cannot select: 0x37689d0: i32 =
AArch64ISD::FMAX 0x372b6f0, 0x376f780 [ORD=20] [ID=43]
0x372b6f0: i32 = sub 0x372ab10, 0x372d7d0 [ORD=17] [ID=35]
0x372ab10: i32 = Constant<0> [ID=16]
0x372d7d0: i32,ch = CopyFromReg 0x369f890, 0x372b100 [ORD=17] [ID=27]
0x372b100: i32 = Register %vreg207 [ID=17]
0x376f780: i32 = xor 0x372dc90, 0x3729530 [ORD=18] [ID=36]
0x372dc90: i32,ch = CopyFromReg 0x369f890, 0x372b5c0 [ORD=18] [ID=28]
0x372b5c0: i32 = Register %vreg206 [ID=19]
0x3729530: i32 = Constant<-1> [ID=18]

Or:
/tmp/clang//bin/clang++ -c -o colour.o -DSPEC_CPU -DNDEBUG    -O3
-ffast-math -mcpu=cortex-a57   -DSPEC_CPU_LP64       colour.cpp
fatal error: error in backend: Cannot select: 0x367ef10: i64 =
AArch64ISD::FMAX 0x36307b0, 0x36317c0 [ORD=8] [ID=45]
0x36307b0: i64,ch = CopyFromReg 0x35be1b0, 0x367ab70 [ORD=7] [ID=30]
0x367ab70: i64 = Register %vreg2 [ID=8]
0x36317c0: i64 = xor 0x365f420, 0x365e870 [ORD=6] [ID=38]
0x365f420: i64,ch = CopyFromReg 0x35be1b0, 0x3631130 [ORD=3] [ID=28]
0x3631130: i64 = Register %vreg6 [ID=3]
0x365e870: i64 = Constant<-1> [ID=7]
In function: _ZN3povL14sort_and_splitEPPNS_16BBox_Tree_StructERS2_Plll

I'll work on getting a more reduced test case.

Thanks,

Silviu

> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-
> bounces at cs.uiuc.edu] On Behalf Of Artyom Skrobov
> Sent: 13 May 2015 13:01
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm] r237247 - [AArch64] Codegen VMAX/VMIN for safe math
> cases
> 
> Author: askrobov
> Date: Wed May 13 07:01:09 2015
> New Revision: 237247
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=237247&view=rev
> Log:
> [AArch64] Codegen VMAX/VMIN for safe math cases
> 
> Modified:
>     llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
>     llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>     llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll
> 
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=23724
> 7&r1=237246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed May 13
> +++ 07:01:09 2015
> @@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowe
> 
>    setTargetDAGCombine(ISD::SELECT);
>    setTargetDAGCombine(ISD::VSELECT);
> +  setTargetDAGCombine(ISD::SELECT_CC);
> 
>    setTargetDAGCombine(ISD::INTRINSIC_VOID);
>    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
> @@ -3701,46 +3702,6 @@ SDValue AArch64TargetLowering::LowerSELE
>    assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() ==
> MVT::f64);
>    assert(LHS.getValueType() == RHS.getValueType());
>    EVT VT = TVal.getValueType();
> -
> -  // Try to match this select into a max/min operation, which have
dedicated
> -  // opcode in the instruction set.
> -  // FIXME: This is not correct in the presence of NaNs, so we only
enable this
> -  // in no-NaNs mode.
> -  if (getTargetMachine().Options.NoNaNsFPMath) {
> -    SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
> -    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
> -        selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
> -      CC = ISD::getSetCCSwappedOperands(CC);
> -      std::swap(MinMaxLHS, MinMaxRHS);
> -    }
> -
> -    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
> -        selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
> -      switch (CC) {
> -      default:
> -        break;
> -      case ISD::SETGT:
> -      case ISD::SETGE:
> -      case ISD::SETUGT:
> -      case ISD::SETUGE:
> -      case ISD::SETOGT:
> -      case ISD::SETOGE:
> -        return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS,
> MinMaxRHS);
> -        break;
> -      case ISD::SETLT:
> -      case ISD::SETLE:
> -      case ISD::SETULT:
> -      case ISD::SETULE:
> -      case ISD::SETOLT:
> -      case ISD::SETOLE:
> -        return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS,
> MinMaxRHS);
> -        break;
> -      }
> -    }
> -  }
> -
> -  // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go
ahead
> -  // and do the comparison.
>    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
> 
>    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
> totally @@ -8735,6 +8696,75 @@ static SDValue
> performSelectCombine(SDNo
>    return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N-
> >getOperand(2));  }
> 
> +/// performSelectCCCombine - Target-specific DAG combining for
> +ISD::SELECT_CC /// to match FMIN/FMAX patterns.
> +static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG)
> {
> +  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x :
y".
> +  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
> +  // vmax/vmin return NaN if either operand is a NaN;
> +  // only do the transformation when it matches that behavior.
> +
> +  SDValue CondLHS = N->getOperand(0);
> +  SDValue CondRHS = N->getOperand(1);
> +  SDValue LHS = N->getOperand(2);
> +  SDValue RHS = N->getOperand(3);
> +  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
> +
> +  unsigned Opcode;
> +  bool IsReversed;
> +  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
> +      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
> +    IsReversed = false; // x CC y ? x : y  } else if
> + (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
> +             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
> +    IsReversed = true ; // x CC y ? y : x  } else {
> +    return SDValue();
> +  }
> +
> +  bool IsUnordered = false, IsOrEqual;
> +  switch (CC) {
> +  default:
> +    return SDValue();
> +  case ISD::SETULT:
> +  case ISD::SETULE:
> +    IsUnordered = true;
> +  case ISD::SETOLT:
> +  case ISD::SETOLE:
> +  case ISD::SETLT:
> +  case ISD::SETLE:
> +    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC ==
ISD::SETULE);
> +    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
> +    break;
> +
> +  case ISD::SETUGT:
> +  case ISD::SETUGE:
> +    IsUnordered = true;
> +  case ISD::SETOGT:
> +  case ISD::SETOGE:
> +  case ISD::SETGT:
> +  case ISD::SETGE:
> +    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC ==
> ISD::SETUGE);
> +    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
> +    break;
> +  }
> +
> +  // If LHS is NaN, an ordered comparison will be false and the result
> + will be  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid
> + this by checking  // that LHS != NaN. Likewise, for unordered
comparisons,
> check for RHS != NaN.
> +  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
> +    return SDValue();
> +
> +  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both
> + be true,  // but FMIN will return -0, and FMAX will return +0. So
> + FMIN/FMAX can only be  // used for unsafe math or if one of the operands
> is known to be nonzero.
> +  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
> +      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
> +    return SDValue();
> +
> +  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); }
> +
>  SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
>                                                   DAGCombinerInfo &DCI)
const {
>    SelectionDAG &DAG = DCI.DAG;
> @@ -8767,6 +8797,8 @@ SDValue AArch64TargetLowering::PerformDA
>      return performSelectCombine(N, DCI);
>    case ISD::VSELECT:
>      return performVSelectCombine(N, DCI.DAG);
> +  case ISD::SELECT_CC:
> +    return performSelectCCCombine(N, DCI.DAG);
>    case ISD::STORE:
>      return performSTORECombine(N, DCI, DAG, Subtarget);
>    case AArch64ISD::BRCOND:
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=237247&r1=2
> 37246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed May 13 07:01:09
> +++ 2015
> @@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_C
>      //   c = fcmp [?gt, ?ge, ?lt, ?le] a, b
>      //   select c, a, b
>      // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to
'gt'.
> -    // FIXME: There is similar code that allows some extensions in
> -    // AArch64TargetLowering::LowerSELECT_CC that should be shared with
> this
> -    // code.
>      bool swapSides = false;
>      if (!getTargetMachine().Options.NoNaNsFPMath) {
>        // transformability may depend on which way around we compare
> 
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/AArch64/arm64-
> fmax.ll?rev=237247&r1=237246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll Wed May 13 07:01:09
> +++ 2015
> @@ -1,29 +1,49 @@
>  ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
> +; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
> 
>  define double @test_direct(float %in) #1 {  ; CHECK-LABEL: test_direct:
> +; CHECK-SAFE-LABEL: test_direct:
>    %cmp = fcmp olt float %in, 0.000000e+00
>    %longer = fpext float %in to double
>    %val = select i1 %cmp, double 0.000000e+00, double %longer
>    ret double %val
> 
>  ; CHECK: fmax
> +; CHECK-SAFE: fmax
>  }
> 
>  define double @test_cross(float %in) #1 {  ; CHECK-LABEL: test_cross:
> +; CHECK-SAFE-LABEL: test_cross:
> +  %cmp = fcmp ult float %in, 0.000000e+00
> +  %longer = fpext float %in to double
> +  %val = select i1 %cmp, double %longer, double 0.000000e+00
> +  ret double %val
> +
> +; CHECK: fmin
> +; CHECK-SAFE: fmin
> +}
> +
> +; Same as previous, but with ordered comparison; ; can't be converted
> +in safe-math mode.
> +define double @test_cross_fail_nan(float %in) #1 { ; CHECK-LABEL:
> +test_cross_fail_nan:
> +; CHECK-SAFE-LABEL: test_cross_fail_nan:
>    %cmp = fcmp olt float %in, 0.000000e+00
>    %longer = fpext float %in to double
>    %val = select i1 %cmp, double %longer, double 0.000000e+00
>    ret double %val
> 
>  ; CHECK: fmin
> +; CHECK-SAFE: fcsel d0, d1, d0, mi
>  }
> 
>  ; This isn't a min or a max, but passes the first condition for swapping
the  ;
> results. Make sure they're put back before we resort to the normal fcsel.
>  define float @test_cross_fail(float %lhs, float %rhs) {  ; CHECK-LABEL:
> test_cross_fail:
> +; CHECK-SAFE-LABEL: test_cross_fail:
>    %tst = fcmp une float %lhs, %rhs
>    %res = select i1 %tst, float %rhs, float %lhs
>    ret float %res
> @@ -31,4 +51,5 @@ define float @test_cross_fail(float %lhs
>    ; The register allocator would have to decide to be deliberately obtuse
> before
>    ; other register were used.
>  ; CHECK: fcsel s0, s1, s0, ne
> -}
> \ No newline at end of file
> +; CHECK-SAFE: fcsel s0, s1, s0, ne
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits