[llvm] r237247 - [AArch64] Codegen VMAX/VMIN for safe math cases
Silviu Baranga
Silviu.Baranga at arm.com
Wed May 13 07:14:06 PDT 2015
Hi Artyom,
Unfortunately I had to revert this as it was causing some failures in
spec2000/2006.
My example would be 453.povray, where we get failures in
Instruction selection:
clang++ -c -o camera.o -DSPEC_CPU -DNDEBUG -O3 -ffast-math
-mcpu=cortex-a57 -DSPEC_CPU_LP64 camera.cpp
fatal error: error in backend: Cannot select: 0x37689d0: i32 =
AArch64ISD::FMAX 0x372b6f0, 0x376f780 [ORD=20] [ID=43]
0x372b6f0: i32 = sub 0x372ab10, 0x372d7d0 [ORD=17] [ID=35]
0x372ab10: i32 = Constant<0> [ID=16]
0x372d7d0: i32,ch = CopyFromReg 0x369f890, 0x372b100 [ORD=17] [ID=27]
0x372b100: i32 = Register %vreg207 [ID=17]
0x376f780: i32 = xor 0x372dc90, 0x3729530 [ORD=18] [ID=36]
0x372dc90: i32,ch = CopyFromReg 0x369f890, 0x372b5c0 [ORD=18] [ID=28]
0x372b5c0: i32 = Register %vreg206 [ID=19]
0x3729530: i32 = Constant<-1> [ID=18]
Or:
/tmp/clang//bin/clang++ -c -o colour.o -DSPEC_CPU -DNDEBUG -O3
-ffast-math -mcpu=cortex-a57 -DSPEC_CPU_LP64 colour.cpp
fatal error: error in backend: Cannot select: 0x367ef10: i64 =
AArch64ISD::FMAX 0x36307b0, 0x36317c0 [ORD=8] [ID=45]
0x36307b0: i64,ch = CopyFromReg 0x35be1b0, 0x367ab70 [ORD=7] [ID=30]
0x367ab70: i64 = Register %vreg2 [ID=8]
0x36317c0: i64 = xor 0x365f420, 0x365e870 [ORD=6] [ID=38]
0x365f420: i64,ch = CopyFromReg 0x35be1b0, 0x3631130 [ORD=3] [ID=28]
0x3631130: i64 = Register %vreg6 [ID=3]
0x365e870: i64 = Constant<-1> [ID=7]
In function: _ZN3povL14sort_and_splitEPPNS_16BBox_Tree_StructERS2_Plll
I'll work on getting a more reduced test case..
Thanks,
Silviu
> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-
> bounces at cs.uiuc.edu] On Behalf Of Artyom Skrobov
> Sent: 13 May 2015 13:01
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm] r237247 - [AArch64] Codegen VMAX/VMIN for safe math
> cases
>
> Author: askrobov
> Date: Wed May 13 07:01:09 2015
> New Revision: 237247
>
> URL: http://llvm.org/viewvc/llvm-project?rev=237247&view=rev
> Log:
> [AArch64] Codegen VMAX/VMIN for safe math cases
>
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=23724
> 7&r1=237246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed May 13
> +++ 07:01:09 2015
> @@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowe
>
> setTargetDAGCombine(ISD::SELECT);
> setTargetDAGCombine(ISD::VSELECT);
> + setTargetDAGCombine(ISD::SELECT_CC);
>
> setTargetDAGCombine(ISD::INTRINSIC_VOID);
> setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
> @@ -3701,46 +3702,6 @@ SDValue AArch64TargetLowering::LowerSELE
> assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() ==
> MVT::f64);
> assert(LHS.getValueType() == RHS.getValueType());
> EVT VT = TVal.getValueType();
> -
> - // Try to match this select into a max/min operation, which have
dedicated
> - // opcode in the instruction set.
> - // FIXME: This is not correct in the presence of NaNs, so we only
enable this
> - // in no-NaNs mode.
> - if (getTargetMachine().Options.NoNaNsFPMath) {
> - SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
> - if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
> - selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
> - CC = ISD::getSetCCSwappedOperands(CC);
> - std::swap(MinMaxLHS, MinMaxRHS);
> - }
> -
> - if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
> - selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
> - switch (CC) {
> - default:
> - break;
> - case ISD::SETGT:
> - case ISD::SETGE:
> - case ISD::SETUGT:
> - case ISD::SETUGE:
> - case ISD::SETOGT:
> - case ISD::SETOGE:
> - return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS,
> MinMaxRHS);
> - break;
> - case ISD::SETLT:
> - case ISD::SETLE:
> - case ISD::SETULT:
> - case ISD::SETULE:
> - case ISD::SETOLT:
> - case ISD::SETOLE:
> - return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS,
> MinMaxRHS);
> - break;
> - }
> - }
> - }
> -
> - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go
ahead
> - // and do the comparison.
> SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
>
> // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
> totally @@ -8735,6 +8696,75 @@ static SDValue
> performSelectCombine(SDNo
> return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N-
> >getOperand(2)); }
>
> +/// performSelectCCCombine - Target-specific DAG combining for
> +ISD::SELECT_CC /// to match FMIN/FMAX patterns.
> +static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG)
> {
> + // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x :
y".
> + // Unless the NoNaNsFPMath option is set, be careful about NaNs:
> + // vmax/vmin return NaN if either operand is a NaN;
> + // only do the transformation when it matches that behavior.
> +
> + SDValue CondLHS = N->getOperand(0);
> + SDValue CondRHS = N->getOperand(1);
> + SDValue LHS = N->getOperand(2);
> + SDValue RHS = N->getOperand(3);
> + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
> +
> + unsigned Opcode;
> + bool IsReversed;
> + if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
> + selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
> + IsReversed = false; // x CC y ? x : y } else if
> + (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
> + selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
> + IsReversed = true ; // x CC y ? y : x } else {
> + return SDValue();
> + }
> +
> + bool IsUnordered = false, IsOrEqual;
> + switch (CC) {
> + default:
> + return SDValue();
> + case ISD::SETULT:
> + case ISD::SETULE:
> + IsUnordered = true;
> + case ISD::SETOLT:
> + case ISD::SETOLE:
> + case ISD::SETLT:
> + case ISD::SETLE:
> + IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC ==
ISD::SETULE);
> + Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
> + break;
> +
> + case ISD::SETUGT:
> + case ISD::SETUGE:
> + IsUnordered = true;
> + case ISD::SETOGT:
> + case ISD::SETOGE:
> + case ISD::SETGT:
> + case ISD::SETGE:
> + IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC ==
> ISD::SETUGE);
> + Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
> + break;
> + }
> +
> + // If LHS is NaN, an ordered comparison will be false and the result
> + will be // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid
> + this by checking // that LHS != NaN. Likewise, for unordered
comparisons,
> check for RHS != NaN.
> + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
> + return SDValue();
> +
> + // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both
> + be true, // but FMIN will return -0, and FMAX will return +0. So
> + FMIN/FMAX can only be // used for unsafe math or if one of the operands
> is known to be nonzero.
> + if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
> + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
> + return SDValue();
> +
> + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); }
> +
> SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
> DAGCombinerInfo &DCI)
const {
> SelectionDAG &DAG = DCI.DAG;
> @@ -8767,6 +8797,8 @@ SDValue AArch64TargetLowering::PerformDA
> return performSelectCombine(N, DCI);
> case ISD::VSELECT:
> return performVSelectCombine(N, DCI.DAG);
> + case ISD::SELECT_CC:
> + return performSelectCCCombine(N, DCI.DAG);
> case ISD::STORE:
> return performSTORECombine(N, DCI, DAG, Subtarget);
> case AArch64ISD::BRCOND:
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=237247&r1=2
> 37246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed May 13 07:01:09
> +++ 2015
> @@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_C
> // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
> // select c, a, b
> // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to
'gt'.
> - // FIXME: There is similar code that allows some extensions in
> - // AArch64TargetLowering::LowerSELECT_CC that should be shared with
> this
> - // code.
> bool swapSides = false;
> if (!getTargetMachine().Options.NoNaNsFPMath) {
> // transformability may depend on which way around we compare
>
> Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/AArch64/arm64-
> fmax.ll?rev=237247&r1=237246&r2=237247&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-fmax.ll Wed May 13 07:01:09
> +++ 2015
> @@ -1,29 +1,49 @@
> ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
> +; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
>
> define double @test_direct(float %in) #1 { ; CHECK-LABEL: test_direct:
> +; CHECK-SAFE-LABEL: test_direct:
> %cmp = fcmp olt float %in, 0.000000e+00
> %longer = fpext float %in to double
> %val = select i1 %cmp, double 0.000000e+00, double %longer
> ret double %val
>
> ; CHECK: fmax
> +; CHECK-SAFE: fmax
> }
>
> define double @test_cross(float %in) #1 { ; CHECK-LABEL: test_cross:
> +; CHECK-SAFE-LABEL: test_cross:
> + %cmp = fcmp ult float %in, 0.000000e+00
> + %longer = fpext float %in to double
> + %val = select i1 %cmp, double %longer, double 0.000000e+00
> + ret double %val
> +
> +; CHECK: fmin
> +; CHECK-SAFE: fmin
> +}
> +
> +; Same as previous, but with ordered comparison; ; can't be converted
> +in safe-math mode.
> +define double @test_cross_fail_nan(float %in) #1 { ; CHECK-LABEL:
> +test_cross_fail_nan:
> +; CHECK-SAFE-LABEL: test_cross_fail_nan:
> %cmp = fcmp olt float %in, 0.000000e+00
> %longer = fpext float %in to double
> %val = select i1 %cmp, double %longer, double 0.000000e+00
> ret double %val
>
> ; CHECK: fmin
> +; CHECK-SAFE: fcsel d0, d1, d0, mi
> }
>
> ; This isn't a min or a max, but passes the first condition for swapping
the ;
> results. Make sure they're put back before we resort to the normal fcsel.
> define float @test_cross_fail(float %lhs, float %rhs) { ; CHECK-LABEL:
> test_cross_fail:
> +; CHECK-SAFE-LABEL: test_cross_fail:
> %tst = fcmp une float %lhs, %rhs
> %res = select i1 %tst, float %rhs, float %lhs
> ret float %res
> @@ -31,4 +51,5 @@ define float @test_cross_fail(float %lhs
> ; The register allocator would have to decide to be deliberately obtuse
> before
> ; other register were used.
> ; CHECK: fcsel s0, s1, s0, ne
> -}
> \ No newline at end of file
> +; CHECK-SAFE: fcsel s0, s1, s0, ne
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list