[PATCH] DAGCombine: Remove redundant NaN checks around ISD::FSQRT

Thu Mar 19 15:54:33 PDT 2015

LGTM

—Owen

> On Mar 19, 2015, at 3:08 PM, Tom Stellard <thomas.stellard at amd.com> wrote:
> 
> Hi resistor,
> 
> This folds:
> 
> (select (setcc x, -0.0, olt), NaN, (fsqrt x)) -> (fsqrt x)
> 
> REPOSITORY
>  rL LLVM
> 
> http://reviews.llvm.org/D8470
> 
> Files:
>  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>  test/CodeGen/R600/llvm.sqrt.ll
> 
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -5160,6 +5160,9 @@
>     }
>   }
> 
> +  if (SimplifySelectOps(N, N1, N2))
> +    return SDValue(N, 0);  // Don't revisit N.
> +
>   // If the VSELECT result requires splitting and the mask is provided by a
>   // SETCC, then split both nodes and its operands before legalization. This
>   // prevents the type legalizer from unrolling SETCC into scalar comparisons
> @@ -12437,6 +12440,37 @@
> bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
>                                     SDValue RHS) {
> 
> +  // fold (select (setcc x, -0.0, olt), NaN, (fsqrt x))
> +  // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
> +  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
> +    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
> +      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
> +      SDValue Sqrt = RHS;
> +      ISD::CondCode CC;
> +      SDValue CmpLHS;
> +      const ConstantFPSDNode *NegZero = nullptr;
> +
> +      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
> +        CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
> +        CmpLHS = TheSelect->getOperand(0);
> +        NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
> +      } else {
> +        // SELECT or VSELECT
> +        SDValue Cmp = TheSelect->getOperand(0);
> +        if (Cmp.getOpcode() == ISD::SETCC) {
> +          CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
> +          CmpLHS = Cmp.getOperand(0);
> +          NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
> +        }
> +      }
> +      if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
> +          Sqrt.getOperand(0) == CmpLHS && CC == ISD::SETOLT) {
> +          // We have: (select (setcc x, -0.0, olt), NaN, (fsqrt x))
> +          CombineTo(TheSelect, Sqrt);
> +          return true;
> +      }
> +    }
> +  }
>   // Cannot simplify select with vector condition
>   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
> 
> Index: test/CodeGen/R600/llvm.sqrt.ll
> ===================================================================
> --- test/CodeGen/R600/llvm.sqrt.ll
> +++ test/CodeGen/R600/llvm.sqrt.ll
> @@ -50,6 +50,31 @@
>   ret void
> }
> 
> +; SI-LABEL: {{^}}elim_redun_check:
> +; SI: v_sqrt_f32_e32
> +; SI-NOT: v_cndmask
> +define void @elim_redun_check(float addrspace(1)* %out, float %in) {
> +entry:
> +  %sqrt = call float @llvm.sqrt.f32(float %in)
> +  %cmp = fcmp olt float %in, -0.000000e+00
> +  %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
> +  store float %res, float addrspace(1)* %out
> +  ret void
> +}
> +
> +; SI-LABEL: {{^}}elim_redun_check_v2:
> +; SI: v_sqrt_f32_e32
> +; SI: v_sqrt_f32_e32
> +; SI-NOT: v_cndmask
> +define void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
> +entry:
> +  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
> +  %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
> +  %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
> +  store <2 x float> %res, <2 x float> addrspace(1)* %out
> +  ret void
> +}
> +
> declare float @llvm.sqrt.f32(float %in)
> declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
> declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
> 
> EMAIL PREFERENCES
>  http://reviews.llvm.org/settings/panel/emailpreferences/
> <D8470.22314.patch>