[llvm] r316554 - DAG: Fix creating select with wrong condition type

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 00:14:07 PDT 2017


Author: arsenm
Date: Wed Oct 25 00:14:07 2017
New Revision: 316554

URL: http://llvm.org/viewvc/llvm-project?rev=316554&view=rev
Log:
DAG: Fix creating select with wrong condition type

This code, added in r297930, assumed it could create a select
whose condition type is just an integer bitcast of the selected
type. On AMDGPU any vselect is going to be scalarized (even
though the vector types are legal), and every select condition
must be i1 (the same as getSetCCResultType).

This logic doesn't really make sense to me, but there has never
been a consistent policy on what the select condition mask type
is supposed to be. Extend the existing logic so the transform is
also skipped for condition types that aren't setcc results. This
still doesn't seem quite right, but checking for conditions that
seem more sensible (such as whether the vselect is going to be
expanded) doesn't work, since that in turn seems to depend on
this transform as well.
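
In effect, the new check asks the legalizer what the condition
type will eventually become. A rough sketch of the guard added
below (TLI, Ctx and Cond are the values already in scope in
DAGTypeLegalizer::WidenVSELECTAndMask; this is an illustration
of the hunk, not a substitute for it):

  // The condition already has i1 elements; follow the legalizer's type
  // transformations until the condition type is legal. If the legal type
  // still has i1 elements (e.g. because the target scalarizes the vselect
  // and wants plain i1 conditions, as AMDGPU does), widening the mask to
  // the element width would create a select with the wrong condition
  // type, so leave the VSELECT alone.
  EVT CondVT = Cond->getValueType(0);
  if (CondVT.getScalarType() == MVT::i1) {
    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);

    if (CondVT.getScalarType() == MVT::i1)
      return SDValue(); // Keep the i1 mask; don't widen it.
  }

The AMDGPU tests added below exercise exactly this path: instead
of widening their <4 x i1> masks into i64 condition masks, the
transform now bails out.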

Added:
    llvm/trunk/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/trunk/test/CodeGen/X86/avx512-vselect.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=316554&r1=316553&r2=316554&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Wed Oct 25 00:14:07 2017
@@ -3101,7 +3101,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAn
 
   // If this is a splitted VSELECT that was previously already handled, do
   // nothing.
-  if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+  EVT CondVT = Cond->getValueType(0);
+  if (CondVT.getScalarSizeInBits() != 1)
     return SDValue();
 
   EVT VSelVT = N->getValueType(0);
@@ -3125,6 +3126,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAn
     EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
     if (SetCCResVT.getScalarSizeInBits() == 1)
       return SDValue();
+  } else if (CondVT.getScalarType() == MVT::i1) {
+    // If there is support for an i1 vector mask (or only scalar i1 conditions),
+    // don't touch.
+    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+    if (CondVT.getScalarType() == MVT::i1)
+      return SDValue();
   }
 
   // Get the VT and operands for VSELECT, and widen if needed.

Added: llvm/trunk/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll?rev=316554&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll Wed Oct 25 00:14:07 2017
@@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to
+; create vselects with i64 condition masks.
+
+; FIXME: Should be able to avoid intermediate vselect
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4f64:
+; GCN: v_cmp_u_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
+bb:
+  %tmp = extractelement <4 x double> %arg, i64 0
+  %tmp1 = fcmp uno double %tmp, 0.000000e+00
+  %tmp2 = sext i1 %tmp1 to i64
+  %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+  %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+  %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+  %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+  %tmp7 = fcmp une <4 x double> %arg, zeroinitializer
+  %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %tmp9 = and <4 x i1> %tmp8, %tmp7
+  %tmp10 = select <4 x i1> %tmp9, <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, <4 x double> zeroinitializer
+  store <4 x double> %tmp10, <4 x double> addrspace(1)* null, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4i64:
+; GCN: v_cmp_eq_u64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
+bb:
+  %tmp = extractelement <4 x i64> %arg, i64 0
+  %tmp1 = icmp eq i64 %tmp, 0
+  %tmp2 = sext i1 %tmp1 to i64
+  %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+  %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+  %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+  %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+  %tmp7 = icmp ne <4 x i64> %arg, zeroinitializer
+  %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %tmp9 = and <4 x i1> %tmp8, %tmp7
+  %tmp10 = select <4 x i1> %tmp9, <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i64> zeroinitializer
+  store <4 x i64> %tmp10, <4 x i64> addrspace(1)* null, align 32
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }

Modified: llvm/trunk/test/CodeGen/X86/avx512-vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vselect.ll?rev=316554&r1=316553&r2=316554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vselect.ll Wed Oct 25 00:14:07 2017
@@ -23,35 +23,16 @@ entry:
 ; both formulations of vselect. All of this trickery is because we can't
 ; directly form an SDAG input to the lowering.
 define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
-; CHECK-SKX-LABEL: test2:
-; CHECK-SKX:       # BB#0: # %entry
-; CHECK-SKX-NEXT:    vxorps %xmm6, %xmm6, %xmm6
-; CHECK-SKX-NEXT:    vcmpltps %zmm0, %zmm6, %k0
-; CHECK-SKX-NEXT:    vcmpltps %zmm6, %zmm1, %k1
-; CHECK-SKX-NEXT:    korw %k1, %k0, %k0
-; CHECK-SKX-NEXT:    kshiftrw $8, %k0, %k1
-; CHECK-SKX-NEXT:    vpmovm2q %k1, %zmm1
-; CHECK-SKX-NEXT:    vpmovm2q %k0, %zmm0
-; CHECK-SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-SKX-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-SKX-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-SKX-NEXT:    retq
-;
-; CHECK-KNL-LABEL: test2:
-; CHECK-KNL:       # BB#0: # %entry
-; CHECK-KNL-NEXT:    vxorps %xmm6, %xmm6, %xmm6
-; CHECK-KNL-NEXT:    vcmpltps %zmm0, %zmm6, %k0
-; CHECK-KNL-NEXT:    vcmpltps %zmm6, %zmm1, %k1
-; CHECK-KNL-NEXT:    korw %k1, %k0, %k1
-; CHECK-KNL-NEXT:    kshiftrw $8, %k1, %k2
-; CHECK-KNL-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; CHECK-KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; CHECK-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-KNL-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-KNL-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-KNL-NEXT:    retq
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vxorps %xmm6, %xmm6, %xmm6
+; CHECK-NEXT:    vcmpltps %zmm0, %zmm6, %k0
+; CHECK-NEXT:    vcmpltps %zmm6, %zmm1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k1
+; CHECK-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
+; CHECK-NEXT:    kshiftrw $8, %k1, %k1
+; CHECK-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
+; CHECK-NEXT:    retq
 entry:
   %gt.m = fcmp ogt <16 x float> %x, zeroinitializer
   %lt.m = fcmp olt <16 x float> %y, zeroinitializer



