[llvm] r374555 - [DAGCombiner] fold vselect-of-constants to shift

Fri Oct 11 07:17:56 PDT 2019

Author: spatel
Date: Fri Oct 11 07:17:56 2019
New Revision: 374555

URL: http://llvm.org/viewvc/llvm-project?rev=374555&view=rev
Log:
[DAGCombiner] fold vselect-of-constants to shift

The test diffs suggest that we are still missing some more basic
analysis/transforms, but this keeps the vector path in
sync with the scalar path (rL374397). This is again a
preliminary step toward introducing the reverse transform
(shifted zext --> select) in IR, as proposed in D63382.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
    llvm/trunk/test/CodeGen/X86/vselect.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Oct 11 07:17:56 2019
@@ -8614,6 +8614,15 @@ SDValue DAGCombiner::foldVSelectOfConsta
     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
   }
 
+  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+  APInt Pow2C;
+  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+      isNullOrNullSplat(N2)) {
+    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+  }
+
   // The general case for select-of-constants:
   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
   // ...but that only makes sense if a vselect is slower than 2 logic ops, so

Modified: llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll Fri Oct 11 07:17:56 2019
@@ -213,9 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<
 define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v8i16:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    psllw $15, %xmm0
-; ANY-NEXT:    psraw $15, %xmm0
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllw $7, %xmm0
 ; ANY-NEXT:    retq
   %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
   ret <8 x i16> %shl
@@ -224,9 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<
 define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v4i32:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    pslld $31, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    pslld $6, %xmm0
 ; ANY-NEXT:    retq
   %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
   ret <4 x i32> %shl
@@ -235,10 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<
 define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
 ; ANY-LABEL: sel_shift_bool_v2i64:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    psllq $63, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
-; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllq $16, %xmm0
 ; ANY-NEXT:    retq
   %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
   ret <2 x i64> %shl

Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Fri Oct 11 07:17:56 2019
@@ -647,33 +647,22 @@ define void @vselect_allzeros_LHS_multip
 ; This test case previously crashed after r363802, r363850, and r363856 due
 ; any_extend_vector_inreg not being handled by the X86 backend.
 define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
-; SSE2-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    psrad $24, %xmm0
-; SSE2-NEXT:    movq %xmm0, %rax
-; SSE2-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    pmovsxbq %xmm0, %xmm0
-; SSE41-NEXT:    movq %xmm0, %rax
-; SSE41-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE41-NEXT:    retq
+; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    shlq $15, %rax
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $32768, %eax # imm = 0x8000
+; AVX-NEXT:    andl $1, %eax
+; AVX-NEXT:    shlq $15, %rax
 ; AVX-NEXT:    retq
 0:
   %1 = load <8 x i8>, <8 x i8>* %x