[PATCH] D41040: [DAGCombiner] protect against an infinite loop between shl <--> mul (PR35579)

Fri Dec 8 16:01:13 PST 2017

spatel created this revision.
Herald added subscribers: mcrosier, rengolin.

At first, I tried to thread the x86 needle and use a target hook (isVectorShiftByScalarCheap()) to disable the transform only for non-splat pow-of-2 constants, but not AVX2, but only some element types, but...it's difficult.

Here we just avoid the loop with the x86 vector transform that conflicts with the general DAG combine and preserve all of the existing behavior AFAICT otherwise.

Some tests that will probably fail if someone does try to restrict this in a more targeted way for x86-only may be found in:

  test/CodeGen/X86/combine-mul.ll
  test/CodeGen/X86/vector-mul.ll
  test/CodeGen/X86/widen_arith-5.ll

This should prevent the infinite looping seen with:
https://bugs.llvm.org/show_bug.cgi?id=35579


https://reviews.llvm.org/D41040

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/X86/combine-mul.ll


Index: test/CodeGen/X86/combine-mul.ll
===================================================================

--- test/CodeGen/X86/combine-mul.ll
+++ test/CodeGen/X86/combine-mul.ll
@@ -308,3 +308,37 @@
   %2 = mul <4 x i32> %1, <i32 4, i32 6, i32 2, i32 0>
   ret <4 x i32> %2
 }
+
+; This would infinite loop because DAGCombiner wants to turn this into a shift,
+; but x86 lowering wants to avoid non-uniform vector shift amounts.
+
+define <16 x i8> @PR35579(<16 x i8> %x) {
+; SSE-LABEL: PR35579:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pmovsxbw %xmm0, %xmm1
+; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    pmovsxbw %xmm0, %xmm0
+; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    packuswb %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR35579:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpmovsxbw %xmm0, %ymm0
+; AVX-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %r = mul <16 x i8> %x, <i8 0, i8 1, i8 2, i8 1, i8 4, i8 1, i8 2, i8 1, i8 8, i8 1, i8 2, i8 1, i8 4, i8 1, i8 2, i8 1>
+  ret <16 x i8> %r
+}
+
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2674,7 +2674,8 @@
   }
   // fold (mul x, (1 << c)) -> x << c
   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
-      DAG.isKnownToBeAPowerOfTwo(N1)) {
+      DAG.isKnownToBeAPowerOfTwo(N1) &&
+      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
     SDLoc DL(N);
     SDValue LogBase2 = BuildLogBase2(N1, DL);
     AddToWorklist(LogBase2.getNode());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D41040.126226.patch
Type: text/x-patch
Size: 2160 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171209/6e2f7518/attachment.bin>