[PATCH] D83455: [X86] Immediately call LowerShift from lowerBuildVectorToBitOp.

Wed Jul 8 22:34:33 PDT 2020

craig.topper created this revision.
craig.topper added reviewers: spatel, RKSimon.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

If we don't immediately lower the vector shift, the splat
constant vector we created may get turned into a constant pool
load before we get around to lowering the shift. This makes it
a lot more difficult to create a shift by constant. Sometimes we
fail to see through the constant pool at all and end up trying
to lower as if it was a variable shift. This requires custom
handling and may create an unsupported vselect on pre-sse-4.1
targets. Since we're after LegalizeVectorOps we are unable to
legalize the unsupported vselect as that code is in LegalizeDAG.

So calling LowerShift immediately ensures that we get to see the
splat constant.

Fixes PR46527.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83455

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/pr46527.ll


Index: llvm/test/CodeGen/X86/pr46527.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/X86/pr46527.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 -relocation-model=pic | FileCheck %s
+
+define void @f(<16 x i8>* %out, <16 x i8> %in, i1 %flag) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    calll .L0$pb
+; CHECK-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK-NEXT:  .L0$pb:
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; CHECK-NEXT:    notb %dl
+; CHECK-NEXT:    andb $1, %dl
+; CHECK-NEXT:    movzbl %dl, %edx
+; CHECK-NEXT:    movd %edx, %xmm1
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; CHECK-NEXT:    paddb %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm0, %xmm1
+; CHECK-NEXT:    pxor {{\.LCPI.*}}@GOTOFF(%eax), %xmm1
+; CHECK-NEXT:    movdqa %xmm1, (%ecx)
+; CHECK-NEXT:    retl
+entry:
+  %0 = select i1 %flag, i8 0, i8 2
+  %1 = insertelement <16 x i8> undef, i8 %0, i32 0
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %3 = xor <16 x i8> %2, %in
+  %4 = xor <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <16 x i8> %4, <16 x i8>* %out, align 16
+  ret void
+}
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9689,6 +9689,9 @@
   return SDValue();
 }
 
+static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
+                          SelectionDAG &DAG);
+
 /// If a BUILD_VECTOR's source elements all apply the same bit operation and
 /// one of their operands is constant, lower to a pair of BUILD_VECTOR and
 /// just apply the bit to the vectors.
@@ -9696,6 +9699,7 @@
 /// from this, but enough scalar bit operations are created from the later
 /// legalization + scalarization stages to need basic support.
 static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
+                                       const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
   MVT VT = Op->getSimpleValueType(0);
@@ -9759,7 +9763,14 @@
 
   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
-  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+  SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
+
+  if (!IsShift)
+    return Res;
+
+  // Immediately lower the shift to ensure the constant build vector doesn't
+  // get converted to a constant pool before the shift is lowered.
+  return LowerShift(Res, Subtarget, DAG);
 }
 
 /// Create a vector constant without a load. SSE/AVX provide the bare minimum
@@ -10115,7 +10126,7 @@
     return HorizontalOp;
   if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
     return Broadcast;
-  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
+  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
     return BitOp;
 
   unsigned EVTBits = EltVT.getSizeInBits();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D83455.276635.patch
Type: text/x-patch
Size: 3589 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200709/890a122f/attachment.bin>