[PATCH] D103192: [X86] Fold (shift undef, X)->0 for vector shifts by immediate.

Craig Topper via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed May 26 11:22:57 PDT 2021


craig.topper created this revision.
craig.topper added reviewers: spatel, RKSimon.
Herald added subscribers: pengfei, hiraditya.
craig.topper requested review of this revision.
Herald added a project: LLVM.

We could previously do this by accident through the later call to
getTargetConstantBitsFromNode, I think, but that only worked if N0 had
a single use. This patch handles undef explicitly and does not depend
on a use count check.
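
As a minimal IR sketch of the multi-use case (it mirrors the test added
below; nothing here is new to this patch):

  declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)

  %s1 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
  %s2 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 7)

Both shifts read a fully undef vector, so the UNDEF node they share in
the DAG has two uses; with this patch each shift still folds to an
all-zeros vector.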

I think this is needed to move the (shl X, 1)->(add X, X) fold to isel
for PR50468. We need to be sure X won't be IMPLICIT_DEF, which might
prevent the same vreg from being used for both operands of the add.
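
For context, a hypothetical example of that follow-up (not part of this
patch): a vector shift-left-by-one such as

  %r = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>

would be selected as an add of %x with itself. If %x could still be
undef at that point, isel would turn it into IMPLICIT_DEF, and the two
add operands might not end up with the same vreg; folding undef shifts
to zero during DAG combine keeps that case from ever reaching isel.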


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D103192

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/vec_shift5.ll


Index: llvm/test/CodeGen/X86/vec_shift5.ll
===================================================================
--- llvm/test/CodeGen/X86/vec_shift5.ll
+++ llvm/test/CodeGen/X86/vec_shift5.ll
@@ -170,6 +170,29 @@
   ret <2 x i64> %1
 }
 
+; Make sure we fold fully undef input vectors. We previously folded only when
+; undef had a single use, so use two undefs.
+define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0, <4 x i32>* %dummy) {
+; X86-LABEL: test_x86_sse2_pslli_d:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_x86_sse2_pslli_d:
+; X64:       # %bb.0:
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rdi)
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    retq
+  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
+  store <4 x i32> %a, <4 x i32>* %dummy
+  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 7)
+  ret <4 x i32> %res
+}
+
 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43866,6 +43866,10 @@
   assert(N->getOperand(1).getValueType() == MVT::i8 &&
          "Unexpected shift amount type");
 
+  // (shift undef, X) -> 0
+  if (N0.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
   // Out of range logical bit shifts are guaranteed to be zero.
   // Out of range arithmetic bit shifts splat the sign bit.
   unsigned ShiftVal = N->getConstantOperandVal(1);

