[llvm] a105d30 - [X86] Fold (shift undef, X)->0 for vector shifts by immediate.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu May 27 09:32:01 PDT 2021


Author: Craig Topper
Date: 2021-05-27T09:31:47-07:00
New Revision: a105d3024efec365961e940c489c4ed5198736d2

URL: https://github.com/llvm/llvm-project/commit/a105d3024efec365961e940c489c4ed5198736d2
DIFF: https://github.com/llvm/llvm-project/commit/a105d3024efec365961e940c489c4ed5198736d2.diff

LOG: [X86] Fold (shift undef, X)->0 for vector shifts by immediate.

We could previously catch this case by accident through the later
call to getTargetConstantBitsFromNode, but that only worked if N0
had a single use. This patch performs the fold explicitly for undef
and does not require a use count check.
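
For example, a vector shift-by-immediate whose first operand is undef
now folds straight to zero even when the result has multiple uses. A
trimmed sketch of the pattern (the @shift_undef name is illustrative;
the real test bodies are in the vec_shift5.ll diff below):

    define <4 x i32> @shift_undef(<4 x i32>* %dummy) {
      ; The shift of undef is folded to zero up front, even though
      ; %a has two uses (the store and the return).
      %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
      store <4 x i32> %a, <4 x i32>* %dummy
      ret <4 x i32> %a
    }
    declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)

After the patch llc materializes zero (the xorps/movaps sequences in
the updated CHECK lines) instead of emitting pslld.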

I think this is needed to move the (shl X, 1)->(add X, X) fold to
isel for PR50468: we need to be sure X won't be IMPLICIT_DEF, which
might prevent the same vreg from being used for both operands.
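
As a sketch of why that matters (the fold below is the planned
follow-up, not part of this commit), selecting (shl X, 1) as
(add X, X) feeds one value to both addends:

    ; The follow-up would select this as something like
    ; paddd %xmm0, %xmm0, i.e. (add X, X), which wants the same
    ; vreg for both operands. If X could be undef, isel would see
    ; IMPLICIT_DEF and might not assign both operands the same
    ; vreg; folding undef shifts to zero first sidesteps that.
    define <4 x i32> @shl_by_one(<4 x i32> %x) {
      %r = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
      ret <4 x i32> %r
    }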

Differential Revision: https://reviews.llvm.org/D103192

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vec_shift5.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 70bf79c1de68..71d1399fa8c5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43866,6 +43866,10 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
   assert(N->getOperand(1).getValueType() == MVT::i8 &&
          "Unexpected shift amount type");
 
+  // (shift undef, X) -> 0
+  if (N0.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
   // Out of range logical bit shifts are guaranteed to be zero.
   // Out of range arithmetic bit shifts splat the sign bit.
   unsigned ShiftVal = N->getConstantOperandVal(1);

diff --git a/llvm/test/CodeGen/X86/vec_shift5.ll b/llvm/test/CodeGen/X86/vec_shift5.ll
index f6f96710b81c..0514c6e61ade 100644
--- a/llvm/test/CodeGen/X86/vec_shift5.ll
+++ b/llvm/test/CodeGen/X86/vec_shift5.ll
@@ -176,16 +176,16 @@ define <4 x i32> @test17(<4 x i32> %a0, <4 x i32>* %dummy) {
 ; X86-LABEL: test17:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pslld $6, %xmm0
-; X86-NEXT:    movdqa %xmm0, (%eax)
-; X86-NEXT:    pslld $7, %xmm0
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    xorps %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test17:
 ; X64:       # %bb.0:
-; X64-NEXT:    pslld $6, %xmm0
-; X64-NEXT:    movdqa %xmm0, (%rdi)
-; X64-NEXT:    pslld $7, %xmm0
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rdi)
+; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
   store <4 x i32> %a, <4 x i32>* %dummy
@@ -197,16 +197,16 @@ define <4 x i32> @test18(<4 x i32> %a0, <4 x i32>* %dummy) {
 ; X86-LABEL: test18:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pslld $3, %xmm0
-; X86-NEXT:    movdqa %xmm0, (%eax)
-; X86-NEXT:    pslld $1, %xmm0
+; X86-NEXT:    xorps %xmm0, %xmm0
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    xorps %xmm0, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test18:
 ; X64:       # %bb.0:
-; X64-NEXT:    pslld $3, %xmm0
-; X64-NEXT:    movdqa %xmm0, (%rdi)
-; X64-NEXT:    pslld $1, %xmm0
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rdi)
+; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
   store <4 x i32> %a, <4 x i32>* %dummy


More information about the llvm-commits mailing list