[llvm] r353645 - [DAGCombine] Simplify funnel shifts with undef/zero args to bitshifts

Sun Feb 10 09:04:01 PST 2019

Author: rksimon
Date: Sun Feb 10 09:04:00 2019
New Revision: 353645

URL: http://llvm.org/viewvc/llvm-project?rev=353645&view=rev
Log:
[DAGCombine] Simplify funnel shifts with undef/zero args to bitshifts

Now that we have SimplifyDemandedBits support for funnel shifts (rL353539), we need to simplify funnel shifts back to bitshifts in cases where either argument has been folded to undef/zero.

Differential Revision: https://reviews.llvm.org/D58009

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/funnel-shift.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=353645&r1=353644&r2=353645&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Feb 10 09:04:00 2019
@@ -7127,13 +7127,52 @@ SDValue DAGCombiner::visitFunnelShift(SD
             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
       return IsFSHL ? N0 : N1;
 
-  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+  auto IsUndefOrZero = [](SDValue V) {
+    if (V.isUndef())
+      return true;
+    if (ConstantSDNode *Cst = isConstOrConstSplat(V, /*AllowUndefs*/true))
+      return Cst->getAPIntValue() == 0;
+    return false;
+  };
+
   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+    EVT ShAmtTy = N2.getValueType();
+
+    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
     if (Cst->getAPIntValue().uge(BitWidth)) {
       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
-                         DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
     }
+
+    unsigned ShAmt = Cst->getZExtValue();
+    if (ShAmt == 0)
+      return IsFSHL ? N0 : N1;
+
+    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+    if (IsUndefOrZero(N0))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+                                         SDLoc(N), ShAmtTy));
+    if (IsUndefOrZero(N1))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+                                         SDLoc(N), ShAmtTy));
+  }
+
+  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+  // iff We know the shift amount is in range.
+  // TODO: when is it worth doing SUB(BW, N2) as well?
+  if (isPowerOf2_32(BitWidth)) {
+    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
   }
 
   // fold (fshl N0, N0, N2) -> (rotl N0, N2)

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift.ll?rev=353645&r1=353644&r2=353645&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift.ll Sun Feb 10 09:04:00 2019
@@ -404,12 +404,13 @@ define i32 @fshl_i32_undef0_cst(i32 %a0)
 ; X32-SSE2-LABEL: fshl_i32_undef0_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shldl $9, %eax, %eax
+; X32-SSE2-NEXT:    shrl $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
@@ -438,19 +439,18 @@ define i32 @fshl_i32_undef1_msk(i32 %a0,
 ; X32-SSE2-LABEL: fshl_i32_undef1_msk:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    andl $7, %ecx
-; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT:    shldl %cl, %eax, %eax
+; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT:    andb $7, %cl
+; X32-SSE2-NEXT:    shll %cl, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef1_msk:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andl $7, %ecx
+; X64-AVX2-NEXT:    andb $7, %cl
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %eax, %eax
+; X64-AVX2-NEXT:    shll %cl, %eax
 ; X64-AVX2-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
@@ -461,13 +461,13 @@ define i32 @fshl_i32_undef1_cst(i32 %a0)
 ; X32-SSE2-LABEL: fshl_i32_undef1_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shldl $9, %eax, %eax
+; X32-SSE2-NEXT:    shll $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_undef1_cst:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $9, %eax, %eax
+; X64-AVX2-NEXT:    shll $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
@@ -513,19 +513,18 @@ define i32 @fshr_i32_undef0_msk(i32 %a0,
 ; X32-SSE2-LABEL: fshr_i32_undef0_msk:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    andl $7, %ecx
-; X32-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT:    shrdl %cl, %eax, %eax
+; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT:    andb $7, %cl
+; X32-SSE2-NEXT:    shrl %cl, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef0_msk:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andl $7, %ecx
+; X64-AVX2-NEXT:    andb $7, %cl
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %eax, %eax
+; X64-AVX2-NEXT:    shrl %cl, %eax
 ; X64-AVX2-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
@@ -536,13 +535,13 @@ define i32 @fshr_i32_undef0_cst(i32 %a0)
 ; X32-SSE2-LABEL: fshr_i32_undef0_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shrdl $9, %eax, %eax
+; X32-SSE2-NEXT:    shrl $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef0_cst:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrdl $9, %eax, %eax
+; X64-AVX2-NEXT:    shrl $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
@@ -592,12 +591,13 @@ define i32 @fshr_i32_undef1_cst(i32 %a0)
 ; X32-SSE2-LABEL: fshr_i32_undef1_cst:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    shrdl $9, %eax, %eax
+; X32-SSE2-NEXT:    shll $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_undef1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shrdl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
@@ -645,15 +645,14 @@ define i32 @fshl_i32_zero0(i32 %a0, i32
 define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshl_i32_zero0_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shldl $9, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shrl $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_zero0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shldl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
@@ -683,15 +682,14 @@ define i32 @fshl_i32_zero1(i32 %a0, i32
 define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshl_i32_zero1_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shrdl $23, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shll $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshl_i32_zero1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shrdl $23, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res
@@ -721,15 +719,14 @@ define i32 @fshr_i32_zero0(i32 %a0, i32
 define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshr_i32_zero0_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shldl $23, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shrl $9, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_zero0_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shldl $23, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shrl $9, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
@@ -758,15 +755,14 @@ define i32 @fshr_i32_zero1(i32 %a0, i32
 define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
 ; X32-SSE2-LABEL: fshr_i32_zero1_cst:
 ; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT:    xorl %eax, %eax
-; X32-SSE2-NEXT:    shrdl $9, %ecx, %eax
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    shll $23, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: fshr_i32_zero1_cst:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    shrdl $9, %edi, %eax
+; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shll $23, %eax
 ; X64-AVX2-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res