[llvm] r332895 - [DAGCombiner] Use computeKnownBits to match rotate patterns that have had their amount masking modified by simplifyDemandedBits

Mon May 21 14:09:18 PDT 2018

Author: ctopper
Date: Mon May 21 14:09:18 2018
New Revision: 332895

URL: http://llvm.org/viewvc/llvm-project?rev=332895&view=rev
Log:
[DAGCombiner] Use computeKnownBits to match rotate patterns that have had their amount masking modified by simplifyDemandedBits

SimplifyDemandedBits can remove bits from the shift-amount masks, and those are bits we need to see in order to detect rotates.

This patch uses the known-zero bits from computeKnownBits to fill in the removed mask bits so that the match still works.

As currently written, this calls computeKnownBits even when the mask hasn't been simplified, because doing so made the code simpler. If we're worried about compile-time performance, we can improve this.

I know we're talking about making a rotate intrinsic, but hopefully we can go ahead and do this change and just make sure the rotate intrinsic also handles it.

Differential Revision: https://reviews.llvm.org/D47116

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/combine-rotates.ll
    llvm/trunk/test/CodeGen/X86/rotate4.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=332895&r1=332894&r2=332895&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon May 21 14:09:18 2018
@@ -4823,7 +4823,8 @@ bool DAGCombiner::MatchRotateHalf(SDValu
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+                           SelectionDAG &DAG) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -4858,9 +4859,13 @@ static bool matchRotateSub(SDValue Pos,
   unsigned MaskLoBits = 0;
   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
-      if (NegC->getAPIntValue() == EltSize - 1) {
+      KnownBits Known;
+      DAG.computeKnownBits(Neg.getOperand(0), Known);
+      unsigned Bits = Log2_64(EltSize);
+      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
+          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
         Neg = Neg.getOperand(0);
-        MaskLoBits = Log2_64(EltSize);
+        MaskLoBits = Bits;
       }
     }
   }
@@ -4875,10 +4880,16 @@ static bool matchRotateSub(SDValue Pos,
 
   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
   // Pos'.  The truncation is redundant for the purpose of the equality.
-  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
-    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
-      if (PosC->getAPIntValue() == EltSize - 1)
+  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
+    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
+      KnownBits Known;
+      DAG.computeKnownBits(Pos.getOperand(0), Known);
+      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
+          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
+           MaskLoBits))
         Pos = Pos.getOperand(0);
+    }
+  }
 
   // The condition we need is now:
   //
@@ -4934,7 +4945,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(S
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg).getNode();

Modified: llvm/trunk/test/CodeGen/X86/combine-rotates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-rotates.ll?rev=332895&r1=332894&r2=332895&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-rotates.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-rotates.ll Mon May 21 14:09:18 2018
@@ -61,27 +61,14 @@ define <4 x i32> @combine_vec_rot_rot_sp
 define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
 ; XOP-LABEL: rotate_demanded_bits:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
-; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT:    vpshld %xmm1, %xmm0, %xmm3
-; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; XOP-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpshld %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    vpor %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: rotate_demanded_bits:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
-; AVX512-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3
-; AVX512-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; AVX512-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpor %xmm3, %xmm0, %xmm0
+; AVX512-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %3 = and <4 x i32> %1, <i32 30, i32 30, i32 30, i32 30>
   %4 = shl <4 x i32> %0, %3
@@ -117,28 +104,15 @@ define <4 x i32> @rotate_demanded_bits_3
 ; XOP-LABEL: rotate_demanded_bits_3:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
-; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm3
-; XOP-NEXT:    vpshld %xmm3, %xmm0, %xmm3
-; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; XOP-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT:    vpshld %xmm1, %xmm0, %xmm0
-; XOP-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: rotate_demanded_bits_3:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
-; AVX512-NEXT:    vpand %xmm2, %xmm1, %xmm3
-; AVX512-NEXT:    vpsllvd %xmm3, %xmm0, %xmm3
-; AVX512-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT:    vpsubd %xmm1, %xmm4, %xmm1
-; AVX512-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %3 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %4 = and <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>

Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=332895&r1=332894&r2=332895&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate4.ll Mon May 21 14:09:18 2018
@@ -284,15 +284,9 @@ define void @rotate_right_m16(i16* %p, i
 define i32 @rotate_demanded_bits(i32, i32) {
 ; CHECK-LABEL: rotate_demanded_bits:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    andb $30, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    andl $30, %ecx
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll %cl, %eax
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    andb $30, %cl
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %edi
-; CHECK-NEXT:    orl %eax, %edi
+; CHECK-NEXT:    roll %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %3 = and i32 %1, 30
@@ -324,16 +318,10 @@ define i32 @rotate_demanded_bits_2(i32,
 define i32 @rotate_demanded_bits_3(i32, i32) {
 ; CHECK-LABEL: rotate_demanded_bits_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addl %esi, %esi
-; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    andb $30, %cl
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll %cl, %eax
-; CHECK-NEXT:    negl %esi
+; CHECK-NEXT:    addb %sil, %sil
 ; CHECK-NEXT:    andb $30, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    shrl %cl, %edi
-; CHECK-NEXT:    orl %eax, %edi
+; CHECK-NEXT:    roll %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %3 = shl i32 %1, 1