[llvm] 6e20539 - [InstCombine] fold lshr(mul X, SplatC), C2

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 10 12:02:45 PST 2021


Author: Sanjay Patel
Date: 2021-02-10T15:02:31-05:00
New Revision: 6e2053983e0d3f69b0d9219923d7ba1eae592e12

URL: https://github.com/llvm/llvm-project/commit/6e2053983e0d3f69b0d9219923d7ba1eae592e12
DIFF: https://github.com/llvm/llvm-project/commit/6e2053983e0d3f69b0d9219923d7ba1eae592e12.diff

LOG: [InstCombine] fold lshr(mul X, SplatC), C2

This is a special-case multiply that replicates bits of
the source operand. We need this fold to avoid a regression
if we make canonicalization to `mul` more aggressive for
shl+or patterns.
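
To see the replication concretely, here is a small standalone C++
sketch of the i32 case (an illustration only, not part of this
commit):

  #include <cassert>
  #include <cstdint>

  int main() {
    // If X * 0x10001 does not overflow 32 bits (the nuw requirement),
    // then X < 2^16, and the multiply copies the low 16 bits of X
    // into the high half: X * 0x10001 == (X << 16) | X.
    uint32_t X = 0xABCD;
    uint32_t M = X * 0x10001u;
    assert(M == ((X << 16) | X));
    // A right shift of the replicated product just extracts the low
    // bits, which is the same as masking the source:
    assert((M >> 16) == (X & 0xFFFFu));
    return 0;
  }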

I did not find a way to make Alive generalize the bit-width
precondition (the fold requires an even bit width), but an
example of the proof is:
  Name: i32
  Pre: isPowerOf2(C1 - 1) && log2(C1) == C2 && (C2 * 2 == width(C2))
  %m = mul nuw i32 %x, C1
  %t = lshr i32 %m, C2
  =>
  %t = and i32 %x, C1 - 2

  Name: i14
  %m = mul nuw i14 %x, 129
  %t = lshr i14 %m, 7
  =>
  %t = and i14 %x, 127

https://rise4fun.com/Alive/e52
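
As a sanity check beyond the symbolic proof, the i8 instance
(C1 = 17, C2 = 4) can be verified exhaustively; a hypothetical C++
test, again not part of this commit:

  #include <cassert>
  #include <cstdint>

  int main() {
    // i8 instance of the proof: C1 = 17 (2^4 + 1), C2 = 4 = width / 2.
    // "mul nuw" restricts X to values whose product fits in 8 bits.
    for (uint32_t X = 0; X * 17 <= 0xFF; ++X) {
      uint8_t M = static_cast<uint8_t>(X * 17); // mul nuw i8 %x, 17
      uint8_t T = M >> 4;                       // lshr i8 %m, 4
      assert(T == (X & 15));                    // and i8 %x, 15 (C1 - 2)
    }
    return 0;
  }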

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
    llvm/test/Transforms/InstCombine/lshr.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c4448739f2b5..2007cf0bbc9c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1145,6 +1145,16 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
         return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
     }
 
+    // Look for a "splat" mul pattern - it replicates bits across each half of
+    // a value, so a right shift is just a mask of the low bits:
+    // lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
+    // TODO: Generalize to allow more than just half-width shifts?
+    const APInt *MulC;
+    if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
+        ShAmt * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+        MulC->logBase2() == ShAmt)
+      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+
     // If the shifted-out value is known-zero, then this is an exact shift.
     if (!I.isExact() &&
         MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {

diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index ea3d3de542d0..bc9bd9262ccd 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -262,8 +262,7 @@ define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
 
 define i32 @mul_splat_fold(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold(
-; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537
-; CHECK-NEXT:    [[T:%.*]] = lshr i32 [[M]], 16
+; CHECK-NEXT:    [[T:%.*]] = and i32 [[X:%.*]], 65535
 ; CHECK-NEXT:    ret i32 [[T]]
 ;
   %m = mul nuw i32 %x, 65537
@@ -271,13 +270,15 @@ define i32 @mul_splat_fold(i32 %x) {
   ret i32 %t
 }
 
+; Vector type, extra use, weird types are all ok.
+
 declare void @usevec(<3 x i14>)
 
 define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
 ; CHECK-LABEL: @mul_splat_fold_vec(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], <i14 129, i14 129, i14 129>
 ; CHECK-NEXT:    call void @usevec(<3 x i14> [[M]])
-; CHECK-NEXT:    [[T:%.*]] = lshr <3 x i14> [[M]], <i14 7, i14 7, i14 7>
+; CHECK-NEXT:    [[T:%.*]] = and <3 x i14> [[X]], <i14 127, i14 127, i14 127>
 ; CHECK-NEXT:    ret <3 x i14> [[T]]
 ;
   %m = mul nuw <3 x i14> %x, <i14 129, i14 129, i14 129>
@@ -286,6 +287,8 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
   ret <3 x i14> %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
@@ -297,6 +300,8 @@ define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
   ret i32 %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_wrong_lshr_const(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537
@@ -308,6 +313,8 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
   ret i32 %t
 }
 
+; Negative test
+
 define i32 @mul_splat_fold_no_nuw(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_no_nuw(
 ; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537
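
For intuition on the last negative test: without the nuw guarantee
the product may wrap, so the shifted result no longer equals the
masked source, and the fold would be wrong. A small C++ sketch of an
i8 counterexample (illustration only):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // X = 16 violates the nuw requirement in i8: 16 * 17 = 272 wraps
    // to 16 in 8 bits.
    uint8_t X = 16;
    uint8_t M = static_cast<uint8_t>(X * 17); // wrapping mul: M == 16
    // lshr yields 1, but the mask yields 0.
    std::printf("lshr: %d  and: %d\n", M >> 4, X & 15);
    return 0;
  }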


        

