[llvm] 82040d4 - [InstCombine] reduce right-shift-of-left-shifted constant via demanded bits

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 7 10:29:58 PDT 2022


Author: Sanjay Patel
Date: 2022-06-07T13:28:18-04:00
New Revision: 82040d414b3c9fc283ba57c8193fbc9578044cd9

URL: https://github.com/llvm/llvm-project/commit/82040d414b3c9fc283ba57c8193fbc9578044cd9
DIFF: https://github.com/llvm/llvm-project/commit/82040d414b3c9fc283ba57c8193fbc9578044cd9.diff

LOG: [InstCombine] reduce right-shift-of-left-shifted constant via demanded bits

If we don't demand high bits (zeros) and it is valid to pre-shift a constant:
(C2 << X) >> C1 --> (C2 >> C1) << X

https://alive2.llvm.org/ce/z/P3dWDW

There are a variety of related patterns, but I haven't found a single solution
that gets all of the motivating examples - so pulling this piece out of
D126617 along with more tests.

We should also handle the case where we shift-right followed by shift-left,
but I'll make that a follow-on patch assuming this one is ok. It seems likely
that we would want to add this to the SDAG version of the code too to keep it
on par with IR.

Differential Revision: https://reviews.llvm.org/D127122

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/shift-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 278db05f65d1b..0c11846ded448 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -630,6 +630,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
             ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
         if (SignBits >= NumHiDemandedBits)
           return I->getOperand(0);
+
+        // If we can pre-shift a left-shifted constant to the right without
+        // losing any low bits (we already know we don't demand the high bits),
+        // then eliminate the right-shift:
+        // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
+        Value *X;
+        Constant *C;
+        if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
+          Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
+          Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
+          if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
+            Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
+            return InsertNewInstWith(Shl, *I);
+          }
+        }
       }
 
       // Unsigned shift right.

diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 5304e379093f8..b3880adb8c4ec 100644
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -421,11 +421,12 @@ define i32 @shl_lshr_constants(i32 %x) {
   ret i32 %r
 }
 
+; Pre-shift a constant to eliminate lshr.
+
 define i8 @shl_lshr_demand1(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand1(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[LSHR]], -32
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %shl = shl i8 40, %x ; 0b0010_1000
@@ -434,11 +435,13 @@ define i8 @shl_lshr_demand1(i8 %x) {
   ret i8 %r
 }
 
+; Pre-shift a constant to eliminate disguised lshr.
+
 define i8 @shl_ashr_demand2(i8 %x) {
 ; CHECK-LABEL: @shl_ashr_demand2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[SHL]])
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i8 [[SHL]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -449,6 +452,8 @@ define i8 @shl_ashr_demand2(i8 %x) {
   ret i8 %r
 }
 
+; It is not safe to pre-shift because we demand an extra high bit.
+
 define i8 @shl_lshr_demand3(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
@@ -462,6 +467,8 @@ define i8 @shl_lshr_demand3(i8 %x) {
   ret i8 %r
 }
 
+; It is not valid to pre-shift because we lose the low bit of 44.
+
 define i8 @shl_lshr_demand4(i8 %x) {
 ; CHECK-LABEL: @shl_lshr_demand4(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 44, [[X:%.*]]
@@ -475,11 +482,12 @@ define i8 @shl_lshr_demand4(i8 %x) {
   ret i8 %r
 }
 
+; Splat vectors work too, and we don't care what instruction reduces demand for high bits.
+
 define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <2 x i8> [[SHL]], <i8 2, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
   %shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
@@ -488,6 +496,8 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; TODO: allow undef/poison elements for this transform.
+
 define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_left(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
@@ -501,6 +511,8 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; TODO: allow undef/poison elements for this transform.
+
 define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_right(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -514,6 +526,8 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; TODO: allow non-splat vector constants.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -527,11 +541,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; non-splat shl constant is ok.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -112>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
   %shl = shl <2 x i8> <i8 148, i8 144>, %x ; 0b1001_0100, 0b1001_0000
@@ -540,6 +555,8 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; This is possible, but may require significant changes to the demanded bits framework.
+
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -104, i8 -108>, [[X:%.*]]
@@ -553,11 +570,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
   ret <2 x i6> %r
 }
 
+; 'and' can reduce demand for high bits too.
+
 define i16 @shl_lshr_demand6(i16 %x) {
 ; CHECK-LABEL: @shl_lshr_demand6(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 -32624, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i16 [[SHL]], 4
-; CHECK-NEXT:    [[R:%.*]] = and i16 [[LSHR]], 4094
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i16 [[TMP1]], 4094
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %shl = shl i16 32912, %x ; 0b1000_0000_1001_0000


        


More information about the llvm-commits mailing list