[llvm] 3fabd98 - [InstCombine] fold (trunc (X>>C1)) << C to shift+mask directly

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 1 11:22:50 PDT 2021


Author: Sanjay Patel
Date: 2021-10-01T14:22:44-04:00
New Revision: 3fabd98e5b3e4c9525c0bf97f5595a1aeff3e85a

URL: https://github.com/llvm/llvm-project/commit/3fabd98e5b3e4c9525c0bf97f5595a1aeff3e85a
DIFF: https://github.com/llvm/llvm-project/commit/3fabd98e5b3e4c9525c0bf97f5595a1aeff3e85a.diff

LOG: [InstCombine] fold (trunc (X>>C1)) << C to shift+mask directly

No externally visible functional difference is intended.
That is, the test diffs show identical instructions other than
value-name changes (the name checks are included specifically to
verify the new logic).

The existing transforms created extra instructions and relied
on subsequent folds to get to the final result, but that could
conflict with other transforms like the proposed D110170 (and
caused that patch to be reverted twice so far because of infinite
combine loops).
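
As a worked example (paraphrased from the shl_trunc_bigger_lshr test
updated below; the function and value names here are illustrative only),
with C1 = 5 and C = 3 the larger lshr survives with shift amount
C1 - C = 2, and the mask is -1 << 3 = -8:

  define i8 @example(i32 %x) {
    %s = lshr i32 %x, 5
    %t = trunc i32 %s to i8
    %r = shl i8 %t, 3
    ret i8 %r
  }

now folds in one step to:

  define i8 @example(i32 %x) {
    %s = lshr i32 %x, 2
    %t = trunc i32 %s to i8
    %r = and i8 %t, -8
    ret i8 %r
  }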

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
    llvm/test/Transforms/InstCombine/bswap.ll
    llvm/test/Transforms/InstCombine/shift-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index df7fb01ca7fa..0f83be53b875 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -845,6 +845,26 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
       }
     }
 
+    // Similar to above, but look through an intermediate trunc instruction.
+    BinaryOperator *Shr;
+    if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
+        match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
+      // The larger shift direction survives through the transform.
+      unsigned ShrAmtC = C1->getZExtValue();
+      unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
+      Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
+      auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;
+
+      // If C1 > C:
+      // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
+      // If C > C1:
+      // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
+      Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
+      Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
+      APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
+      return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
+    }
+
     if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
       unsigned AmtSum = ShAmtC + C1->getZExtValue();
       // Oversized shifts are simplified to zero in InstSimplify.
@@ -853,41 +873,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
         return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
     }
 
-    // Fold shl(trunc(shr(x,c1)),c2) -> trunc(and(shl(shr(x,c1),c2),c2'))
-    // Require that the input operand is a non-poison shift-by-constant so that
-    // we have confidence that the shifts will get folded together.
-    Instruction *TrOp;
-    const APInt *TrShiftAmt;
-    if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TrOp)))) &&
-        match(TrOp, m_OneUse(m_Shr(m_Value(), m_APInt(TrShiftAmt)))) &&
-        TrShiftAmt->ult(TrOp->getType()->getScalarSizeInBits())) {
-      Type *SrcTy = TrOp->getType();
-
-      // Okay, we'll do this xform.  Make the shift of shift.
-      unsigned SrcSize = SrcTy->getScalarSizeInBits();
-      Constant *ShAmt = ConstantInt::get(SrcTy, C->zext(SrcSize));
-
-      // (shift2 (shift1 & 0x00FF), c2)
-      Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
-
-      // For logical shifts, the truncation has the effect of making the high
-      // part of the register be zeros.  Emulate this by inserting an AND to
-      // clear the top bits as needed.  This 'and' will usually be zapped by
-      // other xforms later if dead.
-      Constant *MaskV =
-          ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, BitWidth));
-
-      // The mask we constructed says what the trunc would do if occurring
-      // between the shifts.  We want to know the effect *after* the second
-      // shift.  We know that it is a logical shift by a constant, so adjust the
-      // mask as appropriate.
-      MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt);
-      // shift1 & 0x00FF
-      Value *And = Builder.CreateAnd(NSh, MaskV, Op0->getName());
-      // Return the value truncated to the interesting size.
-      return new TruncInst(And, Ty);
-    }
-
     // If we have an opposite shift by the same amount, we may be able to
     // reorder binops and shifts to eliminate math/logic.
     auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {

diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
index 32caf4a89d60..18b8a2532abb 100644
--- a/llvm/test/Transforms/InstCombine/bswap.ll
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -743,9 +743,9 @@ define i16 @trunc_bswap_i160(i160* %a0) {
 ; CHECK-NEXT:    [[LSHR1:%.*]] = lshr i160 [[LOAD]], 136
 ; CHECK-NEXT:    [[CAST1:%.*]] = trunc i160 [[LSHR1]] to i16
 ; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[CAST1]], 255
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i160 [[LOAD]], 120
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i160 [[TMP1]] to i16
-; CHECK-NEXT:    [[SHL:%.*]] = and i16 [[TMP2]], -256
+; CHECK-NEXT:    [[SH_DIFF:%.*]] = lshr i160 [[LOAD]], 120
+; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = trunc i160 [[SH_DIFF]] to i16
+; CHECK-NEXT:    [[SHL:%.*]] = and i16 [[TR_SH_DIFF]], -256
 ; CHECK-NEXT:    [[OR:%.*]] = or i16 [[AND1]], [[SHL]]
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;

diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 7b933649b984..7c6618ffaa20 100644
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -139,9 +139,9 @@ define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
 
 define i8 @shl_trunc_bigger_lshr(i32 %x) {
 ; CHECK-LABEL: @shl_trunc_bigger_lshr(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TMP2]], -8
+; CHECK-NEXT:    [[SH_DIFF:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i8
+; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -8
 ; CHECK-NEXT:    ret i8 [[LT]]
 ;
   %rt = lshr i32 %x, 5
@@ -153,8 +153,8 @@ define i8 @shl_trunc_bigger_lshr(i32 %x) {
 define i8 @shl_trunc_smaller_lshr(i32 %x) {
 ; CHECK-LABEL: @shl_trunc_smaller_lshr(
 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X_TR]], 2
-; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TMP1]], -32
+; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = shl i8 [[X_TR]], 2
+; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -32
 ; CHECK-NEXT:    ret i8 [[LT]]
 ;
   %rt = lshr i32 %x, 3
@@ -165,9 +165,9 @@ define i8 @shl_trunc_smaller_lshr(i32 %x) {
 
 define i24 @shl_trunc_bigger_ashr(i32 %x) {
 ; CHECK-LABEL: @shl_trunc_bigger_ashr(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 9
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
-; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TMP2]], -8
+; CHECK-NEXT:    [[SH_DIFF:%.*]] = ashr i32 [[X:%.*]], 9
+; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i24
+; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8
 ; CHECK-NEXT:    ret i24 [[LT]]
 ;
   %rt = ashr i32 %x, 12
@@ -179,8 +179,8 @@ define i24 @shl_trunc_bigger_ashr(i32 %x) {
 define i24 @shl_trunc_smaller_ashr(i32 %x) {
 ; CHECK-LABEL: @shl_trunc_smaller_ashr(
 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i24
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i24 [[X_TR]], 3
-; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TMP1]], -8192
+; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = shl i24 [[X_TR]], 3
+; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8192
 ; CHECK-NEXT:    ret i24 [[LT]]
 ;
   %rt = ashr i32 %x, 10


        

