[llvm] 31971ca - [InstCombine] Try to narrow expr if trunc cannot be removed.

Fri Jul 3 12:30:44 PDT 2020

Author: Florian Hahn
Date: 2020-07-03T20:22:51+01:00
New Revision: 31971ca1c6f42aa9a5e162d63528b9709448f8d9

URL: https://github.com/llvm/llvm-project/commit/31971ca1c6f42aa9a5e162d63528b9709448f8d9
DIFF: https://github.com/llvm/llvm-project/commit/31971ca1c6f42aa9a5e162d63528b9709448f8d9.diff

LOG: [InstCombine] Try to narrow expr if trunc cannot be removed.

Narrowing an input expression of a truncate to a type larger than the
result of the truncate won't allow removing the truncate, but it may
enable further optimizations, e.g. allowing for larger vectorization
factors.

For now this is intentionally limited to integer types only, to avoid
producing new vector ops that might not be suitable for the target.

If we know that the only user is a trunc, we can also allow more
cases, e.g. also shortening expressions with some additional shifts.

I would appreciate feedback on the best place to do such a narrowing.

This fixes PR43580.

Reviewers: spatel, RKSimon, lebedev.ri, xbolva00

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D82973

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/trunc-binop-ext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 7b3c503facf1..3639edb5df4d 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -715,6 +715,25 @@ Instruction *InstCombiner::visitTrunc(TruncInst &Trunc) {
     return replaceInstUsesWith(Trunc, Res);
   }
 
+  // For integer types, check if we can shorten the entire input expression to
+  // DestWidth * 2, which won't allow removing the truncate, but reducing the
+  // width may enable further optimizations, e.g. allowing for larger
+  // vectorization factors.
+  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
+    if (DestWidth * 2 < SrcWidth) {
+      auto *NewDestTy = DestITy->getExtendedType();
+      if (shouldChangeType(SrcTy, NewDestTy) &&
+          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
+        LLVM_DEBUG(
+            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+                      " to reduce the width of operand of"
+                   << Trunc << '\n');
+        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
+        return new TruncInst(Res, DestTy);
+      }
+    }
+  }
+
   // Test if the trunc is the user of a select which is part of a
   // minimum or maximum operation. If so, don't do any more simplification.
   // Even simplifying demanded bits can break the canonical form of a

diff  --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
index dfc416021e76..570153163ab9 100644
--- a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
@@ -318,11 +318,11 @@ define <2 x i16> @narrow_zext_mul_commute(<2 x i16> %x16, <2 x i32> %y32) {
 ; Test cases for PR43580
 define i8 @narrow_zext_ashr_keep_trunc(i8 %i1, i8 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc(
-; CHECK-NEXT:    [[I1_EXT:%.*]] = sext i8 [[I1:%.*]] to i32
-; CHECK-NEXT:    [[I2_EXT:%.*]] = sext i8 [[I2:%.*]] to i32
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[I1_EXT]], [[I2_EXT]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT:    [[I1_EXT:%.*]] = sext i8 [[I1:%.*]] to i16
+; CHECK-NEXT:    [[I2_EXT:%.*]] = sext i8 [[I2:%.*]] to i16
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i16 [[I1_EXT]], [[I2_EXT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i16 [[TMP1]] to i8
 ; CHECK-NEXT:    ret i8 [[T]]
 ;
   %i1.ext = sext i8 %i1 to i32
@@ -335,11 +335,11 @@ define i8 @narrow_zext_ashr_keep_trunc(i8 %i1, i8 %i2) {
 
 define i8 @narrow_zext_ashr_keep_trunc2(i9 %i1, i9 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc2(
-; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i9 [[I1:%.*]] to i64
-; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i9 [[I2:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i64 [[I1_EXT1]], [[I2_EXT2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[TMP1]] to i8
+; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i9 [[I1:%.*]] to i16
+; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i9 [[I2:%.*]] to i16
+; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i16 [[I1_EXT1]], [[I2_EXT2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i16 [[TMP1]] to i8
 ; CHECK-NEXT:    ret i8 [[T]]
 ;
   %i1.ext = sext i9 %i1 to i64
@@ -352,11 +352,11 @@ define i8 @narrow_zext_ashr_keep_trunc2(i9 %i1, i9 %i2) {
 
 define i7 @narrow_zext_ashr_keep_trunc3(i8 %i1, i8 %i2) {
 ; CHECK-LABEL: @narrow_zext_ashr_keep_trunc3(
-; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i8 [[I1:%.*]] to i64
-; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i8 [[I2:%.*]] to i64
-; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i64 [[I1_EXT1]], [[I2_EXT2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[SUB]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[TMP1]] to i7
+; CHECK-NEXT:    [[I1_EXT1:%.*]] = zext i8 [[I1:%.*]] to i14
+; CHECK-NEXT:    [[I2_EXT2:%.*]] = zext i8 [[I2:%.*]] to i14
+; CHECK-NEXT:    [[SUB:%.*]] = add nuw nsw i14 [[I1_EXT1]], [[I2_EXT2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i14 [[SUB]], 1
+; CHECK-NEXT:    [[T:%.*]] = trunc i14 [[TMP1]] to i7
 ; CHECK-NEXT:    ret i7 [[T]]
 ;
   %i1.ext = sext i8 %i1 to i64