[llvm] b931731 - [InstCombine] Narrow trunc(lshr) in more cases (#139645)

via llvm-commits <llvm-commits at lists.llvm.org>
Tue May 13 13:42:45 PDT 2025


Author: Usman Nadeem
Date: 2025-05-13T13:42:42-07:00
New Revision: b93173185d9c93bd6fc94baaa453bd030cd36fea

URL: https://github.com/llvm/llvm-project/commit/b93173185d9c93bd6fc94baaa453bd030cd36fea
DIFF: https://github.com/llvm/llvm-project/commit/b93173185d9c93bd6fc94baaa453bd030cd36fea.diff

LOG: [InstCombine] Narrow trunc(lshr) in more cases (#139645)

We can narrow `trunc(lshr(i32)) to i8` to `trunc(lshr(i16)) to i8` even
when the bits that we are shifting in are not zero, in the cases where
the new MSBs of the shifted value are never used and end up being
truncated away anyway.

This kind of narrowing does not remove the trunc, but it can help the
vectorizer generate better code in a smaller type.
Motivation: libyuv functions such as ARGBToUV444Row_C().

Proof: https://alive2.llvm.org/ce/z/9Ao2aJ
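
For illustration, this is the shape of the transform on the first new
test below (a sketch assembled from that test's input IR and its CHECK
lines; the range(i16 0, 8) attribute bounds the shift amount at 7, and
only the low 8 bits survive the trunc, so at most 7 + 8 <= 16 bits are
ever needed):

    ; before: the arithmetic is widened to i32
    %zext_a = zext i16 %a to i32
    %zext_b = zext i16 %b to i32
    %zext_shiftAmt = zext i16 %shiftAmt to i32
    %add = add nuw nsw i32 %zext_a, %zext_b
    %shr = lshr i32 %add, %zext_shiftAmt
    %trunc = trunc i32 %shr to i8

    ; after: the same computation narrowed to i16
    %add = add i16 %a, %b
    %shr = lshr i16 %add, %shiftAmt
    %trunc = trunc i16 %shr to i8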

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/cast.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d6c99366e6f00..9088b5bdec78b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -51,6 +51,8 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
     Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
     Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+    if (Opc == Instruction::LShr || Opc == Instruction::AShr)
+      Res->setIsExact(I->isExact());
     break;
   }
   case Instruction::Trunc:
@@ -319,13 +321,21 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
     //       zero - use AmtKnownBits.getMaxValue().
     uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
     uint32_t BitWidth = Ty->getScalarSizeInBits();
-    KnownBits AmtKnownBits =
-        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+    KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), 0, CxtI);
+    APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
     APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
-    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
-        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
-      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
-             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+    if (MaxShiftAmt.ult(BitWidth)) {
+      // If the only user is a trunc, we can narrow the shift knowing that any
+      // newly introduced MSBs will be truncated away and never used.
+      if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
+        auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
+        if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
+          return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+                 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+      }
+      if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI))
+        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+               canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
     }
     break;
   }
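
The new bound is easy to check by hand against the tests added below:
OrigBitWidth = 32, narrowing to i16 gives BitWidth = 16, and the only
user is a trunc to i8, so DemandedBits = 8. The guard
(MaxShiftAmt + DemandedBits).ule(BitWidth) then works out to:

    range(i16 0, 8):  7 + 8 = 15 <= 16  -> lshr is evaluated in i16
    range(i16 0, 10): 9 + 8 = 17 >  16  -> no transform
    no range:         MaxShiftAmt is unbounded (up to 65535), so even
                      the outer MaxShiftAmt.ult(BitWidth) guard fails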

diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 0f957e22ad17b..42a764f2904ee 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -5,6 +5,7 @@
 ; RUN: opt < %s -passes=instcombine -S -data-layout="E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=ALL,BE
 ; RUN: opt < %s -passes=instcombine -S -data-layout="e-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=ALL,LE
 
+declare void @use_i8(i8)
 declare void @use_i32(i32)
 declare void @use_v2i32(<2 x i32>)
 
@@ -2041,6 +2042,120 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) {
   ret <2 x i8> %D
 }
 
+define i8 @trunc_lshr_ext_halfWidth(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth(
+; ALL-NEXT:    [[ADD:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i16 [[ADD]], [[SHIFTAMT:%.*]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_rhsOutofRange_neg(i16 %a, i16 %b, i16 range(i16 0, 10) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_rhsOutofRange_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_rhsNoRange_neg(i16 %a, i16 %b, i16 %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_rhsNoRange_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_twouse_neg1(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_twouse_neg1(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    call void @use_i32(i32 [[ADD]])
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  call void @use_i32(i32 %add)
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_twouse_neg2(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_twouse_neg2(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    call void @use_i32(i32 [[SHR]])
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  call void @use_i32(i32 %shr)
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+; The narrowing transform only happens for scalar integer types.
+define <2 x i8> @trunc_lshr_ext_halfWidth_vector_neg(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_vector_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw <2 x i32> [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[ADD]], splat (i32 6)
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc <2 x i32> [[SHR]] to <2 x i8>
+; ALL-NEXT:    ret <2 x i8> [[TRUNC]]
+;
+  %zext_a = zext <2 x i16> %a to <2 x i32>
+  %zext_b = zext <2 x i16> %b to <2 x i32>
+  %add = add nuw nsw <2 x i32> %zext_a, %zext_b
+  %shr = lshr <2 x i32> %add, <i32 6, i32 6>
+  %trunc = trunc <2 x i32> %shr to <2 x i8>
+  ret <2 x i8> %trunc
+}
+
 ; The following four tests exercise sext + lshr + trunc patterns.
 ; PR33078
 
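As a usage note (standard LLVM test workflow, not part of this commit):
the new tests run through the RUN lines at the top of cast.ll, which
invoke `opt -passes=instcombine -S` with the data layouts shown above,
and the CHECK lines can be regenerated with
llvm/utils/update_test_checks.py.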



