[llvm] [InstCombine] Support multi-use values in cast elimination transforms (PR #165877)
Valeriy Savchenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 08:40:54 PDT 2025
https://github.com/SavchenkoValeriy created https://github.com/llvm/llvm-project/pull/165877
`canEvaluateTruncated` and `canEvaluateSExtd` previously rejected multi-use values to avoid duplication. This was overly conservative: if all users of a multi-use value are part of the transform, we can evaluate it in a different type without duplication.
This change tracks visited values and defers decisions on multi-use values until we verify all their users were visited. `EvaluateInDifferentType` now memoizes multi-use values to avoid creating duplicates.
Applied to truncation and sext. Zext unchanged due to its dual-return nature.
>From 0b43b4553dd4afa513166388d1a20fc1976f3b5f Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Fri, 31 Oct 2025 13:14:49 +0000
Subject: [PATCH 1/3] [NFC][InstCombine] Add tests for multi-user cast
transforms
---
llvm/test/Transforms/InstCombine/cast.ll | 148 +++++++++++++++++++++++
1 file changed, 148 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index cc0f2e79d204b..f1c6fe0c24311 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -2238,3 +2238,151 @@ define i32 @test95(i32 %x) {
%5 = zext i8 %4 to i32
ret i32 %5
}
+
+define i16 @test96(i16 %x, i16 %y) {
+; ALL-LABEL: @test96(
+; ALL-NEXT: [[ZX:%.*]] = zext i16 [[X:%.*]] to i32
+; ALL-NEXT: [[ZY:%.*]] = zext i16 [[Y:%.*]] to i32
+; ALL-NEXT: [[A:%.*]] = add nuw nsw i32 [[ZX]], [[ZY]]
+; ALL-NEXT: [[B:%.*]] = add nuw nsw i32 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[D]] to i16
+; ALL-NEXT: ret i16 [[T]]
+;
+ %zx = zext i16 %x to i32
+ %zy = zext i16 %y to i32
+ %a = add i32 %zx, %zy
+ %b = add i32 %a, 5
+ %c = mul i32 %a, 3
+ %d = or i32 %b, %c
+ %t = trunc i32 %d to i16
+ ret i16 %t
+}
+
+define i16 @test97(i16 %x, i16 %y) {
+; ALL-LABEL: @test97(
+; ALL-NEXT: [[ZX:%.*]] = zext i16 [[X:%.*]] to i32
+; ALL-NEXT: [[ZY:%.*]] = zext i16 [[Y:%.*]] to i32
+; ALL-NEXT: [[A:%.*]] = add nuw nsw i32 [[ZX]], [[ZY]]
+; ALL-NEXT: [[B:%.*]] = add nuw nsw i32 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
+; ALL-NEXT: call void @use_i32(i32 [[A]])
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[D]] to i16
+; ALL-NEXT: ret i16 [[T]]
+;
+ %zx = zext i16 %x to i32
+ %zy = zext i16 %y to i32
+ %a = add i32 %zx, %zy
+ %b = add i32 %a, 5
+ %c = mul i32 %a, 3
+ %d = or i32 %b, %c
+ call void @use_i32(i32 %a)
+ %t = trunc i32 %d to i16
+ ret i16 %t
+}
+
+; expected not to narrow operations to i16 due to a loop in use chains
+define i16 @test98(i16 %x, i16 %n) {
+; ALL-LABEL: @test98(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32
+; ALL-NEXT: br label [[LOOP:%.*]]
+; ALL: loop:
+; ALL-NEXT: [[P:%.*]] = phi i32 [ [[Z]], [[ENTRY:%.*]] ], [ [[A:%.*]], [[LOOP]] ]
+; ALL-NEXT: [[A]] = add i32 [[P]], 1
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[A]] to i16
+; ALL-NEXT: [[COND:%.*]] = icmp ugt i16 [[N:%.*]], [[T]]
+; ALL-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; ALL: exit:
+; ALL-NEXT: ret i16 [[T]]
+;
+entry:
+ %z = zext i16 %x to i32
+ br label %loop
+
+loop:
+ %p = phi i32 [ %z, %entry ], [ %a, %loop ]
+ %a = add i32 %p, 1
+ %t = trunc i32 %a to i16
+ %cond = icmp ult i16 %t, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret i16 %t
+}
+
+define i32 @test99(i32 %x, i32 %y) {
+; ALL-LABEL: @test99(
+; ALL-NEXT: [[TX:%.*]] = trunc i32 [[X:%.*]] to i16
+; ALL-NEXT: [[TY:%.*]] = trunc i32 [[Y:%.*]] to i16
+; ALL-NEXT: [[A:%.*]] = add i16 [[TX]], [[TY]]
+; ALL-NEXT: [[B:%.*]] = add i16 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i16 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i16 [[B]], [[C]]
+; ALL-NEXT: [[S:%.*]] = sext i16 [[D]] to i32
+; ALL-NEXT: ret i32 [[S]]
+;
+ %tx = trunc i32 %x to i16
+ %ty = trunc i32 %y to i16
+ %a = add i16 %tx, %ty
+ %b = add i16 %a, 5
+ %c = mul i16 %a, 3
+ %d = or i16 %b, %c
+ %t = sext i16 %d to i32
+ ret i32 %t
+}
+
+define i32 @test100(i32 %x, i32 %y) {
+; ALL-LABEL: @test100(
+; ALL-NEXT: [[TX:%.*]] = trunc i32 [[X:%.*]] to i8
+; ALL-NEXT: [[TY:%.*]] = trunc i32 [[Y:%.*]] to i8
+; ALL-NEXT: [[A:%.*]] = add i8 [[TX]], [[TY]]
+; ALL-NEXT: [[B:%.*]] = add i8 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i8 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i8 [[B]], [[C]]
+; ALL-NEXT: call void @use_i8(i8 [[A]])
+; ALL-NEXT: [[T:%.*]] = sext i8 [[D]] to i32
+; ALL-NEXT: ret i32 [[T]]
+;
+ %tx = trunc i32 %x to i8
+ %ty = trunc i32 %y to i8
+ %a = add i8 %tx, %ty
+ %b = add i8 %a, 5
+ %c = mul i8 %a, 3
+ %d = or i8 %b, %c
+ call void @use_i8(i8 %a)
+ %t = sext i8 %d to i32
+ ret i32 %t
+}
+
+; expected not to extend operations to i32 due to a loop in use chains
+define i32 @test101(i32 %x, i8 %n) {
+; ALL-LABEL: @test101(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8
+; ALL-NEXT: br label [[LOOP:%.*]]
+; ALL: loop:
+; ALL-NEXT: [[P:%.*]] = phi i8 [ [[T]], [[ENTRY:%.*]] ], [ [[A:%.*]], [[LOOP]] ]
+; ALL-NEXT: [[A]] = add i8 [[P]], 1
+; ALL-NEXT: [[COND:%.*]] = icmp ult i8 [[A]], [[N:%.*]]
+; ALL-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; ALL: exit:
+; ALL-NEXT: [[S:%.*]] = sext i8 [[A]] to i32
+; ALL-NEXT: ret i32 [[S]]
+;
+entry:
+ %t = trunc i32 %x to i8
+ br label %loop
+
+loop:
+ %p = phi i8 [ %t, %entry ], [ %a, %loop ]
+ %a = add i8 %p, 1
+ %cond = icmp ult i8 %a, %n
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ %s = sext i8 %a to i32
+ ret i32 %s
+}
>From 10ae2667f95c1140e71dfe14532c49e230f07f97 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 28 Oct 2025 10:55:52 +0000
Subject: [PATCH 2/3] [InstCombine] Perform trunc transformation for values
with >1 users
---
.../InstCombine/InstCombineCasts.cpp | 367 ++++++++++++++----
.../Transforms/InstCombine/cast-mul-select.ll | 22 +-
llvm/test/Transforms/InstCombine/cast.ll | 11 +-
.../Transforms/InstCombine/catchswitch-phi.ll | 10 +-
.../Transforms/InstCombine/icmp-mul-zext.ll | 7 +-
.../logical-select-inseltpoison.ll | 14 +-
.../Transforms/InstCombine/logical-select.ll | 14 +-
7 files changed, 330 insertions(+), 115 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4c9b10a094981..384f37d0a3310 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,14 +12,21 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include <iterator>
#include <optional>
using namespace llvm;
@@ -27,12 +34,19 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
-/// true for, actually insert the code to evaluate the expression.
-Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
- bool isSigned) {
+using EvaluatedMap = SmallDenseMap<Value *, Value *, 8>;
+
+static Value *EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned,
+ InstCombinerImpl &IC,
+ EvaluatedMap &Processed) {
+ // Since we cover transformation of instructions with multiple users, we might
+ // come to the same node via multiple paths. We should not create a
+ // replacement for every single one of them though.
+ if (const auto It = Processed.find(V); It != Processed.end())
+ return It->getSecond();
+
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantFoldIntegerCast(C, Ty, isSigned, DL);
+ return ConstantFoldIntegerCast(C, Ty, isSigned, IC.getDataLayout());
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -50,8 +64,10 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
case Instruction::Shl:
case Instruction::UDiv:
case Instruction::URem: {
- Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
- Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
+ Processed);
+ Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
+ Processed);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
if (Opc == Instruction::LShr || Opc == Instruction::AShr)
Res->setIsExact(I->isExact());
@@ -72,8 +88,10 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
Opc == Instruction::SExt);
break;
case Instruction::Select: {
- Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
- Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
+ IC, Processed);
+ Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
+ IC, Processed);
Res = SelectInst::Create(I->getOperand(0), True, False);
break;
}
@@ -81,8 +99,8 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =
- EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V = EvaluateInDifferentTypeImpl(OPN->getIncomingValue(i), Ty,
+ isSigned, IC, Processed);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -90,8 +108,8 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
}
case Instruction::FPToUI:
case Instruction::FPToSI:
- Res = CastInst::Create(
- static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
+ Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
+ I->getOperand(0), Ty);
break;
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -111,8 +129,10 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
- Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
- Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
+ Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
+ isSigned, IC, Processed);
+ Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
+ isSigned, IC, Processed);
Res = new ShuffleVectorInst(Op0, Op1,
cast<ShuffleVectorInst>(I)->getShuffleMask());
break;
@@ -123,7 +143,22 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
}
Res->takeName(I);
- return InsertNewInstWith(Res, I->getIterator());
+ Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
+ // There is no need to keep track of the old value/new value relationship
+ // when we have only one user: we came here from that user and no one
+ // else cares.
+ if (!V->hasOneUse()) {
+ Processed[V] = Result;
+ }
+ return Result;
+}
+
+/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
+/// true for, actually insert the code to evaluate the expression.
+Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
+ bool isSigned) {
+ EvaluatedMap Processed;
+ return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
}
Instruction::CastOps
@@ -227,9 +262,174 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
return nullptr;
}
+namespace {
+
+/// Helper class for evaluating whether a value can be computed in a different
+/// type without changing its value. Used by cast simplification transforms.
+class TypeEvaluationHelper {
+public:
+ /// Return true if we can evaluate the specified expression tree as type Ty
+ /// instead of its larger type, and arrive with the same value.
+ /// This is used by code that tries to eliminate truncates.
+ [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI);
+
+ /// Determine if the specified value can be computed in the specified wider
+ /// type and produce the same low bits. If not, return false.
+ [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
+ unsigned &BitsToClear,
+ InstCombinerImpl &IC,
+ Instruction *CxtI);
+
+ /// Return true if we can take the specified value and return it as type Ty
+ /// without inserting any new casts and without changing the value of the
+ /// common low bits.
+ [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
+
+private:
+ /// Constants and extensions/truncates from the destination type are always
+ /// free to be evaluated in that type.
+ [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
+
+ /// Check if we traversed all the users of the multi-use values we've seen.
+ [[nodiscard]] bool allPendingVisited() const {
+ return llvm::all_of(Pending,
+ [this](Value *V) { return Visited.contains(V); });
+ }
+
+ /// A generic wrapper for canEvaluate* recursions to inject visitation
+ /// tracking and enforce correct multi-use value evaluations.
+ [[nodiscard]] bool
+ canEvaluate(Value *V, Type *Ty,
+ llvm::function_ref<bool(Value *, Type *Type)> Pred) {
+ if (canAlwaysEvaluateInType(V, Ty))
+ return true;
+
+ if (!isa<Instruction>(V))
+ return false;
+
+ auto *I = cast<Instruction>(V);
+ // We insert false by default to return false when we encounter user loops.
+ const auto [It, Inserted] = Visited.insert({V, false});
+
+ // There are three possible cases for us having information on this value
+ // in the Visited map:
+ // 1. We properly checked it and concluded that we can evaluate it (true)
+ // 2. We properly checked it and concluded that we can't (false)
+ // 3. We started to check it, but during the recursive traversal we came
+ // back to it.
+ //
+ // For cases 1 and 2, we can safely return the stored result. For case 3, we
+ // can potentially have a situation where we can evaluate recursive user
+ // chains, but that can be quite tricky to do properly and instead, we
+ // return false.
+ //
+ // In any case, we should return whatever was there in the map to begin
+ // with.
+ if (!Inserted)
+ return It->getSecond();
+
+ // For single-user values we can easily decide whether they can be
+ // evaluated in a different type, because we came from that only user. This
+ // is not as simple for multi-user values.
+ //
+ // In general, we have the following case (inverted control-flow, users are
+ // at the top):
+ //
+ // Cast %A
+ // ____|
+ // /
+ // %A = Use %B, %C
+ // ________| |
+ // / |
+ // %B = Use %D |
+ // ________| |
+ // / |
+ // %D = Use %C |
+ // ________|___|
+ // /
+ // %C = ...
+ //
+ // In this case, when we check %A, %B and %D, we are confident that we can
+ // make the decision here and now, since we came from their only users.
+ //
+ // For %C, it is harder. We come there twice, and when we come the first
+ // time, it's hard to tell if we will visit the second user (technically
+ // it's not hard, but we might need a lot of repetitive checks with non-zero
+ // cost).
+ //
+ // In the case above, we are allowed to evaluate %C in different type
+ // because all of its users were part of the traversal.
+ //
+ // In the following case, however, we can't make this conclusion:
+ //
+ // Cast %A
+ // ____|
+ // /
+ // %A = Use %B, %C
+ // ________| |
+ // / |
+ // %B = Use %D |
+ // ________| |
+ // / |
+ // %D = Use %C |
+ // | |
+ // foo(%C) | | <- never traversing foo(%C)
+ // ________|___|
+ // /
+ // %C = ...
+ //
+ // In this case, we still can evaluate %C in a different type, but we'd need
+ // to create a copy of the original %C to be used in foo(%C). Such
+ // duplication might be not profitable.
+ //
+ // For this reason, we collect all users of the multi-user values and mark
+ // them as "pending" and defer this decision to the very end. When we are
+ // done and ready to have a positive verdict, we should double-check all
+ // of the pending users and ensure that we visited them. allPendingVisited
+ // predicate checks exactly that.
+ if (!I->hasOneUse()) {
+ llvm::transform(I->uses(), std::back_inserter(Pending),
+ [](Use &U) { return U.getUser(); });
+ }
+
+ const bool Result = Pred(V, Ty);
+ // We have to set result this way and not via It because Pred is recursive
+ // and it is very likely that we grew Visited and invalidated It.
+ Visited[V] = Result;
+ return Result;
+ }
+
+ /// Filter out values that we can not evaluate in the destination type for
+ /// free.
+ [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
+
+ [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI);
+ [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI);
+ [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
+ unsigned &BitsToClear,
+ InstCombinerImpl &IC,
+ Instruction *CxtI);
+ [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
+
+ /// A bookkeeping map to memorize an already made decision for a traversed
+ /// value.
+ SmallDenseMap<Value *, bool, 8> Visited;
+
+ /// A list of pending values to check in the end.
+ SmallVector<Value *, 8> Pending;
+};
+
+} // anonymous namespace
+
/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
-static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
+bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
if (isa<Constant>(V))
return match(V, m_ImmConstant());
@@ -243,7 +443,7 @@ static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
-static bool canNotEvaluateInType(Value *V, Type *Ty) {
+bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
if (!isa<Instruction>(V))
return true;
// We don't extend or shrink something that has multiple uses -- doing so
@@ -265,13 +465,27 @@ static bool canNotEvaluateInType(Value *V, Type *Ty) {
///
/// This function works on both vectors and scalars.
///
-static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
- Instruction *CxtI) {
- if (canAlwaysEvaluateInType(V, Ty))
- return true;
- if (canNotEvaluateInType(V, Ty))
- return false;
+bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI) {
+ TypeEvaluationHelper TYH;
+ return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
+ // We need to check whether we visited all users of multi-user values,
+ // and we have to do it at the very end, outside of the recursion.
+ TYH.allPendingVisited();
+}
+bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI) {
+ return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
+ return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
+ });
+}
+
+bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
+ InstCombinerImpl &IC,
+ Instruction *CxtI) {
auto *I = cast<Instruction>(V);
Type *OrigTy = V->getType();
switch (I->getOpcode()) {
@@ -282,8 +496,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
case Instruction::Or:
case Instruction::Xor:
// These operators can all arbitrarily be extended or truncated.
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
case Instruction::UDiv:
case Instruction::URem: {
@@ -296,8 +510,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
// based on later context may introduce a trap.
if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
}
break;
}
@@ -308,8 +522,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
KnownBits AmtKnownBits =
llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
if (AmtKnownBits.getMaxValue().ult(BitWidth))
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
break;
}
case Instruction::LShr: {
@@ -329,12 +543,12 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
}
if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
}
break;
}
@@ -351,8 +565,8 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
unsigned ShiftedBits = OrigBitWidth - BitWidth;
if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
break;
}
case Instruction::Trunc:
@@ -365,18 +579,18 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
- canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
- // get into trouble with cyclic PHIs here because we only consider
- // instructions with a single use.
+ // get into trouble with cyclic PHIs here because canEvaluate handles use
+ // chain loops.
PHINode *PN = cast<PHINode>(I);
- for (Value *IncValue : PN->incoming_values())
- if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
- return false;
- return true;
+ return llvm::all_of(
+ PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
+ return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
+ });
}
case Instruction::FPToUI:
case Instruction::FPToSI: {
@@ -385,14 +599,14 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
// that did not exist in the original code.
Type *InputTy = I->getOperand(0)->getType()->getScalarType();
const fltSemantics &Semantics = InputTy->getFltSemantics();
- uint32_t MinBitWidth =
- APFloatBase::semanticsIntSizeInBits(Semantics,
- I->getOpcode() == Instruction::FPToSI);
+ uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
+ Semantics, I->getOpcode() == Instruction::FPToSI);
return Ty->getScalarSizeInBits() >= MinBitWidth;
}
case Instruction::ShuffleVector:
- return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
- canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+ return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
+ canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
+
default:
// TODO: Can handle more cases here.
break;
@@ -767,7 +981,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
- canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
+ TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
// If this cast is a truncate, evaluting in a different type always
// eliminates the cast, so it is always a win.
@@ -788,7 +1002,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
if (DestWidth * 2 < SrcWidth) {
auto *NewDestTy = DestITy->getExtendedType();
if (shouldChangeType(SrcTy, NewDestTy) &&
- canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
+ TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to reduce the width of operand of"
@@ -1104,8 +1318,17 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
- InstCombinerImpl &IC, Instruction *CxtI) {
+bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
+ unsigned &BitsToClear,
+ InstCombinerImpl &IC,
+ Instruction *CxtI) {
+ TypeEvaluationHelper TYH;
+ return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
+}
+bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
+ unsigned &BitsToClear,
+ InstCombinerImpl &IC,
+ Instruction *CxtI) {
BitsToClear = 0;
if (canAlwaysEvaluateInType(V, Ty))
return true;
@@ -1125,8 +1348,8 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
- !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
+ if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
return false;
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
@@ -1157,7 +1380,7 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// upper bits we can reduce BitsToClear by the shift amount.
uint64_t ShiftAmt;
if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
- if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
return true;
@@ -1169,7 +1392,7 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// ultimate 'and' to clear out the high zero bits we're clearing out though.
uint64_t ShiftAmt;
if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
- if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear += ShiftAmt;
if (BitsToClear > V->getType()->getScalarSizeInBits())
@@ -1180,8 +1403,8 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
return false;
}
case Instruction::Select:
- if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
- !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
+ if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear are
// known zero in the disagreeing side.
Tmp != BitsToClear)
@@ -1193,10 +1416,11 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
+ if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
+ CxtI))
return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
+ if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear
// are known zero in the disagreeing input.
Tmp != BitsToClear)
@@ -1237,7 +1461,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
// Try to extend the entire expression tree to the wide destination type.
unsigned BitsToClear;
if (shouldChangeType(SrcTy, DestTy) &&
- canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
+ TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
"Can't clear more bits than in SrcTy");
@@ -1455,7 +1679,12 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
///
/// This function works on both vectors and scalars.
///
-static bool canEvaluateSExtd(Value *V, Type *Ty) {
+bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
+ TypeEvaluationHelper TYH;
+ return TYH.canEvaluateSExtdImpl(V, Ty);
+}
+
+bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
if (canAlwaysEvaluateInType(V, Ty))
@@ -1476,15 +1705,15 @@ static bool canEvaluateSExtd(Value *V, Type *Ty) {
case Instruction::Sub:
case Instruction::Mul:
// These operators can all arbitrarily be extended if their inputs can.
- return canEvaluateSExtd(I->getOperand(0), Ty) &&
- canEvaluateSExtd(I->getOperand(1), Ty);
+ return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
+ canEvaluateSExtdImpl(I->getOperand(1), Ty);
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
case Instruction::Select:
- return canEvaluateSExtd(I->getOperand(1), Ty) &&
- canEvaluateSExtd(I->getOperand(2), Ty);
+ return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
+ canEvaluateSExtdImpl(I->getOperand(2), Ty);
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -1492,7 +1721,7 @@ static bool canEvaluateSExtd(Value *V, Type *Ty) {
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!canEvaluateSExtd(IncValue, Ty)) return false;
+ if (!canEvaluateSExtdImpl(IncValue, Ty)) return false;
return true;
}
default:
@@ -1525,7 +1754,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
// Try to extend the entire expression tree to the wide destination type.
- if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
+ if (shouldChangeType(SrcTy, DestTy) && TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
index 8aa768cbaede5..8410df245befd 100644
--- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll
+++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
@@ -91,22 +91,18 @@ define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) {
define i32 @eval_trunc_multi_use_in_one_inst(i32 %x) {
; CHECK-LABEL: @eval_trunc_multi_use_in_one_inst(
-; CHECK-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64
-; CHECK-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15
-; CHECK-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]]
-; CHECK-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 15
+; CHECK-NEXT: [[T:%.*]] = mul i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[T]]
;
; DBGINFO-LABEL: @eval_trunc_multi_use_in_one_inst(
-; DBGINFO-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64, !dbg [[DBG57:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[Z]], [[META52:![0-9]+]], !DIExpression(), [[DBG57]])
-; DBGINFO-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15, !dbg [[DBG58:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[A]], [[META54:![0-9]+]], !DIExpression(), [[DBG58]])
-; DBGINFO-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]], !dbg [[DBG59:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i64 [[M]], [[META55:![0-9]+]], !DIExpression(), [[DBG59]])
-; DBGINFO-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32, !dbg [[DBG60:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i32 [[T]], [[META56:![0-9]+]], !DIExpression(), [[DBG60]])
-; DBGINFO-NEXT: ret i32 [[T]], !dbg [[DBG61:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META52:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value), [[META57:![0-9]+]])
+; DBGINFO-NEXT: [[A:%.*]] = add i32 [[X]], 15, !dbg [[DBG58:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X]], [[META54:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_stack_value), [[DBG58]])
+; DBGINFO-NEXT: [[M:%.*]] = mul i32 [[A]], [[A]], !dbg [[DBG59:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META55:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_plus_uconst, 15, DW_OP_mul, DW_OP_stack_value), [[DBG59]])
+; DBGINFO-NEXT: #dbg_value(i32 [[M]], [[META56:![0-9]+]], !DIExpression(), [[META60:![0-9]+]])
+; DBGINFO-NEXT: ret i32 [[M]], !dbg [[DBG61:![0-9]+]]
;
%z = zext i32 %x to i64
%a = add nsw nuw i64 %z, 15
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index f1c6fe0c24311..40ec0609aec09 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -2241,13 +2241,10 @@ define i32 @test95(i32 %x) {
define i16 @test96(i16 %x, i16 %y) {
; ALL-LABEL: @test96(
-; ALL-NEXT: [[ZX:%.*]] = zext i16 [[X:%.*]] to i32
-; ALL-NEXT: [[ZY:%.*]] = zext i16 [[Y:%.*]] to i32
-; ALL-NEXT: [[A:%.*]] = add nuw nsw i32 [[ZX]], [[ZY]]
-; ALL-NEXT: [[B:%.*]] = add nuw nsw i32 [[A]], 5
-; ALL-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], 3
-; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
-; ALL-NEXT: [[T:%.*]] = trunc i32 [[D]] to i16
+; ALL-NEXT: [[A:%.*]] = add i16 [[X:%.*]], [[Y:%.*]]
+; ALL-NEXT: [[B:%.*]] = add i16 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i16 [[A]], 3
+; ALL-NEXT: [[T:%.*]] = or i16 [[B]], [[C]]
; ALL-NEXT: ret i16 [[T]]
;
%zx = zext i16 %x to i32
diff --git a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
index 720f5258a6346..562dded86be29 100644
--- a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
+++ b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
@@ -94,18 +94,17 @@ define void @test1() personality ptr @__gxx_wasm_personality_v0 {
; CHECK-NEXT: [[CALL:%.*]] = invoke i32 @baz()
; CHECK-NEXT: to label [[INVOKE_CONT1:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
; CHECK: invoke.cont1:
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK-NEXT: [[TOBOOL_INIT:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_INIT]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
-; CHECK-NEXT: [[AP_0:%.*]] = phi i8 [ 1, [[IF_THEN]] ], [ 0, [[INVOKE_CONT1]] ]
; CHECK-NEXT: invoke void @foo()
; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[CATCH_DISPATCH]]
; CHECK: invoke.cont2:
; CHECK-NEXT: br label [[TRY_CONT:%.*]]
; CHECK: catch.dispatch:
-; CHECK-NEXT: [[AP_1:%.*]] = phi i8 [ [[AP_0]], [[IF_END]] ], [ 0, [[INVOKE_CONT]] ]
+; CHECK-NEXT: [[AP_1:%.*]] = phi i1 [ [[TOBOOL_INIT]], [[IF_END]] ], [ false, [[INVOKE_CONT]] ]
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.start] unwind label [[CATCH_DISPATCH1]]
; CHECK: catch.start:
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null]
@@ -116,11 +115,10 @@ define void @test1() personality ptr @__gxx_wasm_personality_v0 {
; CHECK-NEXT: invoke void @llvm.wasm.rethrow() #[[ATTR0:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH1]]
; CHECK: catch.dispatch1:
-; CHECK-NEXT: [[AP_2:%.*]] = phi i8 [ [[AP_1]], [[CATCH_DISPATCH]] ], [ [[AP_1]], [[RETHROW]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TOBOOL1:%.*]] = phi i1 [ [[AP_1]], [[CATCH_DISPATCH]] ], [ [[AP_1]], [[RETHROW]] ], [ false, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = catchswitch within none [label %catch.start1] unwind to caller
; CHECK: catch.start1:
; CHECK-NEXT: [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null]
-; CHECK-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[AP_2]] to i1
; CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN1:%.*]], label [[IF_END1:%.*]]
; CHECK: if.then1:
; CHECK-NEXT: br label [[IF_END1]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
index 653b818f7eb5c..23a278c061975 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
@@ -57,11 +57,10 @@ define void @PR33765(i8 %beth) {
; CHECK-LABEL: @PR33765(
; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]]
; CHECK: if.then9:
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i16
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV]], [[CONV]]
; CHECK-NEXT: [[TINKY:%.*]] = load i16, ptr @glob, align 2
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i32 [[MUL]] to i16
-; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]]
+; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[MUL]], [[TINKY]]
; CHECK-NEXT: store i16 [[CONV14]], ptr @glob, align 2
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
index 9e0c98bb34006..834d48f925305 100644
--- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
@@ -640,14 +640,12 @@ define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i1> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i1> [[S3]], [[S4]]
; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[SEL]]
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index 87e05002665ce..e6de063969a6a 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -676,14 +676,12 @@ define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
-; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i1> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i1> [[S3]], [[S4]]
; CHECK-NEXT: [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT: [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[SEL]]
>From c6e4b326e61a0fedfeddc56151ed8f77e9102c3b Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <vsavchenko at apple.com>
Date: Tue, 28 Oct 2025 11:35:16 +0000
Subject: [PATCH 3/3] [InstCombine] Perform sext transformation for values with >1 users
---
.../InstCombine/InstCombineCasts.cpp | 26 ++++++++++-------
.../Transforms/InstCombine/cast-mul-select.ll | 28 ++++++++-----------
llvm/test/Transforms/InstCombine/cast.ll | 13 ++++-----
3 files changed, 34 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 384f37d0a3310..6184c6d25d929 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -416,6 +416,7 @@ class TypeEvaluationHelper {
InstCombinerImpl &IC,
Instruction *CxtI);
[[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
+ [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
/// A bookkeeping map to memorize an already made decision for a traversed
/// value.
@@ -1332,6 +1333,8 @@ bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
BitsToClear = 0;
if (canAlwaysEvaluateInType(V, Ty))
return true;
+ // We stick to the one-user limit for the ZExt transform due to the fact
+ // that this predicate returns two values: predicate result and BitsToClear.
if (canNotEvaluateInType(V, Ty))
return false;
@@ -1681,16 +1684,18 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
///
bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
TypeEvaluationHelper TYH;
- return TYH.canEvaluateSExtdImpl(V, Ty);
+ return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
}
bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
+ return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
+ return canEvaluateSExtdPred(V, Ty);
+ });
+}
+
+bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"Can't sign extend type to a smaller type");
- if (canAlwaysEvaluateInType(V, Ty))
- return true;
- if (canNotEvaluateInType(V, Ty))
- return false;
auto *I = cast<Instruction>(V);
switch (I->getOpcode()) {
@@ -1708,8 +1713,8 @@ bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
canEvaluateSExtdImpl(I->getOperand(1), Ty);
- //case Instruction::Shl: TODO
- //case Instruction::LShr: TODO
+ // case Instruction::Shl: TODO
+ // case Instruction::LShr: TODO
case Instruction::Select:
return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
@@ -1717,11 +1722,12 @@ bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
- // get into trouble with cyclic PHIs here because we only consider
- // instructions with a single use.
+ // get into trouble with cyclic PHIs here because canEvaluate handles use
+ // chain loops.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!canEvaluateSExtdImpl(IncValue, Ty)) return false;
+ if (!canEvaluateSExtdImpl(IncValue, Ty))
+ return false;
return true;
}
default:
diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
index 8410df245befd..29c5bb57a4667 100644
--- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll
+++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
@@ -139,25 +139,21 @@ define i32 @eval_zext_multi_use_in_one_inst(i32 %x) {
define i32 @eval_sext_multi_use_in_one_inst(i32 %x) {
; CHECK-LABEL: @eval_sext_multi_use_in_one_inst(
-; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[A:%.*]] = and i16 [[T]], 14
-; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]]
-; CHECK-NEXT: [[O:%.*]] = or disjoint i16 [[M]], -32768
-; CHECK-NEXT: [[R:%.*]] = sext i16 [[O]] to i32
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[A]], [[A]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[M]], -32768
; CHECK-NEXT: ret i32 [[R]]
;
; DBGINFO-LABEL: @eval_sext_multi_use_in_one_inst(
-; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG81:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[T]], [[META76:![0-9]+]], !DIExpression(), [[DBG81]])
-; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 14, !dbg [[DBG82:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[A]], [[META77:![0-9]+]], !DIExpression(), [[DBG82]])
-; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG83:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[M]], [[META78:![0-9]+]], !DIExpression(), [[DBG83]])
-; DBGINFO-NEXT: [[O:%.*]] = or disjoint i16 [[M]], -32768, !dbg [[DBG84:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i16 [[O]], [[META79:![0-9]+]], !DIExpression(), [[DBG84]])
-; DBGINFO-NEXT: [[R:%.*]] = sext i16 [[O]] to i32, !dbg [[DBG85:![0-9]+]]
-; DBGINFO-NEXT: #dbg_value(i32 [[R]], [[META80:![0-9]+]], !DIExpression(), [[DBG85]])
-; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG86:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META76:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_stack_value), [[META81:![0-9]+]])
+; DBGINFO-NEXT: [[A:%.*]] = and i32 [[X]], 14, !dbg [[DBG82:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(i32 [[X]], [[META77:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_stack_value), [[DBG82]])
+; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i32 [[A]], [[A]], !dbg [[DBG83:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META78:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_mul, DW_OP_stack_value), [[DBG83]])
+; DBGINFO-NEXT: [[O:%.*]] = or disjoint i32 [[M]], -32768, !dbg [[DBG84:![0-9]+]]
+; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[X]]), [[META79:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_constu, 14, DW_OP_and, DW_OP_mul, DW_OP_constu, 18446744073709518848, DW_OP_or, DW_OP_stack_value), [[DBG84]])
+; DBGINFO-NEXT: #dbg_value(i32 [[O]], [[META80:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
+; DBGINFO-NEXT: ret i32 [[O]], !dbg [[DBG86:![0-9]+]]
;
%t = trunc i32 %x to i16
%a = and i16 %t, 14
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 40ec0609aec09..46deb294b9d45 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -2312,13 +2312,12 @@ exit:
define i32 @test99(i32 %x, i32 %y) {
; ALL-LABEL: @test99(
-; ALL-NEXT: [[TX:%.*]] = trunc i32 [[X:%.*]] to i16
-; ALL-NEXT: [[TY:%.*]] = trunc i32 [[Y:%.*]] to i16
-; ALL-NEXT: [[A:%.*]] = add i16 [[TX]], [[TY]]
-; ALL-NEXT: [[B:%.*]] = add i16 [[A]], 5
-; ALL-NEXT: [[C:%.*]] = mul i16 [[A]], 3
-; ALL-NEXT: [[D:%.*]] = or i16 [[B]], [[C]]
-; ALL-NEXT: [[S:%.*]] = sext i16 [[D]] to i32
+; ALL-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; ALL-NEXT: [[B:%.*]] = add i32 [[A]], 5
+; ALL-NEXT: [[C:%.*]] = mul i32 [[A]], 3
+; ALL-NEXT: [[D:%.*]] = or i32 [[B]], [[C]]
+; ALL-NEXT: [[SEXT:%.*]] = shl i32 [[D]], 16
+; ALL-NEXT: [[S:%.*]] = ashr exact i32 [[SEXT]], 16
; ALL-NEXT: ret i32 [[S]]
;
%tx = trunc i32 %x to i16
More information about the llvm-commits mailing list