[llvm] 62fd332 - [InstCombine] Optimize usub.sat pattern (#151044)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 09:47:07 PDT 2025
Author: Nimit Sachdeva
Date: 2025-09-02T16:47:03Z
New Revision: 62fd3320850c7632109b1f282d2db1fb89d9b453
URL: https://github.com/llvm/llvm-project/commit/62fd3320850c7632109b1f282d2db1fb89d9b453
DIFF: https://github.com/llvm/llvm-project/commit/62fd3320850c7632109b1f282d2db1fb89d9b453.diff
LOG: [InstCombine] Optimize usub.sat pattern (#151044)
Fixes #79690
Generalized proof: https://alive2.llvm.org/ce/z/22ybrr
---------
Co-authored-by: Nimit Sachdeva <nimsach at amazon.com>
Added:
llvm/test/Transforms/InstCombine/usub_sat_to_msb_mask.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb4332fbc0959..ba8b4c47e8f88 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1993,6 +1993,63 @@ Value *InstCombinerImpl::foldSelectWithConstOpToBinOp(ICmpInst *Cmp,
return BinOp;
}
+/// Folds:
+///   %a_sub = call @llvm.usub.sat(x, IntConst1)
+///   %b_sub = call @llvm.usub.sat(y, IntConst2)
+///   %or = or %a_sub, %b_sub
+///   %cmp = icmp eq %or, 0
+///   %sel = select %cmp, 0, MostSignificantBit
+/// (or the equivalent `icmp ne` form with the select arms swapped) into:
+///   %a_sub' = usub.sat(x, IntConst1 - MostSignificantBit + 1)
+///   %b_sub' = usub.sat(y, IntConst2 - MostSignificantBit + 1)
+///   %or = or %a_sub', %b_sub'
+///   %and = and %or, MostSignificantBit
+/// Likewise, for vector arguments as well.
+///
+/// The fold only applies when both IntConst1 and IntConst2 have their sign
+/// bit set, and when the select, the compare, the `or` and both usub.sat
+/// calls each have a single use.
+///
+/// \param SI      The select being visited.
+/// \param ICI     The compare feeding the select condition.
+/// \param Builder Used to emit the replacement usub.sat calls and the `or`.
+/// \returns the new (not yet inserted) `and` instruction, or nullptr if the
+///          pattern does not match.
+static Instruction *foldICmpUSubSatWithAndForMostSignificantBitCmp(
+    SelectInst &SI, ICmpInst *ICI, InstCombiner::BuilderTy &Builder) {
+  if (!SI.hasOneUse() || !ICI->hasOneUse())
+    return nullptr;
+  CmpPredicate Pred;
+  Value *A, *B;
+  const APInt *Constant1, *Constant2;
+  if (!match(SI.getCondition(),
+             m_ICmp(Pred,
+                    m_OneUse(m_Or(m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(
+                                      m_Value(A), m_APInt(Constant1))),
+                                  m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(
+                                      m_Value(B), m_APInt(Constant2))))),
+                    m_Zero())))
+    return nullptr;
+
+  Value *TrueVal = SI.getTrueValue();
+  Value *FalseVal = SI.getFalseValue();
+  // Accept either spelling of the same select:
+  //   (or == 0) ? 0 : SignMask    or    (or != 0) ? SignMask : 0
+  // The two forms are named separately so that the negation applies to their
+  // disjunction; negating only the EQ form would reject every NE-form select.
+  bool IsEqForm = Pred == ICmpInst::ICMP_EQ && match(TrueVal, m_Zero()) &&
+                  match(FalseVal, m_SignMask());
+  bool IsNeForm = Pred == ICmpInst::ICMP_NE &&
+                  match(TrueVal, m_SignMask()) && match(FalseVal, m_Zero());
+  if (!IsEqForm && !IsNeForm)
+    return nullptr;
+
+  auto *Ty = A->getType();
+  unsigned BW = Constant1->getBitWidth();
+  APInt MostSignificantBit = APInt::getSignMask(BW);
+
+  // Anything over MSB is negative
+  if (Constant1->isNonNegative() || Constant2->isNonNegative())
+    return nullptr;
+
+  // usub.sat(x, C) is non-zero iff x >u C. With C >=u MSB this is the same as
+  // x - (C - MSB + 1) >=u MSB, i.e. the sign bit of the adjusted usub.sat.
+  APInt AdjAP1 = *Constant1 - MostSignificantBit + 1;
+  APInt AdjAP2 = *Constant2 - MostSignificantBit + 1;
+
+  auto *Adj1 = ConstantInt::get(Ty, AdjAP1);
+  auto *Adj2 = ConstantInt::get(Ty, AdjAP2);
+
+  Value *NewA = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, Adj1);
+  Value *NewB = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, B, Adj2);
+  Value *Or = Builder.CreateOr(NewA, NewB);
+  Constant *MSBConst = ConstantInt::get(Ty, MostSignificantBit);
+  return BinaryOperator::CreateAnd(Or, MSBConst);
+}
+
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
@@ -2009,6 +2066,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewSel =
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
return NewSel;
+ if (Instruction *Folded =
+ foldICmpUSubSatWithAndForMostSignificantBitCmp(SI, ICI, Builder))
+ return Folded;
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
bool Changed = false;
diff --git a/llvm/test/Transforms/InstCombine/usub_sat_to_msb_mask.ll b/llvm/test/Transforms/InstCombine/usub_sat_to_msb_mask.ll
new file mode 100644
index 0000000000000..bca1e7a227917
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/usub_sat_to_msb_mask.ll
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s
+
+; Positive test, i8, `icmp eq` form. Both constants (223, 239) have the sign
+; bit set, so the select becomes `and (or ...), -128` and each usub.sat
+; constant is rewritten to C - 128 + 1 (96 and 112).
+define i8 @test_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @test_i8(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128
+; CHECK-NEXT: ret i8 [[RES]]
+;
+
+ %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+ %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+ %or = or i8 %a_sub, %b_sub
+ %cmp = icmp eq i8 %or, 0
+ %res = select i1 %cmp, i8 0, i8 128
+ ret i8 %res
+}
+
+; Positive test, i8, `icmp ne` form with the select arms swapped relative to
+; test_i8. The expected output is identical to test_i8.
+define i8 @test_i8_ne(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @test_i8_ne(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128
+; CHECK-NEXT: ret i8 [[RES]]
+;
+
+ %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+ %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+ %or = or i8 %a_sub, %b_sub
+ %cmp = icmp ne i8 %or, 0
+ %res = select i1 %cmp, i8 128, i8 0
+ ret i8 %res
+}
+
+; Positive test, i16. Constants 65409 and 65423 have the sign bit set and are
+; rewritten to C - 32768 + 1 (32642 and 32656).
+define i16 @test_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @test_i16(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 32642)
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[B]], i16 32656)
+; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and i16 [[TMP3]], -32768
+; CHECK-NEXT: ret i16 [[RES]]
+;
+
+ %a_sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 65409)
+ %b_sub = call i16 @llvm.usub.sat.i16(i16 %b, i16 65423)
+ %or = or i16 %a_sub, %b_sub
+ %cmp = icmp eq i16 %or, 0
+ %res = select i1 %cmp, i16 0, i16 32768
+ ret i16 %res
+}
+
+; Positive test, i32. Constants 2147483871 and 2147483887 have the sign bit
+; set and are rewritten to C - 2147483648 + 1 (224 and 240).
+define i32 @test_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @test_i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224)
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240)
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[TMP3]], -2147483648
+; CHECK-NEXT: ret i32 [[RES]]
+;
+
+ %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 2147483871)
+ %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 2147483887)
+ %or = or i32 %a_sub, %b_sub
+ %cmp = icmp eq i32 %or, 0
+ %res = select i1 %cmp, i32 0, i32 2147483648
+ ret i32 %res
+}
+
+; Positive test, i64. Both constants have the sign bit set and are rewritten
+; to C - 9223372036854775808 + 1 (224 and 240).
+define i64 @test_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @test_i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 224)
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[B]], i64 240)
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and i64 [[TMP3]], -9223372036854775808
+; CHECK-NEXT: ret i64 [[RES]]
+;
+
+ %a_sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 9223372036854776031)
+ %b_sub = call i64 @llvm.usub.sat.i64(i64 %b, i64 9223372036854776047)
+ %or = or i64 %a_sub, %b_sub
+ %cmp = icmp eq i64 %or, 0
+ %res = select i1 %cmp, i32 0, i64 9223372036854775808
+ ret i64 %res
+}
+
+; Negative test: the constants 100 and 239 do not have the i32 sign bit set,
+; so the fold is rejected and the IR is left unchanged.
+define i32 @no_fold_due_to_small_K(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @no_fold_due_to_small_K(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 100)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
+; CHECK-NEXT: ret i32 [[RES]]
+;
+
+ %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 100)
+ %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
+ %or = or i32 %a_sub, %b_sub
+ %cmp = icmp eq i32 %or, 0
+ %res = select i1 %cmp, i32 0, i32 2147483648
+ ret i32 %res
+}
+
+; Negative test with the `or` operands written in the opposite order
+; (%b_sub first). The IR is expected to stay unchanged.
+; NOTE(review): 239 and 223 do not have the i32 sign bit set, so the fold is
+; already rejected by the constant check; this case does not exercise a
+; commuted form of an otherwise foldable pattern.
+define i32 @commuted_test_neg(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @commuted_test_neg(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
+; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 223)
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[B_SUB]], [[A_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
+; CHECK-NEXT: ret i32 [[RES]]
+;
+
+ %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
+ %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 223)
+ %or = or i32 %b_sub, %a_sub
+ %cmp = icmp eq i32 %or, 0
+ %res = select i1 %cmp, i32 0, i32 2147483648
+ ret i32 %res
+}
+; Positive test, <4 x i32> with splat constants, `icmp eq` form. The splat
+; constants are rewritten the same way as in the scalar i32 case (224, 240).
+define <4 x i32> @vector_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+
+
+ %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %a, <4 x i32> splat (i32 2147483871))
+ %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %b, <4 x i32> splat (i32 2147483887))
+ %or = or <4 x i32> %a_sub, %b_sub
+ %cmp = icmp eq <4 x i32> %or, zeroinitializer
+ %res = select <4 x i1> %cmp,
+ <4 x i32> zeroinitializer,
+ <4 x i32> splat (i32 -2147483648)
+ ret <4 x i32> %res
+}
+
+; Negative test, <4 x i32>: the constant operand of %a_sub is not a splat
+; (lane 1 is 0), and the IR is expected to stay unchanged.
+; NOTE(review): presumably rejected because m_APInt only binds scalar or splat
+; constants - confirm against PatternMatch.h.
+define <4 x i32> @vector_negative_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_negative_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> <i32 -2147483425, i32 0, i32 -2147483425, i32 -2147483425>)
+; CHECK-NEXT: [[B_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 -2147483409))
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[OR]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648)
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %a,
+ <4 x i32> <i32 2147483871, i32 0, i32 2147483871, i32 2147483871>)
+ %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %b,
+ <4 x i32> <i32 2147483887, i32 2147483887, i32 2147483887, i32 2147483887>)
+ %or = or <4 x i32> %a_sub, %b_sub
+ %cmp = icmp eq <4 x i32> %or, zeroinitializer
+ %res = select <4 x i1> %cmp, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ ret <4 x i32> %res
+}
+
+; Positive test, <4 x i32> with splat constants, `icmp ne` form: the compare
+; is `ne` and the select arms are swapped relative to vector_test. The
+; expected output is identical to vector_test.
+define <4 x i32> @vector_ne_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_ne_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+
+
+ %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %a, <4 x i32> splat (i32 2147483871))
+ %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+ <4 x i32> %b, <4 x i32> splat (i32 2147483887))
+ %or = or <4 x i32> %a_sub, %b_sub
+ %cmp = icmp ne <4 x i32> %or, zeroinitializer
+ %res = select <4 x i1> %cmp,
+ <4 x i32> splat (i32 -2147483648),
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %res
+}
+
+declare i1 @id_i1(i1)
+
+
+; Negative test: %cmp has a second user (the call to @id_i1) besides the
+; select, and the IR is expected to keep the select.
+; NOTE(review): the constants 5 and 7 do not have the sign bit set, so the
+; fold would be rejected even if %cmp had a single use; sign-bit constants
+; would be needed to isolate the one-use check on the compare.
+define i1 @multi_use_icmp(i32 %a, i32 %b) {
+; CHECK-LABEL: define i1 @multi_use_icmp(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 5)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 7)
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
+; CHECK-NEXT: [[CMP_OPAQUE:%.*]] = call i1 @id_i1(i1 [[CMP]])
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
+; CHECK-NEXT: [[EXTRA:%.*]] = xor i1 [[CMP_OPAQUE]], true
+; CHECK-NEXT: [[SEL_OPAQUE:%.*]] = call i32 @id_i32(i32 [[SEL]])
+; CHECK-NEXT: [[SEL_NZ:%.*]] = icmp ne i32 [[SEL_OPAQUE]], 0
+; CHECK-NEXT: [[R:%.*]] = and i1 [[SEL_NZ]], [[EXTRA]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 5)
+ %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 7)
+ %or = or i32 %a_sub, %b_sub
+ %cmp = icmp eq i32 %or, 0
+ %cmp_opaque = call i1 @id_i1(i1 %cmp)
+ %sel = select i1 %cmp, i32 0, i32 -2147483648
+ %extra = xor i1 %cmp_opaque, true
+ %sel_opaque = call i32 @id_i32(i32 %sel)
+ %sel_is_nonzero = icmp ne i32 %sel_opaque, 0
+ %r = and i1 %extra, %sel_is_nonzero
+ ret i1 %r
+}
+
+
+declare i32 @id_i32(i32)
+
+; Positive test: the select result is passed to the opaque call @id_i32 and
+; the fold still fires, replacing the select with `and (or ...), SignMask`.
+; NOTE(review): despite the name, %sel has exactly one user here (the call),
+; so this does not cover a select with several uses.
+define i32 @multi_use_select(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @multi_use_select(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240)
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[SEL:%.*]] = and i32 [[OR]], -2147483648
+; CHECK-NEXT: [[SEL_OPAQUE:%.*]] = call i32 @id_i32(i32 [[SEL]])
+; CHECK-NEXT: ret i32 [[SEL_OPAQUE]]
+;
+ %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 -2147483425)
+ %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 -2147483409)
+ %or = or i32 %a_sub, %b_sub
+ %cmp = icmp eq i32 %or, 0
+ %sel = select i1 %cmp, i32 0, i32 -2147483648
+ %sel_opaque = call i32 @id_i32(i32 %sel)
+ ret i32 %sel_opaque
+}
+
+
+; Negative test: %a_sub has an extra user (the call to @use), so the one-use
+; requirement on the first usub.sat fails and the IR is left unchanged.
+define i8 @no_fold_usub_extra_use(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @no_fold_usub_extra_use(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
+; CHECK-NEXT: call void @use(i8 [[A_SUB]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+ %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+ %or = or i8 %a_sub, %b_sub
+ %cmp = icmp eq i8 %or, 0
+ %res = select i1 %cmp, i8 0, i8 128
+ call void @use(i8 %a_sub)
+ ret i8 %res
+}
+
+; Negative test: %or has an extra user (the call to @use), so the one-use
+; requirement on the `or` fails and the IR is left unchanged.
+define i8 @no_fold_or_extra_use(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @no_fold_or_extra_use(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
+; CHECK-NEXT: call void @use(i8 [[OR]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+ %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+ %or = or i8 %a_sub, %b_sub
+ %cmp = icmp eq i8 %or, 0
+ %res = select i1 %cmp, i8 0, i8 128
+ call void @use(i8 %or)
+ ret i8 %res
+}
+
+; Negative test: %b_sub has an extra user (the call to @use), so the one-use
+; requirement on the second usub.sat fails and the IR is left unchanged.
+define i8 @no_fold_usub_b_extra_use(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @no_fold_usub_b_extra_use(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[A_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 -33)
+; CHECK-NEXT: [[B_SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 -17)
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i8 0, i8 -128
+; CHECK-NEXT: call void @use(i8 [[B_SUB]])
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+ %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+ %or = or i8 %a_sub, %b_sub
+ %cmp = icmp eq i8 %or, 0
+ %res = select i1 %cmp, i8 0, i8 128
+ call void @use(i8 %b_sub)
+ ret i8 %res
+}
+
+declare void @use(i8)
More information about the llvm-commits
mailing list