[llvm] [InstCombine] Fold (sub nsw X, umin(X, C)) s< C2 to (X s< C + C2) (PR #170422)

Tue Dec 2 21:45:04 PST 2025

https://github.com/ParkHanbum created https://github.com/llvm/llvm-project/pull/170422

This patch implements a fold for the pattern
icmp slt (sub nsw %x, umin(%x, %c)), %c2.

Typically, the sub nsw ... umin sequence is canonicalized to
@llvm.usub.sat. However, that transformation discards the nsw
constraint, which provides valuable range information
(guaranteeing no signed underflow). By detecting this pattern before
canonicalization, we can merge the range check into a single
comparison

This optimization is valid under the following constraints:
- %c is non-negative (C >= 0).
- %c2 is strictly positive (C2 > 0).
- The sum %c + %c2 does not cause a signed overflow.

Alive Proof : https://alive2.llvm.org/ce/z/JG3SnP
Fixed: https://github.com/llvm/llvm-project/issues/167055



>From 23c4e19e97dabb4008309bbd26db7137992aabbc Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Wed, 3 Dec 2025 14:34:01 +0900
Subject: [PATCH 1/2] Add pretests for upcomming patch

---
 llvm/test/Transforms/InstCombine/icmp-sub.ll | 122 ++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll
index 13ed7ba0c1703..e261e38697c69 100644
--- a/llvm/test/Transforms/InstCombine/icmp-sub.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll
@@ -3,6 +3,125 @@
 
 declare void @use(i32)
 declare void @use_vec(<2 x i8>)
+declare i8 @llvm.umin.i8(i8, i8)
+
+; 1. Positive Tests
+; Basic valid case
+define i1 @test_basic_opt(i8 %x) {
+; CHECK-LABEL: @test_basic_opt(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 118
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -108
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; Boundary case - C is 0
+define i1 @test_c_is_zero(i8 %x) {
+; CHECK-LABEL: @test_c_is_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 10
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 0)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 10
+  ret i1 %cmp
+}
+
+; Boundary case - Sum is exactly SMAX (127)
+define i1 @test_sum_is_smax(i8 %x) {
+; CHECK-LABEL: @test_sum_is_smax(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 28
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -101
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 100)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 27
+  ret i1 %cmp
+}
+
+; 2. Negative Tests
+; Missing 'nsw' flag on sub
+define i1 @fail_no_nsw(i8 %x) {
+; CHECK-LABEL: @fail_no_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 118
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -108
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; C is Negative (Constraint 1 Violation)
+define i1 @fail_c_negative(i8 %x) {
+; CHECK-LABEL: @fail_c_negative(
+; CHECK-NEXT:    ret i1 true
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 -10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  ret i1 %cmp
+}
+
+; C2 is Zero (Constraint 2 Violation)
+define i1 @fail_c2_zero(i8 %x) {
+; CHECK-LABEL: @fail_c2_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], -119
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 0
+  ret i1 %cmp
+}
+
+; C2 is Negative (Constraint 2 Violation)
+define i1 @fail_c2_negative(i8 %x) {
+; CHECK-LABEL: @fail_c2_negative(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], -119
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, -5
+  ret i1 %cmp
+}
+
+; Signed Overflow in C + C2 (Constraint 3 Violation)
+define i1 @fail_sum_overflow(i8 %x) {
+; CHECK-LABEL: @fail_sum_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 28
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], -98
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 100)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 30
+  ret i1 %cmp
+}
+
+; Multi-use of sub instruction
+define i8 @fail_multi_use(i8 %x) {
+; CHECK-LABEL: @fail_multi_use(
+; CHECK-NEXT:    [[SUB:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SUB]], 20
+; CHECK-NEXT:    [[RES:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    [[RET:%.*]] = add nuw i8 [[SUB]], [[RES]]
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %min = call i8 @llvm.umin.i8(i8 %x, i8 10)
+  %sub = sub nsw i8 %x, %min
+  %cmp = icmp slt i8 %sub, 20
+  %res = zext i1 %cmp to i8
+  %ret = add i8 %sub, %res
+  ret i8 %ret
+}
 
 define i1 @test_nuw_and_unsigned_pred(i64 %x) {
 ; CHECK-LABEL: @test_nuw_and_unsigned_pred(
@@ -290,7 +409,8 @@ define i1 @subC_nsw_ne(i32 %x) {
 ; CHECK-LABEL: @subC_nsw_ne(
 ; CHECK-NEXT:    [[SUBX:%.*]] = sub nsw i32 -2147483647, [[X:%.*]]
 ; CHECK-NEXT:    call void @use(i32 [[SUBX]])
-; CHECK-NEXT:    ret i1 true
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[X]], 2147483603
+; CHECK-NEXT:    ret i1 [[R]]
 ;
   %subx = sub nsw i32 -2147483647, %x
   call void @use(i32 %subx)

>From 209ab8099de249716fa3fdf194e8c4e8c11d2e32 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Wed, 3 Dec 2025 14:39:22 +0900
Subject: [PATCH 2/2] [InstCombine] Fold (sub nsw X, umin(X, C)) s< C2 to (X s<
 C + C2)

This patch implements a fold for the pattern
icmp slt (sub nsw %x, umin(%x, %c)), %c2.

Typically, the sub nsw ... umin sequence is canonicalized to
@llvm.usub.sat. However, that transformation discards the nsw
constraint, which provides valuable range information
(guaranteeing no signed underflow). By detecting this pattern before
canonicalization, we can merge the range check into a single
comparison

This optimization is valid under the following constraints:
- %c is non-negative (C >= 0).
- %c2 is strictly positive (C2 > 0).
- The sum %c + %c2 does not cause a signed overflow.

Alive Proof : https://alive2.llvm.org/ce/z/JG3SnP
Fixed: #167055
---
 .../InstCombine/InstCombineAddSub.cpp         | 30 +++++++++++++++++--
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 9bee523c7b7e5..df096e7b806ad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2857,9 +2857,33 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
         I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
 
   // Op0 - umin(X, Op0) --> usub.sat(Op0, X)
-  if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0)))))
-    return replaceInstUsesWith(
-        I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X}));
+  if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0))))) {
+    // Op0 - umin(Op0, C) s< C2 --> Op0 s< C2 + C
+    // Constraints:
+    // 1. C >= 0: Range [0, C) must be valid.
+    // 2. C2 > 0: Range [C, C+C2) must imply extension.
+    // 3. No Signed Overflow: Merged upper bound (C + C2) must be safe.
+    const APInt *C;
+    if (I.hasOneUse() && I.hasNoSignedWrap() &&
+        match(X, m_APInt(C)) && C->isNonNegative()) {
+      const APInt *C2;
+      CmpPredicate Pred;
+      Instruction *MustICmp = cast<Instruction>(I.user_back());
+      if (match(MustICmp, m_ICmp(Pred, m_Specific(&I), m_APInt(C2))) &&
+          Pred == CmpInst::ICMP_SLT) {
+        bool Overflow;
+        APInt Sum = C->sadd_ov(*C2, Overflow);
+        if (C2->isStrictlyPositive() && !Overflow) {
+          Value *NewCmp = Builder.CreateICmpSLT(Op0, ConstantInt::get(Ty, Sum));
+          eraseInstFromFunction(*replaceInstUsesWith(*MustICmp, NewCmp));
+          // return eraseInstFromFunction(I);
+          return nullptr;
+        }
+      }
+    }
+    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X});
+    return replaceInstUsesWith(I, USub);
+  }
 
   // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
   if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {