[llvm] [InstCombine] Fix i1 ssub.sat compare folding (PR #173742)

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 11 14:36:59 PST 2026


https://github.com/jlebar updated https://github.com/llvm/llvm-project/pull/173742

>From 8f490e5e845869cce979311bc4e36324d7b1dd6e Mon Sep 17 00:00:00 2001
From: Justin Lebar <justin.lebar at gmail.com>
Date: Sun, 11 Jan 2026 14:11:13 -0800
Subject: [PATCH] Fix incorrect folds of ssub.sat.i1.

For every type other than i1, ssub.sat x, y == 0 implies x == y.  But for
i1, ssub.sat 0, 1 = 0: the true difference, 0 - (-1) = 1, exceeds the i1
signed maximum of 0 and saturates to 0, even though the operands differ.

The changes to instcombine are not strictly necessary.  Instcombine
canonicalizes the ssub.sat.i1 before we arrive at these pattern-matches.
The real fix is in ValueTracking.

Nonetheless we agreed in review it makes sense to add these checks to
instcombine, even though they're currently unreachable:
https://github.com/llvm/llvm-project/pull/173742#issuecomment-3694590010
---
 llvm/lib/Analysis/ValueTracking.cpp           |  5 +++
 .../InstCombine/InstCombineCompares.cpp       | 10 ++++--
 .../InstCombine/saturating-add-sub.ll         | 36 +++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 995aa8ed56de9..b7bf1a559d177 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3540,6 +3540,11 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
         // NB: We don't do usub_sat here as in any case we can prove its
         // non-zero, we will fold it to `sub nuw` in InstCombine.
       case Intrinsic::ssub_sat:
+        // For most types, if x != y then ssub.sat x, y != 0.  But
+        // ssub.sat.i1 0, 1 = 0, because the true difference 1 saturates
+        // to 0.  This means isNonZeroSub is wrong for ssub.sat.i1.
+        if (BitWidth == 1)
+          return false;
         return isNonZeroSub(DemandedElts, Q, BitWidth, II->getArgOperand(0),
                             II->getArgOperand(1), Depth);
       case Intrinsic::sadd_sat:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index adf1649e5e1bc..1f1ff1597c9d4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3878,7 +3878,10 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
 
   case Intrinsic::ssub_sat:
     // ssub.sat(a, b) == 0 -> a == b
-    if (C.isZero())
+    //
+    // Note this doesn't work for ssub.sat.i1 because ssub.sat.i1 0, 1 = 0
+    // (the true difference 1 saturates to 0).  Just skip the optimization for i1.
+    if (C.isZero() && II->getType()->getScalarSizeInBits() > 1)
       return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
     break;
   case Intrinsic::usub_sat: {
@@ -4269,7 +4272,10 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
   }
   case Intrinsic::ssub_sat:
     // ssub.sat(a, b) spred 0 -> a spred b
-    if (ICmpInst::isSigned(Pred)) {
+    //
+    // Note this doesn't work for ssub.sat.i1 because ssub.sat.i1 0, 1 = 0
+    // (the true difference 1 saturates to 0).  Just skip the optimization for i1.
+    if (ICmpInst::isSigned(Pred) && C.getBitWidth() > 1) {
       if (C.isZero())
         return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
       // X s<= 0 is cannonicalized to X s< 1
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 1294f867f07c0..80676de47f193 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -451,6 +451,42 @@ declare i8 @llvm.usub.sat.i8(i8, i8)
 declare i8 @llvm.ssub.sat.i8(i8, i8)
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
+declare i1 @llvm.ssub.sat.i1(i1, i1)
+
+define i1 @test_ssub_sat_i1_cmp_ule_zero(i1 %a) {
+; CHECK-LABEL: @test_ssub_sat_i1_cmp_ule_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = xor i1 [[A:%.*]], true
+; CHECK-NEXT:    [[SAT:%.*]] = call i1 @llvm.ssub.sat.i1(i1 [[A]], i1 [[CMP]])
+; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[SAT]], true
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %cmp = icmp ule i1 %a, false
+  %sat = call i1 @llvm.ssub.sat.i1(i1 %a, i1 %cmp)
+  %res = icmp ule i1 %sat, false
+  ret i1 %res
+}
+
+define i1 @test_ssub_sat_i1_cmp_eq_zero(i1 %a, i1 %b) {
+; CHECK-LABEL: @test_ssub_sat_i1_cmp_eq_zero(
+; CHECK-NEXT:    [[SAT:%.*]] = call i1 @llvm.ssub.sat.i1(i1 [[A:%.*]], i1 [[B:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[SAT]], true
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %sat = call i1 @llvm.ssub.sat.i1(i1 %a, i1 %b)
+  %res = icmp eq i1 %sat, false
+  ret i1 %res
+}
+
+define i1 @test_ssub_sat_i1_cmp_sgt_allones(i1 %a, i1 %b) {
+; CHECK-LABEL: @test_ssub_sat_i1_cmp_sgt_allones(
+; CHECK-NEXT:    [[SAT:%.*]] = call i1 @llvm.ssub.sat.i1(i1 [[A:%.*]], i1 [[B:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[SAT]], true
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %sat = call i1 @llvm.ssub.sat.i1(i1 %a, i1 %b)
+  %res = icmp sgt i1 %sat, true
+  ret i1 %res
+}
 
 ; Cannot canonicalize usub to uadd.
 define i8 @test_scalar_usub_canonical(i8 %a) {



More information about the llvm-commits mailing list