[llvm] [InstCombine]: Replace overflow calculation with intrinsic (PR #168195)

Sat Nov 15 02:36:17 PST 2025

https://github.com/kper created https://github.com/llvm/llvm-project/pull/168195

Replaces the manual signed-subtraction overflow check with the `llvm.ssub.with.overflow` intrinsic.
alive: https://alive2.llvm.org/ce/z/NUUUhw

Closes https://github.com/llvm/llvm-project/issues/162717

>From 4f217531bf9e501dfebad9605c23a6ca55ee66f8 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sun, 19 Oct 2025 11:57:34 +0000
Subject: [PATCH] [InstCombine]: Replace overflow calculation with intrinsic

---
 .../InstCombine/InstCombineCompares.cpp       |  66 +++++++++++
 .../InstCombine/InstCombineInternal.h         |   1 +
 .../InstCombine/icmp-fold-ssub-overflow.ll    | 103 ++++++++++++++++++
 3 files changed, 170 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fba1ccf2c8c9b..5c433277a8f7d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,6 +12,7 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -22,12 +23,14 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CmpPredicate.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -7161,6 +7164,66 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
   return nullptr;
 }
 
+/// Fold icmp ult (sub (add (sext X), Cst1), (sext Y)), Cst2 -->
+/// extractvalue (llvm.ssub.with.overflow(X, Y)), 1
+///
+/// \returns the replacement for \p I, or nullptr if the pattern is absent.
+Instruction *InstCombinerImpl::foldICmpsToSignedSubOverflow(Instruction &I) {
+  const APInt *Cst1, *Cst2;
+  Value *X, *Y;
+
+  // This recognizes the idiom that detects signed subtraction overflow by
+  // redoing the subtraction in a wider type. With N = bitwidth(X):
+  //
+  //   1. WideX = sext(X)
+  //      WideY = sext(Y)
+  //
+  //   2. Biased = WideX + Cst1
+  //      Cst1 must be the N-bit signed minimum, -2^(N-1), sign-extended.
+  //
+  //   3. Diff = Biased - WideY
+  //      Equals (X - Y) - 2^(N-1), evaluated without N-bit wrapping.
+  //
+  //   4. icmp ult Diff, Cst2
+  //      Cst2 must be -2^N. If X - Y is representable in N bits, Diff is in
+  //      [-2^N, -1], which is uge Cst2 when viewed as unsigned. Every other
+  //      difference lands below Cst2, so the compare is true exactly when
+  //      the N-bit subtraction overflows.
+
+  auto SExtX = m_SExt(m_Value(X));
+  auto SExtY = m_SExt(m_Value(Y));
+  auto Biased = m_Add(SExtX, m_APInt(Cst1));
+  // Only fold when the wide subtraction dies with the compare; otherwise the
+  // intrinsic call would just be added on top of the existing arithmetic.
+  auto Diff = m_OneUse(m_Sub(Biased, SExtY));
+
+  if (!match(&I, m_SpecificICmp(ICmpInst::ICMP_ULT, Diff, m_APInt(Cst2))))
+    return nullptr;
+
+  // The two sext sources may have different widths even though the extended
+  // values agree; the intrinsic needs both operands to share one type.
+  if (X->getType() != Y->getType())
+    return nullptr;
+
+  // Compare as APInts in the wide type. Going through getSExtValue() would
+  // assert for constants needing more than 64 bits, and left-shifting a
+  // negative int64_t is undefined behavior in C++17.
+  const unsigned NarrowBits = X->getType()->getScalarSizeInBits();
+  const unsigned WideBits = Cst1->getBitWidth();
+  const APInt ExpectedBias =
+      APInt::getSignedMinValue(NarrowBits).sext(WideBits);
+  const APInt ExpectedLimit = ExpectedBias << 1;
+
+  if (*Cst1 != ExpectedBias || *Cst2 != ExpectedLimit)
+    return nullptr;
+
+  // The InstCombine driver positions Builder at the visited instruction, so
+  // no explicit insertion point is needed here.
+  Value *Call =
+      Builder.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, X, Y);
+  return replaceInstUsesWith(I, Builder.CreateExtractValue(Call, 1));
+}
+
 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
 /// it into the appropriate icmp lt or icmp gt instruction. This transform
 /// allows them to be folded in visitICmpInst.
@@ -7970,6 +8033,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
     }
   }
 
+  if (Instruction *R = foldICmpsToSignedSubOverflow(I))
+    return R;
+
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9bdd8cb71f7f3..5791932450711 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -791,6 +791,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldICmpWithTrunc(ICmpInst &Cmp);
   Instruction *foldICmpCommutative(CmpPredicate Pred, Value *Op0, Value *Op1,
                                    ICmpInst &CxtI);
+  Instruction *foldICmpsToSignedSubOverflow(Instruction &I);
 
   // Helpers of visitSelectInst().
   Instruction *foldSelectOfBools(SelectInst &SI);
diff --git a/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll
new file mode 100644
index 0000000000000..1d22781db16f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Fold
+;
+; long result = (long)x - (long)y; // evaluated in a wider type
+; return !(INT_MIN <= result && result <= INT_MAX);
+;
+; into
+;
+; __builtin_ssub_overflow(x, y)
+
+define i1 @idiomatic_check_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %3 = sext i16 %x to i32
+  %4 = sext i16 %y to i32
+  %5 = add nsw i32 %3, -32768 ; bias by the i16 signed minimum, -2^15
+  %6 = sub nsw i32 %5, %4 ; (x - y) - 2^15, computed in i32
+  %7 = icmp ult i32 %6, -65536 ; limit is -2^16; positive test, fold expected
+  ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i16_no_flags(i16 %x, i16 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i16_no_flags(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %3 = sext i16 %x to i32
+  %4 = sext i16 %y to i32
+  %5 = add i32 %3, -32768 ; no nsw: the fold is expected without wrap flags
+  %6 = sub i32 %5, %4 ; no nsw here either
+  %7 = icmp ult i32 %6, -65536 ; limit is -2^16
+  ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %3 = sext i32 %x to i64
+  %4 = sext i32 %y to i64
+  %5 = add nsw i64 %3, -2147483648 ; bias by the i32 signed minimum, -2^31
+  %6 = sub nsw i64 %5, %4 ; (x - y) - 2^31, computed in i64
+  %7 = icmp ult i64 %6, -4294967296 ; limit is -2^32; positive test
+  ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_1(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], -4294967296
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %3 = sext i32 %x to i64
+  %4 = sext i32 %y to i64
+  %5 = add nsw i64 %3, 0 ; negative test: bias is 0 instead of -2147483648
+  %6 = sub nsw i64 %5, %4
+  %7 = icmp ult i64 %6, -4294967296
+  ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], -4294967295
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %3 = sext i32 %x to i64
+  %4 = sext i32 %y to i64
+  %5 = add nsw i64 %3, -2147483648
+  %6 = sub nsw i64 %5, %4
+  %7 = icmp ult i64 %6, -4294967295 ; negative test: limit is -2^32 + 1, not -2^32
+  ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_3(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i64 [[TMP4]], -4294967296
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %3 = sext i32 %x to i64
+  %4 = sext i32 %y to i64
+  %5 = add nsw i64 %3, -2147483648
+  %6 = sub nsw i64 %5, %4
+  %7 = icmp slt i64 %6, -4294967296 ; negative test: predicate is slt, not ult
+  ret i1 %7
+}