[llvm] [InstCombine] Enable saturated add canonicalization in more variants (PR #97973)

Mon Jul 22 07:52:25 PDT 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/97973

>From f8aa27e5570c2a423d4e6792eab0cf8371dced90 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 7 Jul 2024 17:51:02 -0400
Subject: [PATCH 1/2] Pre-commit tests (NFC)

---
 .../InstCombine/saturating-add-sub.ll         | 221 ++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index bf1568f1cd8c0..7ae50b5b0085f 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1395,6 +1395,227 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
   %r = select i1 %c, i32 -1, i32 %a
   ret i32 %r
 }
+
+define i32 @uadd_sat_flipped(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp uge i32 %x, -10
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped2(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ugt i32 %x, -10
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped3(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ugt i32 %x, -11
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped4(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped4(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp uge i32 %x, -9
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped5(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -9
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ult i32 %x, -9
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped6(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped6(
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ule i32 %x, -11
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped7(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -9
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ule i32 %x, -10
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+define i32 @uadd_sat_flipped8(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped8(
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ult i32 %x, -10
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -13
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp uge i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds2(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -12
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ugt i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds3(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -12
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ugt i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds4(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds4(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -9
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp uge i32 %x, -8
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 -1, i32 %add
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds5(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -8
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ult i32 %x, -8
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds6(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds6(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -11
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ule i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds7(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -11
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ule i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
+; Negative test:
+
+define i32 @uadd_sat_flipped_wrong_bounds8(i32 %x) {
+; CHECK-LABEL: @uadd_sat_flipped_wrong_bounds8(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -12
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp ult i32 %x, -12
+  %add = add i32 %x, 9
+  %cond = select i1 %cmp, i32 %add, i32 -1
+  ret i32 %cond
+}
+
 define i32 @uadd_sat_nonstrict(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_nonstrict(
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])

>From 75bc171352946c911aedc26e8e3453634287efcb Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 12 Jul 2024 09:53:54 -0400
Subject: [PATCH 2/2] [InstCombine] Enable saturated add canonicalization in
 more variants

LLVM is not evaluating X u > C, a, b the same way it evaluates X <= C, b, a.

To fix this, let's move the folds to after the canonicalization of -1 to TrueVal.

Alive2 Proof:
https://alive2.llvm.org/ce/z/8QbZfx
---
 .../InstCombine/InstCombineSelect.cpp         | 33 ++++++++++++++-----
 .../InstCombine/saturating-add-sub.ll         | 32 +++++-------------
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aaf4ece3249a2..dbe9d39a89dcb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -977,14 +977,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
   Value *Cmp1 = Cmp->getOperand(1);
   ICmpInst::Predicate Pred = Cmp->getPredicate();
   Value *X;
-  const APInt *C, *CmpC;
-  if (Pred == ICmpInst::ICMP_ULT &&
-      match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
-      match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
-    // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
-    return Builder.CreateBinaryIntrinsic(
-        Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
-  }
+  const APInt *C;
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
   // There are 8 commuted variants.
@@ -996,6 +989,30 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
   if (!match(TVal, m_AllOnes()))
     return nullptr;
 
+  if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
+      match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+      match(Cmp1, m_SpecificInt(~*C))) {
+    // (X u> ~C) ? -1 : (X + C) --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+                                         ConstantInt::get(Cmp0->getType(), *C));
+  }
+
+  if (Pred == ICmpInst::ICMP_UGE &&
+      match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+      match(Cmp1, m_SpecificInt(-*C))) {
+    // (X u> -C) ? -1 : (X + C) --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+                                         ConstantInt::get(Cmp0->getType(), *C));
+  }
+
+  if (Pred == ICmpInst::ICMP_UGT &&
+      match(FVal, m_Add(m_Specific(Cmp0), m_APInt(C))) &&
+      match(Cmp1, m_SpecificInt(~*C - 1))) {
+    // (X u> ~C - 1) ? -1 : (X + C) --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0,
+                                         ConstantInt::get(Cmp0->getType(), *C));
+  }
+
   // Canonicalize predicate to less-than or less-or-equal-than.
   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
     std::swap(Cmp0, Cmp1);
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 7ae50b5b0085f..0751efa16e402 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1398,9 +1398,7 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_flipped(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp uge i32 %x, -10
@@ -1411,9 +1409,7 @@ define i32 @uadd_sat_flipped(i32 %x) {
 
 define i32 @uadd_sat_flipped2(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped2(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp ugt i32 %x, -10
@@ -1424,9 +1420,7 @@ define i32 @uadd_sat_flipped2(i32 %x) {
 
 define i32 @uadd_sat_flipped3(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped3(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp ugt i32 %x, -11
@@ -1437,9 +1431,7 @@ define i32 @uadd_sat_flipped3(i32 %x) {
 
 define i32 @uadd_sat_flipped4(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped4(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp uge i32 %x, -9
@@ -1450,9 +1442,7 @@ define i32 @uadd_sat_flipped4(i32 %x) {
 
 define i32 @uadd_sat_flipped5(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped5(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -9
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp ult i32 %x, -9
@@ -1474,9 +1464,7 @@ define i32 @uadd_sat_flipped6(i32 %x) {
 
 define i32 @uadd_sat_flipped7(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_flipped7(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -9
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], 9
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 -1
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9)
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %cmp = icmp ule i32 %x, -10
@@ -1957,9 +1945,7 @@ define i32 @uadd_sat_not_commute_select_uge_commute_add(i32 %x, i32 %y) {
 
 define i32 @uadd_sat_constant(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_constant(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[X]], -43
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %a = add i32 %x, 42
@@ -2025,9 +2011,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) {
 
 define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec(
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>