[llvm] [InstCombine] Fold usub_sat((sub nuw C1, A), C2) to usub_sat(C1 - C2, A) or 0 (PR #82280)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 11 05:29:22 PDT 2024


https://github.com/elhewaty updated https://github.com/llvm/llvm-project/pull/82280

>From e03339a1624f09326d748305b05517fab602e44d Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Mon, 19 Feb 2024 22:15:10 +0200
Subject: [PATCH 1/2] [InstCombine] Add test coverage for folding usub_sat((sub
 nuw C1, A), C2) to usub_sat(C1 - C2, A) or 0 (NFC)

---
 .../InstCombine/unsigned_saturated_sub.ll     | 102 ++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
index 5cece931b8d987..ea30e235af3db2 100644
--- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -8,6 +8,108 @@ declare void @use(i64)
 declare void @usei32(i32)
 declare void @usei1(i1)
 
+; usub_sat((sub nuw C1, A), C2) to usub_sat(usub_sat(C1 - C2), A)
+define i32 @usub_sat_C1_C2(i32 %a){
+; CHECK-LABEL: @usub_sat_C1_C2(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 64, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %add = sub nuw i32 64, %a
+  %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
+  ret i32 %cond
+}
+
+define i32 @usub_sat_C1_C2_produce_0(i32 %a){
+; CHECK-LABEL: @usub_sat_C1_C2_produce_0(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 14, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %add = sub nuw i32 14, %a
+  %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
+  ret i32 %cond
+}
+
+define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
+; CHECK-LABEL: @usub_sat_C1_C2_produce_0_too(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 12, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %add = sub nuw i32 12, %a
+  %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
+  ret i32 %cond
+}
+
+; vector tests
+define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
+; CHECK-LABEL: @usub_sat_C1_C2_splat(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 64, i16 64>, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
+; CHECK-NEXT:    ret <2 x i16> [[COND]]
+;
+  %add = sub nuw <2 x i16> <i16 64, i16 64>, %a
+  %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
+  ret <2 x i16> %cond
+}
+
+define <2 x i16> @usub_sat_C1_C2_non_splat(<2 x i16> %a) {
+; CHECK-LABEL: @usub_sat_C1_C2_non_splat(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 50, i16 64>, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 20, i16 14>)
+; CHECK-NEXT:    ret <2 x i16> [[COND]]
+;
+  %add = sub nuw <2 x i16> <i16 50, i16 64>, %a
+  %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 20, i16 14>)
+  ret <2 x i16> %cond
+}
+
+define <2 x i16> @usub_sat_C1_C2_splat_produce_0(<2 x i16> %a){
+; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 14, i16 14>, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
+; CHECK-NEXT:    ret <2 x i16> [[COND]]
+;
+  %add = sub nuw <2 x i16> <i16 14, i16 14>, %a
+  %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
+  ret <2 x i16> %cond
+}
+
+define <2 x i16> @usub_sat_C1_C2_splat_produce_0_too(<2 x i16> %a){
+; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0_too(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 12, i16 12>, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
+; CHECK-NEXT:    ret <2 x i16> [[COND]]
+;
+  %add = sub nuw <2 x i16> <i16 12, i16 12>, %a
+  %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
+  ret <2 x i16> %cond
+}
+
+define <2 x i16> @usub_sat_C1_C2_non_splat_produce_0_too(<2 x i16> %a){
+; CHECK-LABEL: @usub_sat_C1_C2_non_splat_produce_0_too(
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 12, i16 13>, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 15>)
+; CHECK-NEXT:    ret <2 x i16> [[COND]]
+;
+  %add = sub nuw <2 x i16> <i16 12, i16 13>, %a
+  %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 15>)
+  ret <2 x i16> %cond
+}
+
+; negative test: this shouldn't be folded (the sub has no nuw flag)
+define i32 @usub_sat_C1_C2_without_nuw(i32 %a){
+; CHECK-LABEL: @usub_sat_C1_C2_without_nuw(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i32 12, [[A:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %add = sub i32 12, %a
+  %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
+  ret i32 %cond
+}
+
 ; (a > b) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_ugt(i64 %a, i64 %b) {

>From 84560f7e1cf0fb0b7e0f8fce76b1e4c6292dfa6f Mon Sep 17 00:00:00 2001
From: Mohamed Atef <mohamedatef1698 at gmail.com>
Date: Mon, 11 Mar 2024 14:28:14 +0200
Subject: [PATCH 2/2] [InstCombine] Fold usub_sat((sub nuw C1, A), C2) to
 usub_sat(C1 - C2, A) or 0

---
 .../InstCombine/InstCombineCalls.cpp          | 16 +++++++++-
 .../InstCombine/unsigned_saturated_sub.ll     | 29 +++++--------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5266808c5abab4..270b7227020f7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2139,8 +2139,22 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       }
     }
 
+    // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
+    // which then simplifies to:
+    // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
+    // usub_sat((sub nuw C, A), C1) -> 0 otherwise
+    Constant *C, *C1;
+    Value *A;
+    if (IID == Intrinsic::usub_sat &&
+        match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
+        match(Arg1, m_ImmConstant(C1))) {
+      auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
+      auto *NewSub =
+          Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
+      return replaceInstUsesWith(*SI, NewSub);
+    }
+
     // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
-    Constant *C;
     if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
         C->isNotMinSignedValue()) {
       Value *NegVal = ConstantExpr::getNeg(C);
diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
index ea30e235af3db2..ab147584d2108f 100644
--- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -11,8 +11,7 @@ declare void @usei1(i1)
 ; usub_sat((sub nuw C1, A), C2) to usub_sat(usub_sat(C1 - C2), A)
 define i32 @usub_sat_C1_C2(i32 %a){
 ; CHECK-LABEL: @usub_sat_C1_C2(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 64, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
+; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 50, i32 [[A:%.*]])
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %add = sub nuw i32 64, %a
@@ -22,9 +21,7 @@ define i32 @usub_sat_C1_C2(i32 %a){
 
 define i32 @usub_sat_C1_C2_produce_0(i32 %a){
 ; CHECK-LABEL: @usub_sat_C1_C2_produce_0(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 14, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    ret i32 0
 ;
   %add = sub nuw i32 14, %a
   %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
@@ -33,9 +30,7 @@ define i32 @usub_sat_C1_C2_produce_0(i32 %a){
 
 define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
 ; CHECK-LABEL: @usub_sat_C1_C2_produce_0_too(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw i32 12, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ADD]], i32 14)
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    ret i32 0
 ;
   %add = sub nuw i32 12, %a
   %cond = call i32 @llvm.usub.sat.i32(i32 %add, i32 14)
@@ -45,8 +40,7 @@ define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
 ; vector tests
 define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
 ; CHECK-LABEL: @usub_sat_C1_C2_splat(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 64, i16 64>, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> <i16 50, i16 50>, <2 x i16> [[A:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[COND]]
 ;
   %add = sub nuw <2 x i16> <i16 64, i16 64>, %a
@@ -56,8 +50,7 @@ define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
 
 define <2 x i16> @usub_sat_C1_C2_non_splat(<2 x i16> %a) {
 ; CHECK-LABEL: @usub_sat_C1_C2_non_splat(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 50, i16 64>, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 20, i16 14>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> <i16 30, i16 50>, <2 x i16> [[A:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[COND]]
 ;
   %add = sub nuw <2 x i16> <i16 50, i16 64>, %a
@@ -67,9 +60,7 @@ define <2 x i16> @usub_sat_C1_C2_non_splat(<2 x i16> %a) {
 
 define <2 x i16> @usub_sat_C1_C2_splat_produce_0(<2 x i16> %a){
 ; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 14, i16 14>, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
-; CHECK-NEXT:    ret <2 x i16> [[COND]]
+; CHECK-NEXT:    ret <2 x i16> zeroinitializer
 ;
   %add = sub nuw <2 x i16> <i16 14, i16 14>, %a
   %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
@@ -78,9 +69,7 @@ define <2 x i16> @usub_sat_C1_C2_splat_produce_0(<2 x i16> %a){
 
 define <2 x i16> @usub_sat_C1_C2_splat_produce_0_too(<2 x i16> %a){
 ; CHECK-LABEL: @usub_sat_C1_C2_splat_produce_0_too(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 12, i16 12>, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 14>)
-; CHECK-NEXT:    ret <2 x i16> [[COND]]
+; CHECK-NEXT:    ret <2 x i16> zeroinitializer
 ;
   %add = sub nuw <2 x i16> <i16 12, i16 12>, %a
   %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 14>)
@@ -89,9 +78,7 @@ define <2 x i16> @usub_sat_C1_C2_splat_produce_0_too(<2 x i16> %a){
 
 define <2 x i16> @usub_sat_C1_C2_non_splat_produce_0_too(<2 x i16> %a){
 ; CHECK-LABEL: @usub_sat_C1_C2_non_splat_produce_0_too(
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw <2 x i16> <i16 12, i16 13>, [[A:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 14, i16 15>)
-; CHECK-NEXT:    ret <2 x i16> [[COND]]
+; CHECK-NEXT:    ret <2 x i16> zeroinitializer
 ;
   %add = sub nuw <2 x i16> <i16 12, i16 13>, %a
   %cond = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %add, <2 x i16> <i16 14, i16 15>)



More information about the llvm-commits mailing list