[llvm] [InstCombine] Treat umax as select(icmp(eq, x, 0), 1, x) in binop select fold. (PR #65978)

Mon Sep 11 09:49:04 PDT 2023

https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/65978:

There is an existing InstCombine fold in SimplifySelectsFeedingBinaryOp that rewrites `(A ? B : C) binop (A ? E : F)` as `A ? (B binop E) : (C binop F)`. However, the fold does not fire if the select `(x>=1 ? x : 1)` has already been converted to `umax(x, 1)`. This patch treats the umax as `select(x==0, 1, x)` when the other operand is a select whose condition is `icmp eq x, 0`, allowing the binop to fold into the select/umax pair.

>From d94b9fe9cdd4d9db9307cdf504d0c41730389acb Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 11 Sep 2023 17:14:13 +0100
Subject: [PATCH 1/2] [InstCombine] Add tests for treating umax as a select.
 NFC

---
 .../Transforms/InstCombine/binop-select.ll    | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll
index a59e19897f061d1..bb82857db10f4fd 100644
--- a/llvm/test/Transforms/InstCombine/binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/binop-select.ll
@@ -403,3 +403,66 @@ define i32 @ashr_sel_op1_use(i1 %b) {
   %r = ashr i32 -2, %s
   ret i32 %r
 }
+
+
+define i32 @umax_as_select_sub(i32 %a) {
+; CHECK-LABEL: @umax_as_select_sub(
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C_NOT]], i32 0, i32 2
+; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
+; CHECK-NEXT:    [[B:%.*]] = sub i32 [[S]], [[M]]
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %c = icmp ugt i32 %a, 0
+  %s = select i1 %c, i32 2, i32 0
+  %m = call i32 @llvm.umax.i32(i32 %a, i32 1)
+  %b = sub i32 %s, %m
+  ret i32 %b
+}
+
+define i32 @umax_as_select_sub_c(i32 %a) {
+; CHECK-LABEL: @umax_as_select_sub_c(
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[S_NEG:%.*]] = select i1 [[C_NOT]], i32 0, i32 -2
+; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[S_NEG]], [[M]]
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %c = icmp ugt i32 %a, 0
+  %s = select i1 %c, i32 2, i32 0
+  %m = call i32 @llvm.umax.i32(i32 %a, i32 1)
+  %b = sub i32 %m, %s
+  ret i32 %b
+}
+
+define i32 @umax_as_select_add(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: @umax_as_select_add(
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C_NOT]], i32 [[Y:%.*]], i32 [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[S]], [[M]]
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %c = icmp ugt i32 %a, 0
+  %s = select i1 %c, i32 %x, i32 %y
+  %m = call i32 @llvm.umax.i32(i32 %a, i32 1)
+  %b = add i32 %s, %m
+  ret i32 %b
+}
+
+define i32 @umax_as_select_mul(i32 %a) {
+; CHECK-LABEL: @umax_as_select_mul(
+; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C_NOT]], i32 0, i32 2
+; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[S]], [[M]]
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %c = icmp ugt i32 %a, 0
+  %s = select i1 %c, i32 2, i32 0
+  %m = select i1 %c, i32 %a, i32 1
+  %b = mul i32 %s, %m
+  ret i32 %b
+}
+
+declare i32 @llvm.umax.i32(i32, i32)

>From 88831735addd78e40f2046e8a38353c22a1c4305 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 11 Sep 2023 17:24:40 +0100
Subject: [PATCH 2/2] [InstCombine] Treat umax as select(icmp(eq, x, 0), 1, x)
 in binop select fold.

There is an existing InstCombine fold that rewrites
`(A ? B : C) binop (A ? E : F)` as `A ? (B binop E) : (C binop F)`.
However, the fold does not fire if the select `(x>=1 ? x : 1)` has
already been converted to `umax(x, 1)`. This patch treats the umax as
`select(x==0, 1, x)` when the other operand is a select whose condition
is `icmp eq x, 0`, allowing the binop to fold into the select.
---
 .../InstCombine/InstructionCombining.cpp      | 20 +++++++++++++++++++
 .../Transforms/InstCombine/binop-select.ll    | 17 ++++++----------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index ed8709ea4c051f7..dc551a8dba1900f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1103,9 +1103,29 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
   Value *A, *B, *C, *D, *E, *F;
   bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
   bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
+
   if (!LHSIsSelect && !RHSIsSelect)
     return nullptr;
 
+  // Treat umax(x, 1) as select(icmp(eq, x, 0), 1, x), if it matches the other
+  // predicate.
+  auto TryMatchSelectFromUMax = [](bool LHSIsSelect, Value *RHS, bool &RHSIsSelect,
+                                   Value *A, Value *B, Value *C, Value *&D,
+                                   Value *&E, Value *&F) {
+    CmpInst::Predicate Pred;
+    Value *X;
+    if (LHSIsSelect && !RHSIsSelect &&
+        match(RHS, m_UMax(m_Value(X), m_One())) &&
+        match(A, m_c_ICmp(Pred, m_Specific(X), m_Zero())) &&
+        Pred == ICmpInst::ICMP_EQ) {
+      RHSIsSelect = true;
+      match(RHS, m_UMax(m_Value(F), m_Value(E)));
+      D = A;
+    }
+  };
+  TryMatchSelectFromUMax(LHSIsSelect, RHS, RHSIsSelect, A, B, C, D, E, F);
+  TryMatchSelectFromUMax(RHSIsSelect, LHS, LHSIsSelect, D, E, F, A, B, C);
+
   FastMathFlags FMF;
   BuilderTy::FastMathFlagGuard Guard(Builder);
   if (isa<FPMathOperator>(&I)) {
diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll
index bb82857db10f4fd..b064b3447e8135b 100644
--- a/llvm/test/Transforms/InstCombine/binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/binop-select.ll
@@ -408,9 +408,8 @@ define i32 @ashr_sel_op1_use(i1 %b) {
 define i32 @umax_as_select_sub(i32 %a) {
 ; CHECK-LABEL: @umax_as_select_sub(
 ; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C_NOT]], i32 0, i32 2
-; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
-; CHECK-NEXT:    [[B:%.*]] = sub i32 [[S]], [[M]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 2, [[A]]
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C_NOT]], i32 -1, i32 [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %c = icmp ugt i32 %a, 0
@@ -423,9 +422,8 @@ define i32 @umax_as_select_sub(i32 %a) {
 define i32 @umax_as_select_sub_c(i32 %a) {
 ; CHECK-LABEL: @umax_as_select_sub_c(
 ; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[S_NEG:%.*]] = select i1 [[C_NOT]], i32 0, i32 -2
-; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
-; CHECK-NEXT:    [[B:%.*]] = add i32 [[S_NEG]], [[M]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A]], -2
+; CHECK-NEXT:    [[B:%.*]] = select i1 [[C_NOT]], i32 1, i32 [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %c = icmp ugt i32 %a, 0
@@ -452,11 +450,8 @@ define i32 @umax_as_select_add(i32 %a, i32 %x, i32 %y) {
 
 define i32 @umax_as_select_mul(i32 %a) {
 ; CHECK-LABEL: @umax_as_select_mul(
-; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C_NOT]], i32 0, i32 2
-; CHECK-NEXT:    [[M:%.*]] = call i32 @llvm.umax.i32(i32 [[A]], i32 1)
-; CHECK-NEXT:    [[B:%.*]] = mul i32 [[S]], [[M]]
-; CHECK-NEXT:    ret i32 [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %c = icmp ugt i32 %a, 0
   %s = select i1 %c, i32 2, i32 0