[llvm] [DAGCombiner] Turn `(neg (max x, (neg x)))` into `(min x, (neg x))` (PR #120666)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 09:50:53 PST 2024
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/120666
>From abf6f85b918bade28952c53e19578d36f1947a37 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Thu, 19 Dec 2024 17:26:19 -0800
Subject: [PATCH 1/5] Pre-commit test
---
llvm/test/CodeGen/RISCV/neg-abs.ll | 226 +++++++++++++++++++++++++++++
1 file changed, 226 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index 7d6a6d7ed4ce64..9d2397756300b4 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -258,3 +258,229 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
%neg = sub nsw i64 0, %abs
ret i64 %neg
}
+
+define i32 @expanded_neg_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: blt a0, a1, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: max a0, a1, a0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: blt a1, a0, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB6_2:
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a1, a0
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: max a0, a0, a1
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i32 @expanded_neg_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: bltu a0, a1, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: maxu a0, a1, a0
+; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs32_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: bltu a1, a0, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB7_2:
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a1, a0
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: maxu a0, a0, a1
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.umax.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i64 @expanded_neg_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a2
+; RV32I-NEXT: beqz a4, .LBB8_3
+; RV32I-NEXT: j .LBB8_4
+; RV32I-NEXT: .LBB8_2:
+; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: bnez a4, .LBB8_4
+; RV32I-NEXT: .LBB8_3:
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB8_4:
+; RV32I-NEXT: snez a0, a3
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB8_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: slt a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB8_3
+; RV32ZBB-NEXT: j .LBB8_4
+; RV32ZBB-NEXT: .LBB8_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB8_4
+; RV32ZBB-NEXT: .LBB8_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB8_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: blt a0, a1, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB8_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: max a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
+
+define i64 @expanded_neg_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a2
+; RV32I-NEXT: beqz a4, .LBB9_3
+; RV32I-NEXT: j .LBB9_4
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: bnez a4, .LBB9_4
+; RV32I-NEXT: .LBB9_3:
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: snez a0, a3
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB9_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB9_3
+; RV32ZBB-NEXT: j .LBB9_4
+; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB9_4
+; RV32ZBB-NEXT: .LBB9_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB9_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: bltu a0, a1, .LBB9_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB9_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: maxu a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
>From 25c43d8a1e28c96ca7b2fecefb47da47dfb7fe67 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Thu, 19 Dec 2024 17:27:47 -0800
Subject: [PATCH 2/5] [DAGCombiner] Turn `(neg (max x, (neg x)))` into `(min x,
(neg x))`
We already have a rule to turn `(neg (abs x))` into `(min x, (neg x))`.
But in some cases `(neg (max x, (neg x)))` is formed by an expanded
`abs` followed by a `neg` that is only generated after the expansion.
This patch adds a separate pattern to match this kind of case.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++++++
llvm/test/CodeGen/RISCV/neg-abs.ll | 12 ++++--------
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cbfef2d238bbe..3cb33bdd02ef39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3949,6 +3949,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
return Result;
+ // Similar to the previous rule, but this time targeting an expanded abs.
+ // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
+ // Note that this is applicable to both signed and unsigned min/max.
+ SDValue X;
+ if (LegalOperations &&
+ sd_match(N1,
+ m_OneUse(m_AnyOf(m_SMax(m_Value(X), m_Neg(m_Deferred(X))),
+ m_UMax(m_Value(X), m_Neg(m_Deferred(X))))))) {
+ unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
+ if (hasOperation(MinOpc, VT))
+ return DAG.getNode(MinOpc, DL, VT, X,
+ DAG.getNode(ISD::SUB, DL, VT, N0, X));
+ }
+
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index 9d2397756300b4..c1695c88f1f384 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -273,8 +273,7 @@ define i32 @expanded_neg_abs32(i32 %x) {
; RV32ZBB-LABEL: expanded_neg_abs32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: max a0, a1, a0
-; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: min a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs32:
@@ -315,8 +314,7 @@ define i32 @expanded_neg_abs32_unsigned(i32 %x) {
; RV32ZBB-LABEL: expanded_neg_abs32_unsigned:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: maxu a0, a1, a0
-; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs32_unsigned:
@@ -405,8 +403,7 @@ define i64 @expanded_neg_abs64(i64 %x) {
; RV64ZBB-LABEL: expanded_neg_abs64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: max a0, a1, a0
-; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: min a0, a0, a1
; RV64ZBB-NEXT: ret
%n = sub i64 0, %x
%t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
@@ -476,8 +473,7 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) {
; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: maxu a0, a1, a0
-; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
%n = sub i64 0, %x
%t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
>From 1abcdc3172121d95b309080fef162e7df32475fa Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Fri, 20 Dec 2024 10:30:12 -0800
Subject: [PATCH 3/5] Address review comments
Preserve the flags from the first sub
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3cb33bdd02ef39..59d16d5cbb739a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3953,14 +3953,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
// Note that this is applicable to both signed and unsigned min/max.
SDValue X;
+ SDValue S0;
if (LegalOperations &&
- sd_match(N1,
- m_OneUse(m_AnyOf(m_SMax(m_Value(X), m_Neg(m_Deferred(X))),
- m_UMax(m_Value(X), m_Neg(m_Deferred(X))))))) {
+ sd_match(N1, m_OneUse(m_AnyOf(
+ m_SMax(m_Value(X),
+ m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0))),
+ m_UMax(m_Value(X), m_AllOf(m_Neg(m_Deferred(X)),
+ m_Value(S0))))))) {
unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
if (hasOperation(MinOpc, VT))
- return DAG.getNode(MinOpc, DL, VT, X,
- DAG.getNode(ISD::SUB, DL, VT, N0, X));
+ return DAG.getNode(MinOpc, DL, VT, X, S0);
}
// Fold neg(splat(neg(x)) -> splat(x)
>From 55d7531db25c9b79db35daf321cc49cb996eb9bb Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Fri, 20 Dec 2024 10:51:16 -0800
Subject: [PATCH 4/5] Add `(sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 36 ++-
llvm/test/CodeGen/RISCV/neg-abs.ll | 222 ++++++++++++++++++
2 files changed, 249 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 59d16d5cbb739a..808dedc9d679fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3951,18 +3951,36 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Similar to the previous rule, but this time targeting an expanded abs.
// (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
- // Note that this is applicable to both signed and unsigned min/max.
+ // as well as
+ // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
+ // Note that these two are applicable to both signed and unsigned min/max.
SDValue X;
SDValue S0;
+ auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
if (LegalOperations &&
- sd_match(N1, m_OneUse(m_AnyOf(
- m_SMax(m_Value(X),
- m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0))),
- m_UMax(m_Value(X), m_AllOf(m_Neg(m_Deferred(X)),
- m_Value(S0))))))) {
- unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
- if (hasOperation(MinOpc, VT))
- return DAG.getNode(MinOpc, DL, VT, X, S0);
+ sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
+ m_UMax(m_Value(X), NegPat),
+ m_SMin(m_Value(X), NegPat),
+ m_UMin(m_Value(X), NegPat))))) {
+ unsigned NewOpc = 0;
+ switch (N1->getOpcode()) {
+ case ISD::SMAX:
+ NewOpc = ISD::SMIN;
+ break;
+ case ISD::UMAX:
+ NewOpc = ISD::UMIN;
+ break;
+ case ISD::SMIN:
+ NewOpc = ISD::SMAX;
+ break;
+ case ISD::UMIN:
+ NewOpc = ISD::UMAX;
+ break;
+ default:
+ llvm_unreachable("unrecognized opcode");
+ }
+ if (hasOperation(NewOpc, VT))
+ return DAG.getNode(NewOpc, DL, VT, X, S0);
}
// Fold neg(splat(neg(x)) -> splat(x)
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index c1695c88f1f384..fe19a4fa8bbd81 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -480,3 +480,225 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) {
%r = sub i64 0, %t
ret i64 %r
}
+
+define i32 @expanded_neg_inv_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: blt a1, a0, .LBB10_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: max a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: blt a0, a1, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB10_2:
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a1, a0
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: min a0, a0, a1
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.smin.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: bltu a1, a0, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: maxu a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: bltu a0, a1, .LBB11_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB11_2:
+; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: sext.w a1, a0
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: negw a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.umin.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i64 @expanded_neg_inv_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a2, a1
+; RV32I-NEXT: beqz a4, .LBB12_3
+; RV32I-NEXT: j .LBB12_4
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: sltu a4, a3, a0
+; RV32I-NEXT: bnez a4, .LBB12_4
+; RV32I-NEXT: .LBB12_3:
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB12_4:
+; RV32I-NEXT: snez a0, a3
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB12_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: slt a4, a2, a1
+; RV32ZBB-NEXT: beqz a4, .LBB12_3
+; RV32ZBB-NEXT: j .LBB12_4
+; RV32ZBB-NEXT: .LBB12_2:
+; RV32ZBB-NEXT: sltu a4, a3, a0
+; RV32ZBB-NEXT: bnez a4, .LBB12_4
+; RV32ZBB-NEXT: .LBB12_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB12_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: blt a1, a0, .LBB12_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB12_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: max a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.smin.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
+
+define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a2, a1
+; RV32I-NEXT: beqz a4, .LBB13_3
+; RV32I-NEXT: j .LBB13_4
+; RV32I-NEXT: .LBB13_2:
+; RV32I-NEXT: sltu a4, a3, a0
+; RV32I-NEXT: bnez a4, .LBB13_4
+; RV32I-NEXT: .LBB13_3:
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB13_4:
+; RV32I-NEXT: snez a0, a3
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB13_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a2, a1
+; RV32ZBB-NEXT: beqz a4, .LBB13_3
+; RV32ZBB-NEXT: j .LBB13_4
+; RV32ZBB-NEXT: .LBB13_2:
+; RV32ZBB-NEXT: sltu a4, a3, a0
+; RV32ZBB-NEXT: bnez a4, .LBB13_4
+; RV32ZBB-NEXT: .LBB13_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB13_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: bltu a1, a0, .LBB13_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB13_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: maxu a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.umin.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
>From 3e85bc412d1fb8ec6c0214724fd918f2a513af25 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 23 Dec 2024 09:50:10 -0800
Subject: [PATCH 5/5] Extract min<->max conversion into its own helper function
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 ++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +-----------------
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 +++++++++++++++
3 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0b6d155b6d161e..01346f01cead35 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1506,6 +1506,8 @@ inline bool isBitwiseLogicOp(unsigned Opcode) {
return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
}
+NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc);
+
/// Get underlying scalar opcode for VECREDUCE opcode.
/// For example ISD::AND for ISD::VECREDUCE_AND.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 808dedc9d679fd..74a5a64f616b30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3962,23 +3962,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
m_UMax(m_Value(X), NegPat),
m_SMin(m_Value(X), NegPat),
m_UMin(m_Value(X), NegPat))))) {
- unsigned NewOpc = 0;
- switch (N1->getOpcode()) {
- case ISD::SMAX:
- NewOpc = ISD::SMIN;
- break;
- case ISD::UMAX:
- NewOpc = ISD::UMIN;
- break;
- case ISD::SMIN:
- NewOpc = ISD::SMAX;
- break;
- case ISD::UMIN:
- NewOpc = ISD::UMAX;
- break;
- default:
- llvm_unreachable("unrecognized opcode");
- }
+ unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
if (hasOperation(NewOpc, VT))
return DAG.getNode(NewOpc, DL, VT, X, S0);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 07749ec87d0b20..6a65e3b88d0fc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate(
return true;
}
+ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) {
+ switch (MinMaxOpc) {
+ default:
+ llvm_unreachable("unrecognized opcode");
+ case ISD::UMIN:
+ return ISD::UMAX;
+ case ISD::UMAX:
+ return ISD::UMIN;
+ case ISD::SMIN:
+ return ISD::SMAX;
+ case ISD::SMAX:
+ return ISD::SMIN;
+ }
+}
+
ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
switch (VecReduceOpcode) {
default:
More information about the llvm-commits
mailing list