[llvm] [GISel] Combine `(neg (min/max x, (neg x)))` into `(max/min x, (neg x))` (PR #120998)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 13:49:41 PST 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/120998
>From c05fd18f226ea84f9cdc35be52db4e32189e49dc Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 23 Dec 2024 11:46:16 -0800
Subject: [PATCH 1/4] [GISel] Combine `(neg (max x, (neg x)))` into `(min x,
(neg x))`
This is the GISel version of #120666.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 4 +
.../include/llvm/Target/GlobalISel/Combine.td | 8 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 36 ++
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 15 +
llvm/lib/Target/RISCV/RISCVCombine.td | 2 +-
.../RISCV/GlobalISel/combine-neg-abs.ll | 457 ++++++++++++++++++
7 files changed, 523 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 871456d2a55b5e..94e36e412b0cf7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -864,6 +864,9 @@ class CombinerHelper {
/// Combine select to integer min/max.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const;
+ /// Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
+ bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+
/// Combine selects.
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cb5a4c14b364c8..c4ba732879f404 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -171,6 +171,10 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkEmitter &MORE,
MachineOptimizationRemarkMissed &R);
+/// Returns the inverse opcode of \p MinMaxOpc , which is a generic min/max
+/// opcode like G_SMIN.
+unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc);
+
/// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
std::optional<APInt> getIConstantVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 013c3a6ed83d8b..8641eabbdd84c6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1372,6 +1372,12 @@ def select_to_iminmax: GICombineRule<
[{ return Helper.matchSelectIMinMax(${root}, ${info}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>;
+def simplify_neg_minmax : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SUB):$root,
+ [{ return Helper.matchSimplifyNegMinMax(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
def match_selects : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SELECT):$root,
@@ -2008,7 +2014,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector,
+ simplify_neg_minmax, combine_concat_vector,
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
combine_use_vector_truncate, merge_combines, overflow_combines]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c061c01d3c1b12..71f9fab9419de3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7062,6 +7062,42 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
}
}
+// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
+bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
+ BuildFnTy &MatchInfo) const {
+ assert(MI.getOpcode() == TargetOpcode::G_SUB);
+ Register DestReg = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(DestReg);
+ if (!isLegal({TargetOpcode::G_SUB, {DestTy}}))
+ return false;
+
+ // GISel doesn't have m_Deferred at this moment, so we have to
+ // match this pattern in two phases.
+ Register X, Y;
+ Register Sub0;
+ if (mi_match(DestReg, MRI,
+ m_Neg(m_OneUse(m_any_of(
+ m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
+ m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
+ m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
+ m_Reg(Y)))))) &&
+ (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
+ mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
+ MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+ MachineInstr *Sub0MI = MRI.getVRegDef(Sub0);
+ X = Sub0MI->getOperand(2).getReg();
+ unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
+ if (isLegal({NewOpc, {DestTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
GSelect *Select = cast<GSelect>(&MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 79382933a1f42d..625d556e3ff5ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
+unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) {
+ switch (MinMaxOpc) {
+ case TargetOpcode::G_SMIN:
+ return TargetOpcode::G_SMAX;
+ case TargetOpcode::G_SMAX:
+ return TargetOpcode::G_SMIN;
+ case TargetOpcode::G_UMIN:
+ return TargetOpcode::G_UMAX;
+ case TargetOpcode::G_UMAX:
+ return TargetOpcode::G_UMIN;
+ default:
+ llvm_unreachable("unrecognized opcode");
+ }
+}
+
std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
const MachineRegisterInfo &MRI) {
std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 030613a7d89040..995dd0c5d82eba 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -25,5 +25,5 @@ def RISCVPostLegalizerCombiner
: GICombiner<"RISCVPostLegalizerCombinerImpl",
[sub_to_add, combines_for_extload, redundant_and,
identity_combines, shift_immed_chain,
- commute_constant_to_rhs]> {
+ commute_constant_to_rhs, simplify_neg_minmax]> {
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
new file mode 100644
index 00000000000000..6c848ecf0fffdb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32ZBB
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV64I
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV64ZBB
+
+define i32 @expanded_neg_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: blt a0, a1, .LBB0_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB0_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: min a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: blt a3, a2, .LBB0_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB0_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: negw a1, a0
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: max a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i32 @expanded_neg_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: bltu a0, a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs32_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: bltu a3, a2, .LBB1_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB1_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: negw a1, a0
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: maxu a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.umax.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i64 @expanded_neg_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB2_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a2
+; RV32I-NEXT: beqz a4, .LBB2_3
+; RV32I-NEXT: j .LBB2_4
+; RV32I-NEXT: .LBB2_2:
+; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: bnez a4, .LBB2_4
+; RV32I-NEXT: .LBB2_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: .LBB2_4:
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: snez a1, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB2_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: slt a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB2_3
+; RV32ZBB-NEXT: j .LBB2_4
+; RV32ZBB-NEXT: .LBB2_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB2_4
+; RV32ZBB-NEXT: .LBB2_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: .LBB2_4:
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: snez a1, a3
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: blt a0, a1, .LBB2_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB2_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: min a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
+
+define i64 @expanded_neg_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a2
+; RV32I-NEXT: beqz a4, .LBB3_3
+; RV32I-NEXT: j .LBB3_4
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: bnez a4, .LBB3_4
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: .LBB3_4:
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: snez a1, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB3_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB3_3
+; RV32ZBB-NEXT: j .LBB3_4
+; RV32ZBB-NEXT: .LBB3_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB3_4
+; RV32ZBB-NEXT: .LBB3_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: .LBB3_4:
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: snez a1, a3
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: bltu a0, a1, .LBB3_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB3_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
+
+define i32 @expanded_neg_inv_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: blt a1, a0, .LBB4_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: max a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: blt a2, a3, .LBB4_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: negw a1, a0
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: min a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.smin.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: bltu a1, a0, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB5_2:
+; RV32I-NEXT: neg a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: maxu a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: sext.w a2, a1
+; RV64I-NEXT: sext.w a3, a0
+; RV64I-NEXT: bltu a2, a3, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB5_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: negw a1, a0
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+ %n = sub i32 0, %x
+ %t = call i32 @llvm.umin.i32(i32 %n, i32 %x)
+ %r = sub i32 0, %t
+ ret i32 %r
+}
+
+define i64 @expanded_neg_inv_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a2, a1
+; RV32I-NEXT: beqz a4, .LBB6_3
+; RV32I-NEXT: j .LBB6_4
+; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: sltu a4, a3, a0
+; RV32I-NEXT: bnez a4, .LBB6_4
+; RV32I-NEXT: .LBB6_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: .LBB6_4:
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: snez a1, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB6_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: slt a4, a2, a1
+; RV32ZBB-NEXT: beqz a4, .LBB6_3
+; RV32ZBB-NEXT: j .LBB6_4
+; RV32ZBB-NEXT: .LBB6_2:
+; RV32ZBB-NEXT: sltu a4, a3, a0
+; RV32ZBB-NEXT: bnez a4, .LBB6_4
+; RV32ZBB-NEXT: .LBB6_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: .LBB6_4:
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: snez a1, a3
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: blt a1, a0, .LBB6_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB6_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: max a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.smin.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
+
+define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a2, a1
+; RV32I-NEXT: beqz a4, .LBB7_3
+; RV32I-NEXT: j .LBB7_4
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: sltu a4, a3, a0
+; RV32I-NEXT: bnez a4, .LBB7_4
+; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: .LBB7_4:
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: snez a1, a3
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB7_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a2, a1
+; RV32ZBB-NEXT: beqz a4, .LBB7_3
+; RV32ZBB-NEXT: j .LBB7_4
+; RV32ZBB-NEXT: .LBB7_2:
+; RV32ZBB-NEXT: sltu a4, a3, a0
+; RV32ZBB-NEXT: bnez a4, .LBB7_4
+; RV32ZBB-NEXT: .LBB7_3:
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: .LBB7_4:
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: snez a1, a3
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a1, a2, a1
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: bltu a1, a0, .LBB7_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB7_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: maxu a0, a0, a1
+; RV64ZBB-NEXT: ret
+ %n = sub i64 0, %x
+ %t = call i64 @llvm.umin.i64(i64 %n, i64 %x)
+ %r = sub i64 0, %t
+ ret i64 %r
+}
>From ae586ecf1a738b529426b491cc34d15f9994e06b Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 23 Dec 2024 12:56:22 -0800
Subject: [PATCH 2/4] Use m_BinOp instead of m_CommutativeBinOp
Because the way we match it, it doesn't matter whether it's commutative
or not.
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 71f9fab9419de3..3ab9ab23a8effd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7078,9 +7078,8 @@ bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
if (mi_match(DestReg, MRI,
m_Neg(m_OneUse(m_any_of(
m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
- m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
- m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
- m_Reg(Y)))))) &&
+ m_BinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
+ m_BinOp(TargetOpcode::G_UMAX, m_Reg(X), m_Reg(Y)))))) &&
(mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
>From 1bcb38120a5411879a5ca8540825b7e8ad93af3b Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Tue, 24 Dec 2024 11:44:44 -0800
Subject: [PATCH 3/4] fixup! Use m_BinOp instead of m_CommutativeBinOp
---
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index c4ba732879f404..a35ecae5d18bfb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -171,7 +171,7 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkEmitter &MORE,
MachineOptimizationRemarkMissed &R);
-/// Returns the inverse opcode of \p MinMaxOpc , which is a generic min/max
+/// Returns the inverse opcode of \p MinMaxOpc, which is a generic min/max
/// opcode like G_SMIN.
unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc);
>From e1f5da5aae9db4b736084bd6ba0aea621365ab5c Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Thu, 2 Jan 2025 13:43:03 -0800
Subject: [PATCH 4/4] fixup! Apply m_DeferredReg
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3ab9ab23a8effd..78f9730fbbcd90 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7071,20 +7071,15 @@ bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
if (!isLegal({TargetOpcode::G_SUB, {DestTy}}))
return false;
- // GISel doesn't have m_Deferred at this moment, so we have to
- // match this pattern in two phases.
- Register X, Y;
+ Register X;
Register Sub0;
+ auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
if (mi_match(DestReg, MRI,
- m_Neg(m_OneUse(m_any_of(
- m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
- m_BinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
- m_BinOp(TargetOpcode::G_UMAX, m_Reg(X), m_Reg(Y)))))) &&
- (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
- mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
+ m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
+ m_GSMax(m_Reg(X), NegPattern),
+ m_GUMin(m_Reg(X), NegPattern),
+ m_GUMax(m_Reg(X), NegPattern)))))) {
MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
- MachineInstr *Sub0MI = MRI.getVRegDef(Sub0);
- X = Sub0MI->getOperand(2).getReg();
unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
if (isLegal({NewOpc, {DestTy}})) {
MatchInfo = [=](MachineIRBuilder &B) {
More information about the llvm-commits
mailing list