[llvm] [AMDGPU][GISel] Fold 'min(min(x,y),z)' and 'max(max(x,y),z)' into min3 and max3 (PR #124263)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 25 21:51:39 PST 2025
https://github.com/Ruhung updated https://github.com/llvm/llvm-project/pull/124263
>From 15e8dc172675ef78031893fbf294834d766c533c Mon Sep 17 00:00:00 2001
From: Ruhung <jhlee at pllab.cs.nthu.edu.tw>
Date: Fri, 24 Jan 2025 11:17:37 +0800
Subject: [PATCH] [AMDGPU][GISel] Fold 'min(min(x,y),z)' and 'max(max(x,y),z)'
into min3 and max3.
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 23 +-
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 6 +
.../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 74 +++
llvm/lib/Target/AMDGPU/SIInstructions.td | 36 ++
.../AMDGPU/GlobalISel/fmin3-fmax3-combine.ll | 103 ++++
.../AMDGPU/GlobalISel/min3-max3-combine.ll | 172 ++++++
.../regbankcombiner-fmin3-fmax3-combine.mir | 154 ++++++
.../regbankcombiner-min3-max3-combine.mir | 500 ++++++++++++++++++
llvm/test/CodeGen/AMDGPU/ctlz.ll | 6 +-
llvm/test/CodeGen/AMDGPU/cttz.ll | 6 +-
10 files changed, 1065 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3-fmax3-combine.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/min3-max3-combine.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmin3-fmax3-combine.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-min3-max3-combine.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index da47aaf8a3b5c9..7e0394fbeabdf8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -71,6 +71,16 @@ def int_minmax_to_med3 : GICombineRule<
[{ return matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
(apply [{ applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+def minmax3_matchdata : GIDefMatchData<"VOP3MatchInfo">;
+
+def minmax_to_minmax3
+ : GICombineRule<
+ (defs root:$min_or_max, minmax3_matchdata:$matchinfo),
+ (match(wip_match_opcode G_SMAX, G_SMIN, G_UMAX, G_UMIN, G_FMAXNUM,
+ G_FMINNUM, G_FMAXNUM_IEEE, G_FMINNUM_IEEE):$min_or_max,
+ [{ return matchMinMaxToMinMax3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ applyVOP3(*${min_or_max}, ${matchinfo}); }])>;
+
def fp_minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, med3_matchdata:$matchinfo),
(match (wip_match_opcode G_FMAXNUM,
@@ -175,10 +185,9 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
let CombineAllMethodName = "tryCombineAllImpl";
}
-def AMDGPURegBankCombiner : GICombiner<
- "AMDGPURegBankCombinerImpl",
- [unmerge_merge, unmerge_cst, unmerge_undef,
- zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
- fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
- redundant_and]> {
-}
+def AMDGPURegBankCombiner
+ : GICombiner<"AMDGPURegBankCombinerImpl",
+ [unmerge_merge, unmerge_cst, unmerge_undef, zext_trunc_fold,
+ int_minmax_to_med3, ptr_add_immed_chain, fp_minmax_to_clamp,
+ fp_minmax_to_med3, fmed3_intrinsic_to_clamp, redundant_and,
+ minmax_to_minmax3]> {}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 1b909568fc555c..fd4237cdc4e238 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -244,6 +244,12 @@ def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;
+def : GINodeEquiv<G_AMDGPU_SMAX3, AMDGPUsmax3>;
+def : GINodeEquiv<G_AMDGPU_UMAX3, AMDGPUumax3>;
+def : GINodeEquiv<G_AMDGPU_FMAX3, AMDGPUfmax3>;
+def : GINodeEquiv<G_AMDGPU_SMIN3, AMDGPUsmin3>;
+def : GINodeEquiv<G_AMDGPU_UMIN3, AMDGPUumin3>;
+def : GINodeEquiv<G_AMDGPU_FMIN3, AMDGPUfmin3>;
def : GINodeEquiv<G_AMDGPU_CLAMP, AMDGPUclamp>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 98c48f4fe3705b..22fb4668797889 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -74,17 +74,27 @@ class AMDGPURegBankCombinerImpl : public Combiner {
Register Val0, Val1, Val2;
};
+ struct VOP3MatchInfo {
+ unsigned Opc;
+ Register Val0, Val1, Val2;
+ };
+
MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
+ unsigned getMinMax3(unsigned Opc) const;
template <class m_Cst, typename CstTy>
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
Register &Val, CstTy &K0, CstTy &K1) const;
+ bool matchVOP3(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned Opc,
+ Register &A, Register &B, Register &C) const;
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
+ bool matchMinMaxToMinMax3(MachineInstr &MI, VOP3MatchInfo &MatchInfo) const;
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
+ void applyVOP3(MachineInstr &MI, VOP3MatchInfo &MatchInfo) const;
void applyClamp(MachineInstr &MI, Register &Reg) const;
private:
@@ -165,6 +175,27 @@ AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
}
}
+unsigned AMDGPURegBankCombinerImpl::getMinMax3(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unsupported opcode");
+ case AMDGPU::G_SMAX:
+ return AMDGPU::G_AMDGPU_SMAX3;
+ case AMDGPU::G_SMIN:
+ return AMDGPU::G_AMDGPU_SMIN3;
+ case AMDGPU::G_UMAX:
+ return AMDGPU::G_AMDGPU_UMAX3;
+ case AMDGPU::G_UMIN:
+ return AMDGPU::G_AMDGPU_UMIN3;
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ return AMDGPU::G_AMDGPU_FMAX3;
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ return AMDGPU::G_AMDGPU_FMIN3;
+ }
+}
+
template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
MachineRegisterInfo &MRI,
@@ -187,6 +218,40 @@ bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
m_Cst(K0))));
}
+bool AMDGPURegBankCombinerImpl::matchVOP3(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ unsigned Opc, Register &A,
+ Register &B, Register &C) const {
+ return mi_match(
+ MI, MRI,
+ m_any_of(m_BinOp(Opc, m_OneNonDBGUse(m_BinOp(Opc, m_Reg(A), m_Reg(B))),
+ m_Reg(C)),
+ m_BinOp(Opc, m_Reg(A),
+ m_OneNonDBGUse(m_BinOp(Opc, m_Reg(B), m_Reg(C))))));
+}
+
+bool AMDGPURegBankCombinerImpl::matchMinMaxToMinMax3(
+ MachineInstr &MI, VOP3MatchInfo &MatchInfo) const {
+ Register Dst = MI.getOperand(0).getReg();
+ if (!isVgprRegBank(Dst))
+ return false;
+
+ LLT Ty = MRI.getType(Dst);
+ if (Ty == LLT::scalar(16)) {
+ if (!STI.hasMin3Max3_16())
+ return false;
+ } else if (Ty != LLT::scalar(32)) {
+ return false;
+ }
+
+ unsigned Opc = MI.getOpcode();
+ Register R0, R1, R2;
+ if (!matchVOP3(MI, MRI, Opc, R0, R1, R2))
+ return false;
+ MatchInfo = {getMinMax3(Opc), R0, R1, R2};
+ return true;
+}
+
bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
Register Dst = MI.getOperand(0).getReg();
@@ -362,6 +427,15 @@ void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
MI.eraseFromParent();
}
+void AMDGPURegBankCombinerImpl::applyVOP3(MachineInstr &MI,
+ VOP3MatchInfo &MatchInfo) const {
+ B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
+ {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
+ getAsVgpr(MatchInfo.Val2)},
+ MI.getFlags());
+ MI.eraseFromParent();
+}
+
SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
return MF.getInfo<SIMachineFunctionInfo>()->getMode();
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4325ab448e5815..b5b4b8cf91db8f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3955,6 +3955,42 @@ def G_AMDGPU_FMED3 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+def G_AMDGPU_SMIN3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_UMIN3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_FMIN3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_SMAX3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_UMAX3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_FMAX3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
def G_AMDGPU_CLAMP : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3-fmax3-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3-fmax3-combine.ll
new file mode 100644
index 00000000000000..0f04e7a6210625
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin3-fmax3-combine.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define float @test_fmin3(float %a, float %b, float %c) {
+; GFX10-LABEL: test_fmin3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT: v_min3_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call float @llvm.minnum.f32(float %a, float %b)
+ %min2 = call float @llvm.minnum.f32(float %min1, float %c)
+ ret float %min2
+}
+
+define float @test_fmin3_nnan(float %a, float %b, float %c) {
+; GFX10-LABEL: test_fmin3_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %min2 = call nnan float @llvm.minnum.f32(float %min1, float %c)
+ ret float %min2
+}
+
+define float @test_fmin3_with_constants_nnan(float %a, float %b) {
+; GFX10-LABEL: test_fmin3_with_constants_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_f32 v0, v0, v1, 0x40e00000
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call nnan float @llvm.minnum.f32(float %a, float %b)
+ %min2 = call nnan float @llvm.minnum.f32(float %min1, float 7.0)
+ ret float %min2
+}
+
+define <2 x float> @test_fmin3_v2f32_nnan(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; GFX10-LABEL: test_fmin3_v2f32_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_f32 v0, v0, v2, v4
+; GFX10-NEXT: v_min3_f32 v1, v1, v3, v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
+ %min2 = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %min1, <2 x float> %c)
+ ret <2 x float> %min2
+}
+
+define float @test_fmax3(float %a, float %b, float %c) {
+; GFX10-LABEL: test_fmax3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX10-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX10-NEXT: v_max3_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %max2 = call float @llvm.maxnum.f32(float %max1, float %c)
+ ret float %max2
+}
+
+define float @test_fmax3_nnan(float %a, float %b, float %c) {
+; GFX10-LABEL: test_fmax3_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %max2 = call nnan float @llvm.maxnum.f32(float %max1, float %c)
+ ret float %max2
+}
+
+define float @test_fmax3_with_constants_nnan(float %a, float %b) {
+; GFX10-LABEL: test_fmax3_with_constants_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_f32 v0, v0, v1, 0x40e00000
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+ %max2 = call nnan float @llvm.maxnum.f32(float %max1, float 7.0)
+ ret float %max2
+}
+
+define <2 x float> @test_fmax3_v2f32_nnan(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; GFX10-LABEL: test_fmax3_v2f32_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_f32 v0, v0, v2, v4
+; GFX10-NEXT: v_max3_f32 v1, v1, v3, v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
+ %min2 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> %min1, <2 x float> %c)
+ ret <2 x float> %min2
+}
+
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
+declare <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/min3-max3-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/min3-max3-combine.ll
new file mode 100644
index 00000000000000..dfd2cba580e735
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/min3-max3-combine.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define i32 @test_smin3(i32 %a, i32 %b, i32 %c) {
+; GFX10-LABEL: test_smin3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_i32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.smin.i32(i32 %min1, i32 %c)
+ ret i32 %min2
+}
+
+define i32 @test_smin3_with_constants(i32 %a, i32 %b) {
+; GFX10-LABEL: test_smin3_with_constants:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_i32 v0, v0, v1, 7
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.smin.i32(i32 %min1, i32 7)
+ ret i32 %min2
+}
+
+define i32 @test_smin3_smin_umin(i32 %a, i32 %b) {
+; GFX10-LABEL: test_smin3_smin_umin:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min_i32_e32 v0, v0, v1
+; GFX10-NEXT: v_min_u32_e32 v0, 7, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.umin.i32(i32 %min1, i32 7)
+ ret i32 %min2
+}
+
+define <2 x i16> @test_smin3_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GFX10-LABEL: test_smin3_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_min_i16 v0, v0, v1
+; GFX10-NEXT: v_pk_min_i16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %min2 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %min1, <2 x i16> %c)
+ ret <2 x i16> %min2
+}
+
+define i32 @test_smax3(i32 %a, i32 %b, i32 %c) {
+; GFX10-LABEL: test_smax3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_i32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+ %max2 = call i32 @llvm.smax.i32(i32 %max1, i32 %c)
+ ret i32 %max2
+}
+
+define i32 @test_smax3_with_constants(i32 %a, i32 %b) {
+; GFX10-LABEL: test_smax3_with_constants:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_i32 v0, v0, v1, 7
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.smax.i32(i32 %min1, i32 7)
+ ret i32 %min2
+}
+
+define i32 @test_smax3_smax_umax(i32 %a, i32 %b) {
+; GFX10-LABEL: test_smax3_smax_umax:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max_i32_e32 v0, v0, v1
+; GFX10-NEXT: v_max_u32_e32 v0, 7, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.umax.i32(i32 %min1, i32 7)
+ ret i32 %min2
+}
+
+define <2 x i16> @test_smax3_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GFX10-LABEL: test_smax3_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_max_i16 v0, v0, v1
+; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %max2 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %max1, <2 x i16> %c)
+ ret <2 x i16> %max2
+}
+
+define i32 @test_umin3(i32 %a, i32 %b, i32 %c) {
+; GFX10-LABEL: test_umin3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_u32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.umin.i32(i32 %min1, i32 %c)
+ ret i32 %min2
+}
+
+define i32 @test_umin3_with_constants(i32 %a, i32 %b) {
+; GFX10-LABEL: test_umin3_with_constants:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_min3_u32 v0, v0, v1, 7
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+ %min2 = call i32 @llvm.umin.i32(i32 %min1, i32 7)
+ ret i32 %min2
+}
+
+define <2 x i16> @test_umin3_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GFX10-LABEL: test_umin3_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_min_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_min_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %min1 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %min2 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %min1, <2 x i16> %c)
+ ret <2 x i16> %min2
+}
+
+define i32 @test_umax3(i32 %a, i32 %b, i32 %c) {
+; GFX10-LABEL: test_umax3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_u32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
+ %max2 = call i32 @llvm.umax.i32(i32 %max1, i32 %c)
+ ret i32 %max2
+}
+
+define i32 @test_umax3_with_constants(i32 %a, i32 %b) {
+; GFX10-LABEL: test_umax3_with_constants:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max3_u32 v0, v0, v1, 7
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
+ %max2 = call i32 @llvm.umax.i32(i32 %max1, i32 7)
+ ret i32 %max2
+}
+
+define <2 x i16> @test_umax3_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GFX10-LABEL: test_umax3_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_max_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_max_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %max1 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %max2 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %max1, <2 x i16> %c)
+ ret <2 x i16> %max2
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
+declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmin3-fmax3-combine.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmin3-fmax3-combine.mir
new file mode 100644
index 00000000000000..356e416d13e0f6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmin3-fmax3-combine.mir
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_fmin3
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmin3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr2, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+ S_WAITCNT 0
+ renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr0, implicit $mode, implicit $exec
+ renamable $vgpr1 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr1, implicit $mode, implicit $exec
+ renamable $vgpr2 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr2, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+
+...
+---
+name: test_fmin3_nnan
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmin3_nnan
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+ S_WAITCNT 0
+ renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+
+...
+---
+name: test_fmin3_with_constants_nnan
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_fmin3_with_constants_nnan
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 1088421888, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+ S_WAITCNT 0
+ renamable $vgpr2 = V_MOV_B32_e32 1088421888, implicit $exec
+ renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+
+...
+---
+name: test_fmin3_v2f32_nnan
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_fmin3_v2f32_nnan
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1 = nnan V_MIN3_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ S_WAITCNT 0
+ renamable $vgpr0 = nnan V_MIN3_F32_e64 0, killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+ renamable $vgpr1 = nnan V_MIN3_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name: test_fmax3
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmax3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr1, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr2, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+ S_WAITCNT 0
+ renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr0, implicit $mode, implicit $exec
+ renamable $vgpr1 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr1, implicit $mode, implicit $exec
+ renamable $vgpr2 = nofpexcept V_MUL_F32_e32 1065353216, killed $vgpr2, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+
+...
+---
+name: test_fmax3_nnan
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_fmax3_nnan
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nnan V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+ S_WAITCNT 0
+ renamable $vgpr0 = nnan V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+
+...
+---
+name: test_fmax3_v2f32_nnan
+alignment: 1
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_fmax3_v2f32_nnan
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = nnan V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1 = nnan V_MAX3_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ S_WAITCNT 0
+ renamable $vgpr0 = nnan V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+ renamable $vgpr1 = nnan V_MAX3_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-min3-max3-combine.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-min3-max3-combine.mir
new file mode 100644
index 00000000000000..f99677f646e187
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-min3-max3-combine.mir
@@ -0,0 +1,500 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_smin3
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_smin3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[AMDGPU_SMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMIN3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMIN3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %11:vgpr(s32) = G_SMIN %0, %1
+ %12:vgpr(s32) = G_SMIN %11, %2
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smin3_with_constants
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_smin3_with_constants
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMIN3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMIN3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_SMIN %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_SMIN %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smin3_smin_umin
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_smin3_smin_umin
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[SMIN]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_SMIN %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_UMIN %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smin3_v2i16
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_smin3_v2i16
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY]], 16
+ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY2]], 16
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 16
+ ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY3]], 16
+ ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY4]], 16
+ ; CHECK-NEXT: [[AMDGPU_SMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMIN3 [[SEXT_INREG]], [[SEXT_INREG1]], [[SEXT_INREG4]]
+ ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY5]], 16
+ ; CHECK-NEXT: [[AMDGPU_SMIN3_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMIN3 [[SEXT_INREG2]], [[SEXT_INREG3]], [[SEXT_INREG5]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMIN3_]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[AMDGPU_SMIN3_1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %11:vgpr(s32) = COPY $vgpr0
+ %12:vgpr(s32) = COPY $vgpr1
+ %14:vgpr(s32) = COPY $vgpr2
+ %15:vgpr(s32) = COPY $vgpr3
+ %17:vgpr(s32) = COPY $vgpr4
+ %18:vgpr(s32) = COPY $vgpr5
+ %47:vgpr(s32) = G_SEXT_INREG %11, 16
+ %48:vgpr(s32) = G_SEXT_INREG %14, 16
+ %49:vgpr(s32) = G_SMIN %47, %48
+ %44:vgpr(s32) = G_SEXT_INREG %12, 16
+ %45:vgpr(s32) = G_SEXT_INREG %15, 16
+ %46:vgpr(s32) = G_SMIN %44, %45
+ %36:vgpr(s32) = G_SEXT_INREG %17, 16
+ %37:vgpr(s32) = G_SMIN %49, %36
+ %33:vgpr(s32) = G_SEXT_INREG %18, 16
+ %34:vgpr(s32) = G_SMIN %46, %33
+ $vgpr0 = COPY %37(s32)
+ $vgpr1 = COPY %34(s32)
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name: test_smax3
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_smax3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[AMDGPU_SMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMAX3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMAX3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %11:vgpr(s32) = G_SMAX %0, %1
+ %12:vgpr(s32) = G_SMAX %11, %2
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smax3_with_constants
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_smax3_with_constants
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMAX3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMAX3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_SMAX %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_SMAX %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smax3_smax_umax
+alignment: 1
+legalized: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_smax3_smax_umax
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[SMAX]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_SMAX %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_UMAX %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_smax3_v2i16
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_smax3_v2i16
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY]], 16
+ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY2]], 16
+ ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 16
+ ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY3]], 16
+ ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY4]], 16
+ ; CHECK-NEXT: [[AMDGPU_SMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMAX3 [[SEXT_INREG]], [[SEXT_INREG1]], [[SEXT_INREG4]]
+ ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY5]], 16
+ ; CHECK-NEXT: [[AMDGPU_SMAX3_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMAX3 [[SEXT_INREG2]], [[SEXT_INREG3]], [[SEXT_INREG5]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMAX3_]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[AMDGPU_SMAX3_1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %11:vgpr(s32) = COPY $vgpr0
+ %12:vgpr(s32) = COPY $vgpr1
+ %14:vgpr(s32) = COPY $vgpr2
+ %15:vgpr(s32) = COPY $vgpr3
+ %17:vgpr(s32) = COPY $vgpr4
+ %18:vgpr(s32) = COPY $vgpr5
+ %47:vgpr(s32) = G_SEXT_INREG %11, 16
+ %48:vgpr(s32) = G_SEXT_INREG %14, 16
+ %49:vgpr(s32) = G_SMAX %47, %48
+ %44:vgpr(s32) = G_SEXT_INREG %12, 16
+ %45:vgpr(s32) = G_SEXT_INREG %15, 16
+ %46:vgpr(s32) = G_SMAX %44, %45
+ %36:vgpr(s32) = G_SEXT_INREG %17, 16
+ %37:vgpr(s32) = G_SMAX %49, %36
+ %33:vgpr(s32) = G_SEXT_INREG %18, 16
+ %34:vgpr(s32) = G_SMAX %46, %33
+ $vgpr0 = COPY %37(s32)
+ $vgpr1 = COPY %34(s32)
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name: test_umin3
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_umin3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[AMDGPU_UMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMIN3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMIN3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %11:vgpr(s32) = G_UMIN %0, %1
+ %12:vgpr(s32) = G_UMIN %11, %2
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_umin3_with_constants
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_umin3_with_constants
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMIN3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMIN3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_UMIN %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_UMIN %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_umin3_v2i16
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_umin3_v2i16
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY6]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[COPY7]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[COPY1]], [[COPY8]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[COPY3]], [[COPY9]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:vgpr(s32) = G_AND [[COPY4]], [[COPY10]]
+ ; CHECK-NEXT: [[AMDGPU_UMIN3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMIN3 [[AND]], [[AND1]], [[AND4]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:vgpr(s32) = G_AND [[COPY5]], [[COPY11]]
+ ; CHECK-NEXT: [[AMDGPU_UMIN3_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMIN3 [[AND2]], [[AND3]], [[AND5]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMIN3_]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[AMDGPU_UMIN3_1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %11:vgpr(s32) = COPY $vgpr0
+ %12:vgpr(s32) = COPY $vgpr1
+ %14:vgpr(s32) = COPY $vgpr2
+ %15:vgpr(s32) = COPY $vgpr3
+ %17:vgpr(s32) = COPY $vgpr4
+ %18:vgpr(s32) = COPY $vgpr5
+ %50:sgpr(s32) = G_CONSTANT i32 65535
+ %51:vgpr(s32) = COPY %50(s32)
+ %47:vgpr(s32) = G_AND %11, %51
+ %52:vgpr(s32) = COPY %50(s32)
+ %48:vgpr(s32) = G_AND %14, %52
+ %49:vgpr(s32) = G_UMIN %47, %48
+ %53:vgpr(s32) = COPY %50(s32)
+ %44:vgpr(s32) = G_AND %12, %53
+ %54:vgpr(s32) = COPY %50(s32)
+ %45:vgpr(s32) = G_AND %15, %54
+ %46:vgpr(s32) = G_UMIN %44, %45
+ %55:vgpr(s32) = COPY %50(s32)
+ %36:vgpr(s32) = G_AND %17, %55
+ %37:vgpr(s32) = G_UMIN %49, %36
+ %56:vgpr(s32) = COPY %50(s32)
+ %33:vgpr(s32) = G_AND %18, %56
+ %34:vgpr(s32) = G_UMIN %46, %33
+ $vgpr0 = COPY %37(s32)
+ $vgpr1 = COPY %34(s32)
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+---
+name: test_umax3
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_umax3
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[AMDGPU_UMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMAX3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMAX3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(s32) = COPY $vgpr2
+ %11:vgpr(s32) = G_UMAX %0, %1
+ %12:vgpr(s32) = G_UMAX %11, %2
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_umax3_with_constants
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: test_umax3_with_constants
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMAX3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMAX3_]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %10:vgpr(s32) = G_UMAX %0, %1
+ %11:sgpr(s32) = G_CONSTANT i32 7
+ %13:vgpr(s32) = COPY %11(s32)
+ %12:vgpr(s32) = G_UMAX %10, %13
+ $vgpr0 = COPY %12(s32)
+ SI_RETURN implicit $vgpr0
+
+...
+---
+name: test_umax3_v2i16
+alignment: 1
+legalized: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; CHECK-LABEL: name: test_umax3_v2i16
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY6]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[COPY7]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[COPY1]], [[COPY8]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[COPY3]], [[COPY9]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:vgpr(s32) = G_AND [[COPY4]], [[COPY10]]
+ ; CHECK-NEXT: [[AMDGPU_UMAX3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMAX3 [[AND]], [[AND1]], [[AND4]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:vgpr(s32) = G_AND [[COPY5]], [[COPY11]]
+ ; CHECK-NEXT: [[AMDGPU_UMAX3_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMAX3 [[AND2]], [[AND3]], [[AND5]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMAX3_]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[AMDGPU_UMAX3_1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %11:vgpr(s32) = COPY $vgpr0
+ %12:vgpr(s32) = COPY $vgpr1
+ %14:vgpr(s32) = COPY $vgpr2
+ %15:vgpr(s32) = COPY $vgpr3
+ %17:vgpr(s32) = COPY $vgpr4
+ %18:vgpr(s32) = COPY $vgpr5
+ %50:sgpr(s32) = G_CONSTANT i32 65535
+ %51:vgpr(s32) = COPY %50(s32)
+ %47:vgpr(s32) = G_AND %11, %51
+ %52:vgpr(s32) = COPY %50(s32)
+ %48:vgpr(s32) = G_AND %14, %52
+ %49:vgpr(s32) = G_UMAX %47, %48
+ %53:vgpr(s32) = COPY %50(s32)
+ %44:vgpr(s32) = G_AND %12, %53
+ %54:vgpr(s32) = COPY %50(s32)
+ %45:vgpr(s32) = G_AND %15, %54
+ %46:vgpr(s32) = G_UMAX %44, %45
+ %55:vgpr(s32) = COPY %50(s32)
+ %36:vgpr(s32) = G_AND %17, %55
+ %37:vgpr(s32) = G_UMAX %49, %36
+ %56:vgpr(s32) = COPY %50(s32)
+ %33:vgpr(s32) = G_AND %18, %56
+ %34:vgpr(s32) = G_UMAX %46, %33
+ $vgpr0 = COPY %37(s32)
+ $vgpr1 = COPY %34(s32)
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 3019d4d298eb45..1b71bfd0c3e297 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -861,9 +861,8 @@ define amdgpu_kernel void @v_ctlz_i64(ptr addrspace(1) noalias %out, ptr addrspa
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v0, v0, 32 clamp
-; GFX10-GISEL-NEXT: v_min_u32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_min3_u32 v0, v1, v0, 64
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 64, v0
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -989,8 +988,7 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(ptr addrspace(1) noalias %out, ptr a
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp
-; GFX10-GISEL-NEXT: v_min_u32_e32 v1, v2, v1
-; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 64, v1
+; GFX10-GISEL-NEXT: v_min3_u32 v1, v2, v1, 64
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index f0c278a67c8bcc..3ebaf864683da6 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -749,9 +749,8 @@ define amdgpu_kernel void @v_cttz_i64(ptr addrspace(1) noalias %out, ptr addrspa
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp
-; GFX10-GISEL-NEXT: v_min_u32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: v_min3_u32 v0, v0, v1, 64
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 64, v0
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -859,8 +858,7 @@ define amdgpu_kernel void @v_cttz_i64_trunc(ptr addrspace(1) noalias %out, ptr a
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v2, v2, 32 clamp
-; GFX10-GISEL-NEXT: v_min_u32_e32 v1, v1, v2
-; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 64, v1
+; GFX10-GISEL-NEXT: v_min3_u32 v1, v1, v2, 64
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
More information about the llvm-commits
mailing list