[llvm] ec54867 - AMDGPU/GlobalISel: Add floating point med3 combine
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 3 04:02:47 PST 2021
Author: Petar Avramovic
Date: 2021-12-03T12:49:39+01:00
New Revision: ec54867d750bd6c095697d089d9ad5a7440974a8
URL: https://github.com/llvm/llvm-project/commit/ec54867d750bd6c095697d089d9ad5a7440974a8
DIFF: https://github.com/llvm/llvm-project/commit/ec54867d750bd6c095697d089d9ad5a7440974a8.diff
LOG: AMDGPU/GlobalISel: Add floating point med3 combine
Add floating point version of med3 combine.
Source is fminnum(fmaxnum(Val, K0), K1) or fmaxnum(fminnum(Val, K1), K0)
where K0 and K1 are constants and K0 <= K1.
Differential Revision: https://reviews.llvm.org/D90051
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c7c5ff7bcbe7..482474692788 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -64,6 +64,15 @@ def int_minmax_to_med3 : GICombineRule<
[{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
(apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+def fp_minmax_to_med3 : GICombineRule<
+ (defs root:$min_or_max, med3_matchdata:$matchinfo),
+ (match (wip_match_opcode G_FMAXNUM,
+ G_FMINNUM,
+ G_FMAXNUM_IEEE,
+ G_FMINNUM_IEEE):$min_or_max,
+ [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+
def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">;
def remove_fcanonicalize : GICombineRule<
@@ -102,7 +111,9 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain]> {
+ "AMDGPUGenRegBankCombinerHelper",
+ [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+ fp_minmax_to_med3]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 12cef2774aaf..84184b8ab2a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -172,6 +172,7 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
+def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 12b5830ef930..4f3d7fd0a9dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -16,6 +16,7 @@
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -36,13 +37,15 @@ class AMDGPURegBankCombinerHelper {
MachineRegisterInfo &MRI;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
+ const SIInstrInfo &TII;
CombinerHelper &Helper;
public:
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
RBI(*MF.getSubtarget().getRegBankInfo()),
- TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
+ TRI(*MF.getSubtarget().getRegisterInfo()),
+ TII(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
Register getAsVgpr(Register Reg);
@@ -63,7 +66,13 @@ class AMDGPURegBankCombinerHelper {
Register &Val, CstTy &K0, CstTy &K1);
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+ bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+
+private:
+ AMDGPU::SIModeRegisterDefaults getMode();
+ bool getIEEE();
+ bool isFminnumIeee(const MachineInstr &MI);
};
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
@@ -98,6 +107,13 @@ AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
case AMDGPU::G_UMAX:
case AMDGPU::G_UMIN:
return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM:
+ return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FMINNUM_IEEE:
+ return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
+ AMDGPU::G_AMDGPU_FMED3};
}
}
@@ -148,6 +164,59 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
return true;
}
+// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
+// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
+// ieee = false : min/max(NaN, K) = K
+// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
+// Other operand orders (see matchMed) give the same result since min and max
+// are commutative.
+
+// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
+// with fmed3(Val, K0, K1).
+// Val = SNaN only for ieee = true
+// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
+// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
+// max(min(SNaN, K1), K0) = max(K1, K0) = K1
+// Val = NaN,ieee = false or Val = QNaN,ieee = true
+// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
+// min(max(NaN, K0), K1) = min(K0, K1) = K0
+// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
+bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
+ MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty != LLT::scalar(16) && Ty != LLT::scalar(32))
+ return false;
+
+ auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
+
+ Register Val;
+ Optional<FPValueAndVReg> K0, K1;
+ // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
+ if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+ return false;
+
+ if (K0->Value > K1->Value)
+ return false;
+
+ // For IEEE=false perform combine only when it's safe to assume that there are
+ // no NaN inputs. Most often MI is marked with nnan fast math flag.
+ // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
+ // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
+ // nodes(max/min) have same behavior when one input is NaN and other isn't.
+ // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
+ // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
+ if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
+ // Don't fold single use constant that can't be inlined.
+ if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
+ (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
+ MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
+ return true;
+ }
+ }
+
+ return false;
+}
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
Med3MatchInfo &MatchInfo) {
B.setInstrAndDebugLoc(MI);
@@ -158,6 +227,16 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
MI.eraseFromParent();
}
+AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
+ return MF.getInfo<SIMachineFunctionInfo>()->getMode();
+}
+
+bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
+
+bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
+ return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
+}
+
class AMDGPURegBankCombinerHelperState {
protected:
CombinerHelper &Helper;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d55d8da8699a..94e38dd93dcb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2866,6 +2866,12 @@ def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+def G_AMDGPU_FMED3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
// Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
// operand Expects a MachineMemOperand in addition to explicit
// operands.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
new file mode 100644
index 000000000000..adb5b347ba87
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define float @test_min_max_ValK0_K1_f32(float %a) #0 {
+; GFX10-LABEL: test_min_max_ValK0_K1_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
+ ret float %fmed
+}
+
+define float @test_min_max_K0Val_K1_f32(float %a) #1 {
+; GFX10-LABEL: test_min_max_K0Val_K1_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0)
+ ret float %fmed
+}
+
+; min-max patterns for ieee=true do not have to check for NaNs
+; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
+define half @test_min_K1max_ValK0_f16(half %a) #0 {
+; GFX10-LABEL: test_min_K1max_ValK0_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0)
+ %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum)
+ ret half %fmed
+}
+
+define half @test_min_K1max_K0Val_f16(half %a) #1 {
+; GFX10-LABEL: test_min_K1max_K0Val_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a)
+ %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum)
+ ret half %fmed
+}
+
+; max-min patterns work only for non-NaN inputs
+define float @test_max_min_ValK1_K0_f32(float %a) #0 {
+; GFX10-LABEL: test_max_min_ValK1_K0_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
+ ret float %fmed
+}
+
+define float @test_max_min_K1Val_K0_f32(float %a) #1 {
+; GFX10-LABEL: test_max_min_K1Val_K0_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
+ ret float %fmed
+}
+
+define half @test_max_K0min_ValK1_f16(half %a) #0 {
+; GFX10-LABEL: test_max_K0min_ValK1_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0)
+ %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
+ ret half %fmed
+}
+
+define half @test_max_K0min_K1Val_f16(half %a) #1 {
+; GFX10-LABEL: test_max_K0min_K1Val_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a)
+ %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum)
+ ret half %fmed
+}
+
+; global nnan function attribute always forces fmed3 combine
+
+define float @test_min_max_global_nnan(float %a) #2 {
+; GFX10-LABEL: test_min_max_global_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
+ ret float %fmed
+}
+
+define float @test_max_min_global_nnan(float %a) #2 {
+; GFX10-LABEL: test_max_min_global_nnan:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ ret float %fmed
+}
+
+; ------------------------------------------------------------------------------
+; Negative patterns
+; ------------------------------------------------------------------------------
+
+; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
+define float @test_min_max_K0_gt_K1(float %a) #0 {
+; GFX10-LABEL: test_min_max_K0_gt_K1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0)
+ ret float %fmed
+}
+
+; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
+define float @test_max_min_K0_gt_K1(float %a) #0 {
+; GFX10-LABEL: test_max_min_K0_gt_K1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0)
+ %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0)
+ ret float %fmed
+}
+
+; non-inline constant
+define float @test_min_max_non_inline_const(float %a) #0 {
+; GFX10-LABEL: test_min_max_non_inline_const:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+ %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0)
+ ret float %fmed
+}
+
+; there is no fmed3 for f64 or v2f16 types
+
+define double @test_min_max_f64(double %a) #0 {
+; GFX10-LABEL: test_min_max_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
+; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0)
+ %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0)
+ ret double %fmed
+}
+
+define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
+; GFX10-LABEL: test_min_max_v2f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
+ %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>)
+ ret <2 x half> %fmed
+}
+
+; input that can be NaN
+
+; min-max patterns for ieee=false require known non-NaN input
+define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
+ ret float %fmed
+}
+
+; max-min patterns always require known non-NaN input
+
+define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ ret float %fmed
+}
+
+; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
+define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
+; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
+ %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+ ret float %fmed
+}
+
+declare half @llvm.minnum.f16(half, half)
+declare half @llvm.maxnum.f16(half, half)
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare double @llvm.maxnum.f64(double, double)
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
+declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
+attributes #0 = {"amdgpu-ieee"="true"}
+attributes #1 = {"amdgpu-ieee"="false"}
+attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
new file mode 100644
index 000000000000..2f41d8610004
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir
@@ -0,0 +1,553 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_min_max_ValK0_K1_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_ValK0_K1_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_min_max_K0Val_K1_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_K0Val_K1_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMAXNUM %7, %0
+ %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMINNUM %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_min_K1max_ValK0_f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_K1max_ValK0_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s16) = G_FCANONICALIZE [[TRUNC]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %2:vgpr(s32) = COPY $vgpr0
+ %0:vgpr(s16) = G_TRUNC %2(s32)
+ %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+ %9:vgpr(s16) = G_FCANONICALIZE %0
+ %10:vgpr(s16) = COPY %3(s16)
+ %4:vgpr(s16) = G_FMAXNUM_IEEE %9, %10
+ %5:sgpr(s16) = G_FCONSTANT half 0xH4400
+ %11:vgpr(s16) = COPY %5(s16)
+ %6:vgpr(s16) = G_FMINNUM_IEEE %11, %4
+ %8:vgpr(s32) = G_ANYEXT %6(s16)
+ $vgpr0 = COPY %8(s32)
+
+...
+
+---
+name: test_min_K1max_K0Val_f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_K1max_K0Val_f16
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %2:vgpr(s32) = COPY $vgpr0
+ %0:vgpr(s16) = G_TRUNC %2(s32)
+ %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+ %9:vgpr(s16) = COPY %3(s16)
+ %4:vgpr(s16) = nnan G_FMAXNUM %9, %0
+ %5:sgpr(s16) = G_FCONSTANT half 0xH4400
+ %10:vgpr(s16) = COPY %5(s16)
+ %6:vgpr(s16) = nnan G_FMINNUM %10, %4
+ %8:vgpr(s32) = G_ANYEXT %6(s16)
+ $vgpr0 = COPY %8(s32)
+...
+
+---
+name: test_max_min_ValK1_K0_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_min_ValK1_K0_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_max_min_K1Val_K0_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_min_K1Val_K0_f32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMINNUM %7, %0
+ %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMAXNUM %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_max_K0min_ValK1_f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_K0min_ValK1_f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %2:vgpr(s32) = COPY $vgpr0
+ %0:vgpr(s16) = G_TRUNC %2(s32)
+ %3:sgpr(s16) = G_FCONSTANT half 0xH4400
+ %9:vgpr(s16) = COPY %3(s16)
+ %4:vgpr(s16) = nnan G_FMINNUM_IEEE %0, %9
+ %5:sgpr(s16) = G_FCONSTANT half 0xH4000
+ %10:vgpr(s16) = COPY %5(s16)
+ %6:vgpr(s16) = nnan G_FMAXNUM_IEEE %10, %4
+ %8:vgpr(s32) = G_ANYEXT %6(s16)
+ $vgpr0 = COPY %8(s32)
+...
+
+---
+name: test_max_K0min_K1Val_f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_K1Val_f16
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[C1]](s16)
+ ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_FMED3 [[TRUNC]], [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_FMED3_]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %2:vgpr(s32) = COPY $vgpr0
+ %0:vgpr(s16) = G_TRUNC %2(s32)
+ %3:sgpr(s16) = G_FCONSTANT half 0xH4400
+ %9:vgpr(s16) = COPY %3(s16)
+ %4:vgpr(s16) = nnan G_FMINNUM %9, %0
+ %5:sgpr(s16) = G_FCONSTANT half 0xH4000
+ %10:vgpr(s16) = COPY %5(s16)
+ %6:vgpr(s16) = nnan G_FMAXNUM %10, %4
+ %8:vgpr(s32) = G_ANYEXT %6(s16)
+ $vgpr0 = COPY %8(s32)
+...
+
+# FixMe: add tests with attributes #2 = {"no-nans-fp-math"="true"}
+
+---
+name: test_min_max_K0_gt_K1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_K0_gt_K1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+---
+name: test_max_min_K0_gt_K1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_min_K0_gt_K1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMINNUM_IEEE %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMAXNUM_IEEE %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+# nnan G_FMINNUM_IEEE(G_FMAXNUM_IEEE(x, 2.0), 8.0) on s32 with ieee mode on.
+# The constants satisfy K0 <= K1, yet the CHECK lines expect the min/max pair
+# to be left unchanged. NOTE(review): going by the test name, this is
+# presumably because 8.0 is not an inline constant -- confirm against
+# matchFPMinMaxToMed3.
+---
+name: test_min_max_non_inline_const
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_non_inline_const
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 8.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = nnan G_FMAXNUM_IEEE %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 8.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = nnan G_FMINNUM_IEEE %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+# nnan G_FMINNUM_IEEE(G_FMAXNUM_IEEE(x, 2.0), 4.0) on s64 (double) operands
+# living in $vgpr0_vgpr1. The CHECK lines expect the min/max pair to be left
+# unchanged. NOTE(review): presumably the combine does not handle 64-bit
+# operands -- confirm against matchFPMinMaxToMed3.
+---
+name: test_min_max_f64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_min_max_f64
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s64) = G_FCONSTANT double 4.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C1]](s64)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s64) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64)
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %4:sgpr(s64) = G_FCONSTANT double 2.000000e+00
+ %11:vgpr(s64) = COPY %4(s64)
+ %5:vgpr(s64) = nnan G_FMAXNUM_IEEE %0, %11
+ %6:sgpr(s64) = G_FCONSTANT double 4.000000e+00
+ %12:vgpr(s64) = COPY %6(s64)
+ %7:vgpr(s64) = nnan G_FMINNUM_IEEE %5, %12
+ $vgpr0_vgpr1 = COPY %7(s64)
+...
+
+# nnan G_FMINNUM_IEEE(G_FMAXNUM_IEEE(x, K0), K1) on <2 x s16> operands. Each
+# constant vector is built by G_BUILD_VECTOR_TRUNC from one half constant used
+# for both lanes (0xH4000 and 0xH4400). The CHECK lines expect the min/max pair
+# to be left unchanged. NOTE(review): presumably the combine does not handle
+# vector operands -- confirm against matchFPMinMaxToMed3.
+---
+name: test_min_max_v2f16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_v2f16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4400
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C1]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[ANYEXT1]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+ %0:vgpr(<2 x s16>) = COPY $vgpr0
+ %3:sgpr(s16) = G_FCONSTANT half 0xH4000
+ %9:sgpr(s32) = G_ANYEXT %3(s16)
+ %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
+ %6:sgpr(s16) = G_FCONSTANT half 0xH4400
+ %10:sgpr(s32) = G_ANYEXT %6(s16)
+ %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
+ %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+ %4:vgpr(<2 x s16>) = nnan G_FMAXNUM_IEEE %0, %11
+ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+ %7:vgpr(<2 x s16>) = nnan G_FMINNUM_IEEE %4, %12
+ $vgpr0 = COPY %7(<2 x s16>)
+...
+
+# G_FMINNUM(G_FMAXNUM(x, 2.0), 4.0) with ieee mode off and no nnan flag on
+# either instruction, so nothing in the MIR rules out a NaN input. The CHECK
+# lines expect the min/max pair to be left unchanged.
+---
+name: test_min_max_maybe_NaN_input_ieee_false
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_min_max_maybe_NaN_input_ieee_false
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_FMAXNUM %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_FMINNUM %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+# G_FMAXNUM(G_FMINNUM(x, 4.0), 2.0) with ieee mode off and no nnan flag on
+# either instruction, so nothing in the MIR rules out a NaN input. The constants
+# satisfy K0 <= K1 (K0 = 2.0, K1 = 4.0), yet the CHECK lines expect the min/max
+# pair to be left unchanged.
+---
+name: test_max_min_maybe_NaN_input_ieee_false
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_false
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_FMINNUM %0, %7
+ %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_FMAXNUM %3, %8
+ $vgpr0 = COPY %5(s32)
+...
+
+# G_FMAXNUM_IEEE(G_FMINNUM_IEEE(G_FCANONICALIZE(x), 4.0), 2.0) with ieee mode
+# on and no nnan flag on either min/max instruction. The CHECK lines expect the
+# whole sequence, including the G_FCANONICALIZE, to be left unchanged.
+---
+name: test_max_min_maybe_NaN_input_ieee_true
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.1:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_max_min_maybe_NaN_input_ieee_true
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+ %0:vgpr(s32) = COPY $vgpr0
+ %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00
+ %7:vgpr(s32) = G_FCANONICALIZE %0
+ %8:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_FMINNUM_IEEE %7, %8
+ %4:sgpr(s32) = G_FCONSTANT float 2.000000e+00
+ %9:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_FMAXNUM_IEEE %3, %9
+ $vgpr0 = COPY %5(s32)
+...
More information about the llvm-commits
mailing list