[llvm] [AMDGPU] Codegen for min/max instructions for gfx1170 (PR #185625)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 04:55:37 PDT 2026
https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/185625
gfx1170 does not have s_minimum/maximum_f16/f32 instructions, so a new
feature `SALUMinimumMaximumInsts` is added for gfx12+ subtargets.
>From a83bcf9dc11893b521d5c995587bc768b5110eb7 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 10 Mar 2026 12:51:46 +0100
Subject: [PATCH] [AMDGPU] Codegen for min/max instructions for gfx1170
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 20 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 +-
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 11 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 8 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +-
.../GlobalISel/clamp-fmed3-const-combine.ll | 59 +
.../GlobalISel/clamp-minmax-const-combine.ll | 107 ++
.../GlobalISel/fmed3-min-max-const-combine.ll | 333 +++--
.../GlobalISel/llvm.amdgcn.rsq.clamp.ll | 89 ++
llvm/test/CodeGen/AMDGPU/fmax3.ll | 191 +++
llvm/test/CodeGen/AMDGPU/fmaximum.ll | 355 +++++
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 811 +++++++++++
llvm/test/CodeGen/AMDGPU/fmin3.ll | 257 ++++
llvm/test/CodeGen/AMDGPU/fminimum.ll | 355 +++++
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 811 +++++++++++
llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll | 235 +++-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 209 ++-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 261 +++-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll | 236 +++-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 209 ++-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 261 +++-
llvm/test/CodeGen/AMDGPU/maximumnum.ll | 1240 +++++++++++++++++
llvm/test/CodeGen/AMDGPU/minimummaximum.ll | 388 ++++--
llvm/test/CodeGen/AMDGPU/minimumnum.ll | 1204 ++++++++++++++++
llvm/test/CodeGen/AMDGPU/minmax.ll | 363 +++++
.../AMDGPU/select-flags-to-fmin-fmax.ll | 626 +++++++++
.../test/CodeGen/AMDGPU/vector-reduce-fmax.ll | 678 +++++++++
.../CodeGen/AMDGPU/vector-reduce-fmaximum.ll | 224 +++
.../test/CodeGen/AMDGPU/vector-reduce-fmin.ll | 678 +++++++++
.../CodeGen/AMDGPU/vector-reduce-fminimum.ll | 464 ++++++
30 files changed, 10364 insertions(+), 329 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a0b6ff13e7d7a..264a6597da5cc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -186,6 +186,10 @@ defm IEEEMinimumMaximumInsts : AMDGPUSubtargetFeature<"ieee-minimum-maximum-inst
"v_pk_minimum/maximum_f16 instructions"
>;
+defm SALUMinimumMaximumInsts : AMDGPUSubtargetFeature<"salu-minimum-maximum-insts",
+ "Has s_minimum/maximum_f16/f32 instructions"
+>;
+
defm Minimum3Maximum3F32 : AMDGPUSubtargetFeature<"minimum3-maximum3-f32",
"Has v_minimum3_f32 and v_maximum3_f32 instructions"
>;
@@ -1468,9 +1472,10 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
- FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
- FeatureFlatOffsetBits24, FeatureFlatSignedOffset
+ FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts,
+ FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24,
+ FeatureFlatSignedOffset
]
>;
@@ -1493,9 +1498,10 @@ def FeatureGFX13 : GCNSubtargetFeatureGeneration<"GFX13",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
- FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
- FeatureFlatOffsetBits24, FeatureFlatSignedOffset
+ FeatureIEEEMinimumMaximumInsts, FeatureSALUMinimumMaximumInsts,
+ FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics, FeatureFlatOffsetBits24,
+ FeatureFlatSignedOffset
]
>;
//===----------------------------------------------------------------------===//
@@ -2662,6 +2668,8 @@ def NotHasMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">,
AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>;
+def NotHasIEEEMinimumMaximumInsts : Predicate<"!Subtarget->hasIEEEMinimumMaximumInsts()">;
+
def NotHasCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
def NeedsAlignedVGPRs : Predicate<"Subtarget->needsAlignedVGPRs()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index f3264536006d9..812e6f3b42288 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1351,10 +1351,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
+
addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
- .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}})
+ .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
+ .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
- .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
+ .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7e047278fe78f..f14cc8e0446dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4075,8 +4075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_ROUNDEVEN:
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
- case AMDGPU::G_FMINIMUM:
- case AMDGPU::G_FMAXIMUM:
case AMDGPU::G_FMINIMUMNUM:
case AMDGPU::G_FMAXIMUMNUM:
case AMDGPU::G_INTRINSIC_TRUNC:
@@ -4091,6 +4089,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
+ case AMDGPU::G_FMINIMUM:
+ case AMDGPU::G_FMAXIMUM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ if (Subtarget.hasSALUMinimumMaximumInsts() && Ty.isScalar() &&
+ (Size == 32 || Size == 16) && isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FPTOSI_SAT:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 80bc12762425c..a55330c87e68d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2034,9 +2034,9 @@ class ClampPat<Instruction inst, ValueType vt> : GCNPat <
>;
def : ClampPat<V_MAX_F32_e64, f32>;
-let SubtargetPredicate = isNotGFX12Plus in
+let SubtargetPredicate = NotHasIEEEMinimumMaximumInsts in
def : ClampPat<V_MAX_F64_e64, f64>;
-let SubtargetPredicate = isGFX12Plus in
+let SubtargetPredicate = HasIEEEMinimumMaximumInsts in
def : ClampPat<V_MAX_NUM_F64_e64, f64>;
let SubtargetPredicate = NotHasTrue16BitInsts in
def : ClampPat<V_MAX_F16_e64, f16>;
@@ -3671,13 +3671,13 @@ multiclass SelectCanonicalizeAsMax<
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MAX_F64_e64 $src_mods, $src, $src_mods, $src)> {
- let OtherPredicates = !listconcat(f64_preds, [isNotGFX12Plus]);
+ let OtherPredicates = !listconcat(f64_preds, [NotHasIEEEMinimumMaximumInsts]);
}
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MAX_NUM_F64_e64 $src_mods, $src, $src_mods, $src)> {
- let OtherPredicates = !listconcat(f64_preds, [isGFX12Plus]);
+ let OtherPredicates = !listconcat(f64_preds, [HasIEEEMinimumMaximumInsts]);
}
def : GCNPat<
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 710e9c5166f2e..1008fa8dcdb51 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -642,7 +642,7 @@ unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
return SIEncodingFamily::GFX12;
if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
- return SIEncodingFamily::GFX11;
+ return isGFX1170(ST) ? SIEncodingFamily::GFX1170 : SIEncodingFamily::GFX11;
llvm_unreachable("Subtarget generation does not support VOPD!");
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index 23f5ae4f59005..884630ddbb473 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
@@ -9,6 +10,12 @@ define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_f32_known_nnan_ieee_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_f32_known_nnan_ieee_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -30,6 +37,12 @@ define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_f16_known_nnan_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_f16_known_nnan_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -53,6 +66,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
; GFX10-NEXT: v_min_f32_e64 v0, 0x41200000, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -77,6 +98,12 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -100,6 +127,12 @@ define float @test_fmed3_global_nnan(float %a) #3 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_global_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -127,6 +160,12 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -151,6 +190,14 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e64 v0, 0x41200000, v0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -175,6 +222,12 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -197,6 +250,12 @@ define float @test_fmed3_unknown_input_ieee_true_dx10clamp_true(float %a) #2 {
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 0, 1.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_fmed3_unknown_input_ieee_true_dx10clamp_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 0315bd86feeda..8fd41241b7ae4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_min_max_ValK0_K1_f32(float %a) #0 {
@@ -9,6 +10,12 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_ValK0_K1_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -31,6 +38,12 @@ define double @test_min_max_K0Val_K1_f64(double %a) #1 {
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_K0Val_K1_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_K0Val_K1_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -54,6 +67,12 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_K1max_ValK0_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -76,6 +95,12 @@ define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_K1max_K0Val_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -98,6 +123,12 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_splat_padded_with_undef:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_splat_padded_with_undef:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -122,6 +153,12 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_ValK1_K0_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -144,6 +181,12 @@ define double @test_max_min_K1Val_K0_f64(double %a) #1 {
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_K1Val_K0_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f64 v[0:1], v[0:1], 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_K1Val_K0_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -166,6 +209,12 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX10-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_K0min_ValK1_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f16_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -189,6 +238,12 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_K0min_K1Val_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_K0min_K1Val_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -213,6 +268,12 @@ define float @test_min_max_global_nnan(float %a) {
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_global_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -234,6 +295,12 @@ define float @test_max_min_global_nnan(float %a) {
; GFX10-NEXT: v_max_f32_e64 v0, v0, v0 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_global_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e64 v0, v0, v0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -261,6 +328,12 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_K0_gt_K1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 1.0, 0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -284,6 +357,12 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_K0_gt_K1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0, 1.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -310,6 +389,12 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -334,6 +419,12 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -360,6 +451,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -386,6 +485,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_max_f32_e32 v0, 0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_mul_f32_e32 v0, 2.0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index b754bb6081c31..dbcd4eaa2d71d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -1,20 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefix=GFX12 %s
define float @test_min_max_ValK0_K1_f32(float %a) #0 {
+; GFX8-LABEL: test_min_max_ValK0_K1_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_ValK0_K1_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_ValK0_K1_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_ValK0_K1_f32:
; GFX12: ; %bb.0:
@@ -31,17 +38,23 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
}
define float @test_min_max_K0Val_K1_f32(float %a) #1 {
+; GFX8-LABEL: test_min_max_K0Val_K1_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_K0Val_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_K0Val_K1_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_K0Val_K1_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0Val_K1_f32:
; GFX12: ; %bb.0:
@@ -60,13 +73,6 @@ define float @test_min_max_K0Val_K1_f32(float %a) #1 {
; min-max patterns for ieee=true do not have to check for NaNs
; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
define half @test_min_K1max_ValK0_f16(half %a) #0 {
-; GFX10-LABEL: test_min_K1max_ValK0_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
; GFX8-LABEL: test_min_K1max_ValK0_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -75,6 +81,21 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_min_K1max_ValK0_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-LABEL: test_min_K1max_ValK0_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_K1max_ValK0_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -92,12 +113,6 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 {
}
define half @test_min_K1max_K0Val_f16(half %a) #1 {
-; GFX10-LABEL: test_min_K1max_K0Val_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
; GFX8-LABEL: test_min_K1max_K0Val_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -105,6 +120,18 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 {
; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_min_K1max_K0Val_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-LABEL: test_min_K1max_K0Val_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_K1max_K0Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -121,17 +148,23 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 {
; max-mix patterns work only for non-NaN inputs
define float @test_max_min_ValK1_K0_f32(float %a) #0 {
+; GFX8-LABEL: test_max_min_ValK1_K0_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_ValK1_K0_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_ValK1_K0_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_ValK1_K0_f32:
; GFX12: ; %bb.0:
@@ -148,17 +181,23 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
}
define float @test_max_min_K1Val_K0_f32(float %a) #1 {
+; GFX8-LABEL: test_max_min_K1Val_K0_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_K1Val_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_K1Val_K0_f32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_K1Val_K0_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K1Val_K0_f32:
; GFX12: ; %bb.0:
@@ -175,12 +214,6 @@ define float @test_max_min_K1Val_K0_f32(float %a) #1 {
}
define half @test_max_K0min_ValK1_f16(half %a) #0 {
-; GFX10-LABEL: test_max_K0min_ValK1_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
; GFX8-LABEL: test_max_K0min_ValK1_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -188,6 +221,18 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_max_K0min_ValK1_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-LABEL: test_max_K0min_ValK1_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_K0min_ValK1_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -203,12 +248,6 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
}
define half @test_max_K0min_K1Val_f16(half %a) #1 {
-; GFX10-LABEL: test_max_K0min_K1Val_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
; GFX8-LABEL: test_max_K0min_K1Val_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,6 +255,18 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; GFX8-NEXT: v_max_f16_e32 v0, 2.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_max_K0min_K1Val_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_med3_f16 v0, v0, 2.0, 4.0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-LABEL: test_max_K0min_K1Val_f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f16 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_max_K0min_K1Val_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -233,17 +284,23 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
define float @test_min_max_global_nnan(float %a) {
+; GFX8-LABEL: test_min_max_global_nnan:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_global_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_global_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_global_nnan:
; GFX12: ; %bb.0:
@@ -260,17 +317,23 @@ define float @test_min_max_global_nnan(float %a) {
}
define float @test_max_min_global_nnan(float %a) {
+; GFX8-LABEL: test_max_min_global_nnan:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_global_nnan:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_global_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_global_nnan:
; GFX12: ; %bb.0:
@@ -292,6 +355,13 @@ define float @test_max_min_global_nnan(float %a) {
; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
define float @test_min_max_K0_gt_K1(float %a) #0 {
+; GFX8-LABEL: test_min_max_K0_gt_K1:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -299,12 +369,11 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_K0_gt_K1:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_K0_gt_K1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 4.0, 2.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_K0_gt_K1:
; GFX12: ; %bb.0:
@@ -322,6 +391,13 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
define float @test_max_min_K0_gt_K1(float %a) #0 {
+; GFX8-LABEL: test_max_min_K0_gt_K1:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,12 +405,11 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_K0_gt_K1:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_K0_gt_K1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_K0_gt_K1:
; GFX12: ; %bb.0:
@@ -352,6 +427,13 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; non-inline constant
define float @test_min_max_non_inline_const(float %a) #0 {
+; GFX8-LABEL: test_min_max_non_inline_const:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_non_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -359,12 +441,11 @@ define float @test_min_max_non_inline_const(float %a) #0 {
; GFX10-NEXT: v_min_f32_e32 v0, 0x41000000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_non_inline_const:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 0x41000000, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_non_inline_const:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, 2.0, 0x41000000
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_non_inline_const:
; GFX12: ; %bb.0:
@@ -383,6 +464,13 @@ define float @test_min_max_non_inline_const(float %a) #0 {
; there is no fmed3 for f64 or v2f16 types
define double @test_min_max_f64(double %a) #0 {
+; GFX8-LABEL: test_min_max_f64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
+; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -390,12 +478,13 @@ define double @test_min_max_f64(double %a) #0 {
; GFX10-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_f64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 2.0
-; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 4.0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], 2.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_f64:
; GFX12: ; %bb.0:
@@ -414,13 +503,6 @@ define double @test_min_max_f64(double %a) #0 {
}
define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
-; GFX10-LABEL: test_min_max_v2f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
-; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
; GFX8-LABEL: test_min_max_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -433,6 +515,21 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_min_max_v2f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-LABEL: test_min_max_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_min_max_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -453,6 +550,13 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; min-max patterns for ieee=false require known non-NaN input
define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -460,12 +564,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_min_max_maybe_NaN_input_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
@@ -486,6 +591,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; max-min patterns always require known non-NaN input
define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
+; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -493,12 +605,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX12: ; %bb.0:
@@ -518,6 +631,14 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
+; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
+; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -526,13 +647,13 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0
-; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GFX1170-LABEL: test_max_min_maybe_NaN_input_ieee_true:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX12: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index 4a6e24b700663..9bbdc2982138c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefix=GFX1170 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
define float @v_rsq_clamp_f32(float %src) #0 {
@@ -18,6 +19,15 @@ define float @v_rsq_clamp_f32(float %src) #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f32_e32 v0, v0
+; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
+; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -49,6 +59,15 @@ define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_fabs_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f32_e64 v0, |v0|
+; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
+; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_fabs_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -85,6 +104,19 @@ define double @v_rsq_clamp_f64(double %src) #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -124,6 +156,19 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_fabs_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_fabs_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -160,6 +205,15 @@ define float @v_rsq_clamp_undef_f32() #0 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_undef_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f32_e32 v0, s0
+; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
+; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_undef_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -196,6 +250,19 @@ define double @v_rsq_clamp_undef_f64() #0 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_undef_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], s[0:1]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_undef_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -231,6 +298,15 @@ define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_f32_non_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f32_e32 v0, v0
+; GFX1170-NEXT: v_mov_b32_e32 v1, 0xff7fffff
+; GFX1170-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, 0x7f7fffff, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_f32_non_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -266,6 +342,19 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_rsq_clamp_f64_non_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0x7fefffff
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_mov_b32_e32 v2, -1
+; GFX1170-NEXT: v_mov_b32_e32 v3, 0xffefffff
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_rsq_clamp_f64_non_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index c2b44cb251ffb..03ab1e3e69212 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -4,6 +4,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s
@@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmax3_olt_0_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_mov_b32 s22, s10
+; GFX1170-NEXT: s_mov_b32 s23, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: s_mov_b32 s20, s6
+; GFX1170-NEXT: s_mov_b32 s21, s7
+; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_max3_num_f32 v0, v0, v1, v2
+; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmax3_olt_0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmax3_olt_1_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_mov_b32 s22, s10
+; GFX1170-NEXT: s_mov_b32 s23, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: s_mov_b32 s20, s6
+; GFX1170-NEXT: s_mov_b32 s21, s7
+; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_max3_num_f32 v0, v2, v0, v1
+; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmax3_olt_1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
+; GFX1170-TRUE16-LABEL: test_fmax3_olt_0_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v0.h, v1.l
+; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-FAKE16-LABEL: test_fmax3_olt_0_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v2
+; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-FAKE16-NEXT: s_endpgm
+;
; GFX12-TRUE16-LABEL: test_fmax3_olt_0_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
+; GFX1170-TRUE16-LABEL: test_fmax3_olt_1_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-TRUE16-NEXT: v_max3_num_f16 v0.l, v1.l, v0.l, v0.h
+; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-FAKE16-LABEL: test_fmax3_olt_1_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-FAKE16-NEXT: v_max3_num_f16 v0, v2, v0, v1
+; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-FAKE16-NEXT: s_endpgm
+;
; GFX12-TRUE16-LABEL: test_fmax3_olt_1_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -1042,6 +1224,15 @@ define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX11-NEXT: v_pk_max_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: no_fmax3_v2f16:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v2, v0
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: no_fmax3_v2f16:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
index 471829186841d..ea1abbdf3b1c6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -15,6 +19,11 @@ define amdgpu_ps float @test_fmaximum_f32_vv(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f32_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f32_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@@ -44,6 +53,11 @@ define amdgpu_ps float @test_fmaximum_f32_ss(float inreg %a, float inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f32_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, s0, s1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f32 s0, s0, s1
@@ -63,6 +77,11 @@ define amdgpu_ps float @test_fmaximum_f32_vs(float %a, float inreg %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f32_vs:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, s0
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f32_vs:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, s0
@@ -77,6 +96,11 @@ define amdgpu_ps float @test_fmaximum_nnan_f32(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_nnan_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_nnan_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@@ -94,6 +118,11 @@ define amdgpu_ps float @test_fmaximum_nsz_f32(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_nsz_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_nsz_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@@ -108,6 +137,11 @@ define amdgpu_ps float @test_fmaximum_signed_zero_f32() {
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_signed_zero_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_signed_zero_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v0, 0
@@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v2
@@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fmaximum_v2f32_ss(<2 x float> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v2f32_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, s0, s2
+; GFX1170-NEXT: v_maximum_f32 v1, s1, s3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v2f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f32 s0, s0, s2
@@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fmaximum_v3f32(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v3
@@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fmaximum_v4f32(<4 x float> %a, <4 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v4f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v4
@@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fmaximum_v16f32(<16 x float> %a, <16 x float
; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v16f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v16
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v17
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v18
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v19
+; GFX1170-NEXT: v_maximum_f32 v4, v4, v20
+; GFX1170-NEXT: v_maximum_f32 v5, v5, v21
+; GFX1170-NEXT: v_maximum_f32 v6, v6, v22
+; GFX1170-NEXT: v_maximum_f32 v7, v7, v23
+; GFX1170-NEXT: v_maximum_f32 v8, v8, v24
+; GFX1170-NEXT: v_maximum_f32 v9, v9, v25
+; GFX1170-NEXT: v_maximum_f32 v10, v10, v26
+; GFX1170-NEXT: v_maximum_f32 v11, v11, v27
+; GFX1170-NEXT: v_maximum_f32 v12, v12, v28
+; GFX1170-NEXT: v_maximum_f32 v13, v13, v29
+; GFX1170-NEXT: v_maximum_f32 v14, v14, v30
+; GFX1170-NEXT: v_maximum_f32 v15, v15, v31
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v16
@@ -320,6 +401,26 @@ define amdgpu_ps half @test_fmaximum_f16_vv(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_vv:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_vv:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_vv:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-TRUE16-LABEL: test_fmaximum_f16_vv:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
@@ -364,6 +465,26 @@ define amdgpu_ps half @test_fmaximum_f16_ss(half inreg %a, half inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-TRUE16-LABEL: test_fmaximum_f16_ss:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
+; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_fmaximum_f16_ss:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
+; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_f16_ss:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_f16_ss:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_maximum_f16 s0, s0, s1
@@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_vv(<2 x half> %a, <2 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v2f16_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v2f16_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
@@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v2f16_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v2f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s1
@@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_vv:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_vv:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, v1.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_vv:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, v1, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fmaximum_v3f16_vv:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, v0, v2
@@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fmaximum_v3f16_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2
+; GFX1170-SDAG-NEXT: v_pk_maximum_f16 v1, s1, s3
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_ss:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v1.l, s1, s3
+; GFX1170-GISEL-TRUE16-NEXT: v_pk_maximum_f16 v0, s0, s2
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_ss:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s1, s3
+; GFX1170-GISEL-FAKE16-NEXT: v_pk_maximum_f16 v0, s0, s2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fmaximum_v3f16_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_maximum_f16 v0, s0, s2
@@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
@@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fmaximum_v4f16_ss(<4 x half> inreg %a, <4 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v4f16_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, s1, s3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v4f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_maximum_f16 v0, s0, s2
@@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_vv(double %a, double %b) {
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f64_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f64_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
@@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fmaximum_f64_ss(double inreg %a, double inreg
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fmaximum_f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fmaximum_f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fmaximum_f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
@@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fmaximum_v2f64_ss(<2 x double> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fmaximum_v2f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fmaximum_v2f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fmaximum_v2f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[4:5]
@@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64(<4 x double> %a, <4 x double>
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_v4f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
@@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fmaximum_v4f64_ss(<4 x double> inreg %a, <4 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fmaximum_v4f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11]
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13]
+; GFX1170-SDAG-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fmaximum_v4f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[2:3], s[2:3], s[10:11]
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[4:5], s[4:5], s[12:13]
+; GFX1170-GISEL-NEXT: v_maximum_f64 v[6:7], s[6:7], s[14:15]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fmaximum_v4f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_maximum_f64 v[0:1], s[0:1], s[8:9]
@@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fmaximumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
+; GFX1170-LABEL: fmaximumi_f32_move_to_valu:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_clause 0x1
+; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v2
+; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-SDAG-LABEL: fmaximumi_f32_move_to_valu:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_clause 0x1
@@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
+; GFX1170-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1
+; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-SDAG-FAKE16-LABEL: fmaximum_f16_move_to_valu:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1
+; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v1, v1, v2
+; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-TRUE16-LABEL: fmaximum_f16_move_to_valu:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1
+; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-FAKE16-LABEL: fmaximum_f16_move_to_valu:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1
+; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_maximum_f16 v1, s2, s3
+; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
+;
; GFX12-SDAG-TRUE16-LABEL: fmaximum_f16_move_to_valu:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
@@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_on(float %a, float %b) #0 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f32_ieee_on:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f32_ieee_on:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
@@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fmaximum_f32_ieee_off(float %a, float %b) #1 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fmaximum_f32_ieee_off:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fmaximum_f32_ieee_off:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 6010f29c166a1..bdbd980cc166b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
@@ -15,6 +17,12 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -50,6 +58,12 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v2, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -84,6 +98,14 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre
; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_fmaximum3_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_mov_b32_e32 v0, s2
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, s0, s1, v0
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_fmaximum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -126,6 +148,12 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fabs0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -162,6 +190,12 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fabs1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, |v1|, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +232,12 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fabs2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, |v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -234,6 +274,12 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -272,6 +318,12 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -310,6 +362,12 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fneg_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -351,6 +409,12 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fneg0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, -v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -387,6 +451,12 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fneg1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, -v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -423,6 +493,12 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_fneg2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, -v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -459,6 +535,12 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_const0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -495,6 +577,12 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32__const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -531,6 +619,12 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_inlineimm0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, 4.0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -566,6 +660,12 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32__inlineimm:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -603,6 +703,14 @@ define float @v_fmaximum3_f32_const1_const2(float %a) {
; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f32_const1_const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s0, 0x41000000
+; GFX1170-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f32_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -641,6 +749,13 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v4, v0, v2
+; GFX1170-NEXT: v_maximum3_f32 v1, v5, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -686,6 +801,13 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v4
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, v3, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -731,6 +853,13 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v4|
+; GFX1170-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -779,6 +908,13 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v2, -v4
+; GFX1170-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -827,6 +963,13 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, 2.0, v2
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, 2.0, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -872,6 +1015,13 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f32__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, 4.0
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, v3, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -918,6 +1068,14 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v6, v0, v3
+; GFX1170-NEXT: v_maximum3_f32 v1, v7, v1, v4
+; GFX1170-NEXT: v_maximum3_f32 v2, v8, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -973,6 +1131,14 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v3, v6
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, v4, v7
+; GFX1170-NEXT: v_maximum3_f32 v2, v2, v5, v8
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1028,6 +1194,14 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v6|
+; GFX1170-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v7|
+; GFX1170-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1086,6 +1260,14 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, -v0, -v3, -v6
+; GFX1170-NEXT: v_maximum3_f32 v1, -v1, -v4, -v7
+; GFX1170-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1144,6 +1326,14 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, 2.0, v3
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, 2.0, v4
+; GFX1170-NEXT: v_maximum3_f32 v2, v2, 2.0, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1199,6 +1389,14 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f32__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v3, 4.0
+; GFX1170-NEXT: v_maximum3_f32 v1, v1, v4, 4.0
+; GFX1170-NEXT: v_maximum3_f32 v2, v2, v5, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1263,6 +1461,18 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1308,6 +1518,18 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v2, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_commute:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v2.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_commute:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v2, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1355,6 +1577,26 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: ; return to shader part epilog
;
+; GFX1170-TRUE16-LABEL: s_fmaximum3_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, s0, s1, v0.l
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-FAKE16-LABEL: s_fmaximum3_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, s0, s1, v0
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_fmaximum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -1411,6 +1653,18 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, |v0.l|, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1458,6 +1712,18 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, |v1|, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, |v1.l|, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, |v1|, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1505,6 +1771,18 @@ define half @v_fmaximum3_f16_fabs2(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, |v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, |v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1552,6 +1830,18 @@ define half @v_fmaximum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, |v1|, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fabs_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, |v0.l|, |v1.l|, |v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fabs_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, |v0|, |v1|, |v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1603,6 +1893,18 @@ define half @v_fmaximum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -v0, -v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -v0.l, -v1.l, -v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -v0, -v1, -v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1654,6 +1956,18 @@ define half @v_fmaximum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -|v0|, -|v1|, -|v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg_fabs_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -|v0.l|, -|v1.l|, -|v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg_fabs_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -|v0|, -|v1|, -|v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1708,6 +2022,18 @@ define half @v_fmaximum3_f16_fneg0(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, -v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, -v0.l, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, -v0, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1755,6 +2081,18 @@ define half @v_fmaximum3_f16_fneg1(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, -v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, -v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, -v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1802,6 +2140,18 @@ define half @v_fmaximum3_f16_fneg2(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_fneg2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, -v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_fneg2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, -v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1849,6 +2199,18 @@ define half @v_fmaximum3_f16_const0(half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, 0x4800, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_const0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, 0x4800, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_const0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, 0x4800, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1895,6 +2257,18 @@ define half @v_fmaximum3_f16__const2(half %a, half %b) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 0x4800
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16__const2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 0x4800
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16__const2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 0x4800
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1941,6 +2315,18 @@ define half @v_fmaximum3_f16_inlineimm0(half %b, half %c) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, 4.0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_inlineimm0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, 4.0, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_inlineimm0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, 4.0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1986,6 +2372,18 @@ define half @v_fmaximum3_f16__inlineimm(half %a, half %b) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 4.0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16__inlineimm:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 4.0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16__inlineimm:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, v1, 4.0
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2035,6 +2433,22 @@ define half @v_fmaximum3_f16_const1_const2(half %a) {
; GFX12-FAKE16-NEXT: v_maximum3_f16 v0, v0, s0, 0x4c00
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fmaximum3_f16_const1_const2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 0x4c00
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fmaximum3_f16_const1_const2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_movk_i32 s0, 0x4800
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1170-FAKE16-NEXT: v_maximum3_f16 v0, v0, s0, 0x4c00
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_f16_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2074,6 +2488,14 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX12-NEXT: v_pk_maximum_f16 v0, v2, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v2, v0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2120,6 +2542,14 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2169,6 +2599,17 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2222,6 +2663,14 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2271,6 +2720,14 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2316,6 +2773,14 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v2f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2364,6 +2829,16 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v4, v0
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v5, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2426,6 +2901,16 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2495,6 +2980,23 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2571,6 +3073,16 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2636,6 +3148,16 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 2.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2693,6 +3215,16 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v3f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2755,6 +3287,16 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v4, v0
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v5, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2820,6 +3362,16 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2892,6 +3444,23 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2969,6 +3538,16 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3037,6 +3616,16 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3099,6 +3688,16 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_v4f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3162,6 +3761,14 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3195,6 +3802,14 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[4:5], v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[4:5], v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3227,6 +3842,16 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do
; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_fmaximum3_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], s[4:5]
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_fmaximum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
@@ -3269,6 +3894,14 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fabs0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fabs0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3303,6 +3936,14 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fabs1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fabs1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3337,6 +3978,14 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fabs2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fabs2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3371,6 +4020,14 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], |v[0:1]|, |v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], |v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3407,6 +4064,14 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], -v[0:1], -v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3443,6 +4108,14 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -|v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fneg_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], -|v[0:1]|, -|v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -|v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fneg_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3482,6 +4155,14 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fneg0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], -v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fneg0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3516,6 +4197,14 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fneg1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fneg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3550,6 +4239,14 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_fneg2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], -v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_fneg2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3584,6 +4281,14 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_const0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3619,6 +4324,14 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) {
; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64__const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64__const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3653,6 +4366,14 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_inlineimm0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_inlineimm0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3686,6 +4407,14 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) {
; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64__inlineimm:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64__inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3719,6 +4448,14 @@ define double @v_fmaximum3_f64_const1_const2(double %a) {
; GFX12-NEXT: v_maximum_f64 v[0:1], 0x40300000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fmaximum3_f64_const1_const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], 0x40300000, v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fmaximum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3755,6 +4492,14 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c)
; GFX12-NEXT: v_maximum_f32 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fmaximum3_f32__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f32 v1, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fmaximum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3790,6 +4535,15 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float
; GFX12-NEXT: s_maximum_f32 s1, s0, s2
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_no_fmaximum3_f32__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maximum_f32 v0, s0, s1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_maximum_f32 v1, v0, s2
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_no_fmaximum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -3852,6 +4606,23 @@ define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_no_fmaximum3_f16__multi_use:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.h, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_no_fmaximum3_f16__multi_use:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v1, v0, v2
+; GFX1170-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fmaximum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3893,6 +4664,30 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half in
; GFX12-NEXT: s_and_b32 s1, 0xffff, s1
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-TRUE16-LABEL: s_no_fmaximum3_f16__multi_use:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v1.l, v0.l, s2
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-FAKE16-LABEL: s_no_fmaximum3_f16__multi_use:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v1, v0, s2
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_no_fmaximum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -3948,6 +4743,14 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fmaximum3_v2f16__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fmaximum3_v2f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3997,6 +4800,14 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
; GFX12-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fmaximum3_f64__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_no_fmaximum3_f64__multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 382c98218a11c..4b952df206b31 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -4,6 +4,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250,GFX1250-TRUE16 %s
@@ -130,6 +132,36 @@ define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmin3_olt_0_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_mov_b32 s22, s10
+; GFX1170-NEXT: s_mov_b32 s23, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: s_mov_b32 s20, s6
+; GFX1170-NEXT: s_mov_b32 s21, s7
+; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_min3_num_f32 v0, v0, v1, v2
+; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmin3_olt_0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -321,6 +353,36 @@ define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmin3_olt_1_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_mov_b32 s22, s10
+; GFX1170-NEXT: s_mov_b32 s23, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: s_mov_b32 s20, s6
+; GFX1170-NEXT: s_mov_b32 s21, s7
+; GFX1170-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_min3_num_f32 v0, v2, v0, v1
+; GFX1170-NEXT: buffer_store_b32 v0, off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmin3_olt_1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -552,6 +614,66 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
+; GFX1170-TRUE16-LABEL: test_fmin3_olt_0_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v0.h, v1.l
+; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-FAKE16-LABEL: test_fmin3_olt_0_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v2
+; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-FAKE16-NEXT: s_endpgm
+;
; GFX12-TRUE16-LABEL: test_fmin3_olt_0_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -845,6 +967,66 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-NEXT: s_endpgm
;
+; GFX1170-TRUE16-LABEL: test_fmin3_olt_1_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-TRUE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-TRUE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-TRUE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-TRUE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-TRUE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-TRUE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-TRUE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-TRUE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-TRUE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_hi_b16 v0, off, s[16:19], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: buffer_load_d16_b16 v1, off, s[20:23], 0 glc dlc
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-TRUE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-TRUE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-TRUE16-NEXT: v_min3_num_f16 v0.l, v1.l, v0.l, v0.h
+; GFX1170-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-FAKE16-LABEL: test_fmin3_olt_1_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-FAKE16-NEXT: s_mov_b32 s10, -1
+; GFX1170-FAKE16-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-FAKE16-NEXT: s_mov_b32 s14, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s15, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s18, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s19, s11
+; GFX1170-FAKE16-NEXT: s_mov_b32 s22, s10
+; GFX1170-FAKE16-NEXT: s_mov_b32 s23, s11
+; GFX1170-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s12, s2
+; GFX1170-FAKE16-NEXT: s_mov_b32 s13, s3
+; GFX1170-FAKE16-NEXT: s_mov_b32 s16, s4
+; GFX1170-FAKE16-NEXT: s_mov_b32 s17, s5
+; GFX1170-FAKE16-NEXT: s_mov_b32 s20, s6
+; GFX1170-FAKE16-NEXT: s_mov_b32 s21, s7
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-FAKE16-NEXT: s_mov_b32 s8, s0
+; GFX1170-FAKE16-NEXT: s_mov_b32 s9, s1
+; GFX1170-FAKE16-NEXT: v_min3_num_f16 v0, v2, v0, v1
+; GFX1170-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
+; GFX1170-FAKE16-NEXT: s_endpgm
+;
; GFX12-TRUE16-LABEL: test_fmin3_olt_1_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -1042,6 +1224,15 @@ define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX11-NEXT: v_pk_min_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: no_fmin3_v2f16:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v2, v0
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: no_fmin3_v2f16:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1204,6 +1395,39 @@ define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmin3_olt_0_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s6
+; GFX1170-NEXT: s_mov_b32 s13, s7
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5]
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmin3_olt_0_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
@@ -1414,6 +1638,39 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
+; GFX1170-LABEL: test_fmin3_olt_1_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX1170-NEXT: s_mov_b32 s10, -1
+; GFX1170-NEXT: s_mov_b32 s11, 0x31016000
+; GFX1170-NEXT: s_mov_b32 s14, s10
+; GFX1170-NEXT: s_mov_b32 s15, s11
+; GFX1170-NEXT: s_mov_b32 s18, s10
+; GFX1170-NEXT: s_mov_b32 s19, s11
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s2
+; GFX1170-NEXT: s_mov_b32 s13, s3
+; GFX1170-NEXT: s_mov_b32 s16, s4
+; GFX1170-NEXT: s_mov_b32 s17, s5
+; GFX1170-NEXT: buffer_load_b64 v[0:1], off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: buffer_load_b64 v[2:3], off, s[16:19], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s12, s6
+; GFX1170-NEXT: s_mov_b32 s13, s7
+; GFX1170-NEXT: s_mov_b32 s8, s0
+; GFX1170-NEXT: buffer_load_b64 v[4:5], off, s[12:15], 0 glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s9, s1
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[4:5], v[4:5]
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1]
+; GFX1170-NEXT: buffer_store_b64 v[0:1], off, s[8:11], 0
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-LABEL: test_fmin3_olt_1_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll
index e851f1d2e586e..7deaa1d0c62b5 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -15,6 +19,11 @@ define amdgpu_ps float @test_fminimum_f32_vv(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f32_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f32_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@@ -44,6 +53,11 @@ define amdgpu_ps float @test_fminimum_f32_ss(float inreg %a, float inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f32_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, s0, s1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f32 s0, s0, s1
@@ -63,6 +77,11 @@ define amdgpu_ps float @test_fminimum_f32_vs(float %a, float inreg %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f32_vs:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, s0
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f32_vs:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, s0
@@ -77,6 +96,11 @@ define amdgpu_ps float @test_fminimum_nnan_f32(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_nnan_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_nnan_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@@ -94,6 +118,11 @@ define amdgpu_ps float @test_fminimum_nsz_f32(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_nsz_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_nsz_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@@ -108,6 +137,11 @@ define amdgpu_ps float @test_fminimum_signed_zero_f32() {
; GFX9-NEXT: v_bfrev_b32_e32 v0, 1
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_signed_zero_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_bfrev_b32_e32 v0, 1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_signed_zero_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_bfrev_b32_e32 v0, 1
@@ -128,6 +162,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32(<2 x float> %a, <2 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v2
@@ -169,6 +209,12 @@ define amdgpu_ps <2 x float> @test_fminimum_v2f32_ss(<2 x float> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v2f32_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, s0, s2
+; GFX1170-NEXT: v_minimum_f32 v1, s1, s3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v2f32_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f32 s0, s0, s2
@@ -195,6 +241,13 @@ define amdgpu_ps <3 x float> @test_fminimum_v3f32(<3 x float> %a, <3 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v3
@@ -223,6 +276,14 @@ define amdgpu_ps <4 x float> @test_fminimum_v4f32(<4 x float> %a, <4 x float> %b
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v4f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v4
@@ -288,6 +349,26 @@ define amdgpu_ps <16 x float> @test_fminimum_v16f32(<16 x float> %a, <16 x float
; GFX9-NEXT: v_cndmask_b32_e32 v15, v33, v16, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v16f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v16
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v17
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v18
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v19
+; GFX1170-NEXT: v_minimum_f32 v4, v4, v20
+; GFX1170-NEXT: v_minimum_f32 v5, v5, v21
+; GFX1170-NEXT: v_minimum_f32 v6, v6, v22
+; GFX1170-NEXT: v_minimum_f32 v7, v7, v23
+; GFX1170-NEXT: v_minimum_f32 v8, v8, v24
+; GFX1170-NEXT: v_minimum_f32 v9, v9, v25
+; GFX1170-NEXT: v_minimum_f32 v10, v10, v26
+; GFX1170-NEXT: v_minimum_f32 v11, v11, v27
+; GFX1170-NEXT: v_minimum_f32 v12, v12, v28
+; GFX1170-NEXT: v_minimum_f32 v13, v13, v29
+; GFX1170-NEXT: v_minimum_f32 v14, v14, v30
+; GFX1170-NEXT: v_minimum_f32 v15, v15, v31
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v16
@@ -320,6 +401,26 @@ define amdgpu_ps half @test_fminimum_f16_vv(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_vv:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_vv:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_vv:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_vv:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-TRUE16-LABEL: test_fminimum_f16_vv:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
@@ -364,6 +465,26 @@ define amdgpu_ps half @test_fminimum_f16_ss(half inreg %a, half inreg %b) {
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-TRUE16-LABEL: test_fminimum_f16_ss:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
+; GFX1170-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_fminimum_f16_ss:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
+; GFX1170-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_f16_ss:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_f16_ss:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_minimum_f16 s0, s0, s1
@@ -399,6 +520,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_vv(<2 x half> %a, <2 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v2f16_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v2f16_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
@@ -448,6 +574,11 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v2f16_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v2f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s1
@@ -490,6 +621,27 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fminimum_v3f16_vv:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_vv:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v1.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_vv:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fminimum_v3f16_vv:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, v0, v2
@@ -567,6 +719,30 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fminimum_v3f16_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2
+; GFX1170-SDAG-NEXT: v_pk_minimum_f16 v1, s1, s3
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_fminimum_v3f16_ss:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, s1, s3
+; GFX1170-GISEL-TRUE16-NEXT: v_pk_minimum_f16 v0, s0, s2
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1170-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_fminimum_v3f16_ss:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s1, s3
+; GFX1170-GISEL-FAKE16-NEXT: v_pk_minimum_f16 v0, s0, s2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1170-GISEL-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fminimum_v3f16_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_pk_minimum_f16 v0, s0, s2
@@ -624,6 +800,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
@@ -706,6 +888,12 @@ define amdgpu_ps <4 x half> @test_fminimum_v4f16_ss(<4 x half> inreg %a, <4 x ha
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v4f16_ss:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, s1, s3
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v4f16_ss:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_pk_minimum_f16 v0, s0, s2
@@ -734,6 +922,11 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_vv(double %a, double %b) {
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f64_vv:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f64_vv:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
@@ -771,6 +964,21 @@ define amdgpu_ps <2 x float> @test_fminimum_f64_ss(double inreg %a, double inreg
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fminimum_f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fminimum_f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fminimum_f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
@@ -835,6 +1043,27 @@ define amdgpu_ps <4 x float> @test_fminimum_v2f64_ss(<2 x double> inreg %a, <2 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fminimum_v2f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fminimum_v2f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fminimum_v2f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[4:5]
@@ -904,6 +1133,14 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64(<4 x double> %a, <4 x double>
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v7, v18, v13, s[4:5]
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_v4f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
@@ -992,6 +1229,36 @@ define amdgpu_ps <8 x float> @test_fminimum_v4f64_ss(<4 x double> inreg %a, <4 x
; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, s5
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
+; GFX1170-SDAG-LABEL: test_fminimum_v4f64_ss:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15]
+; GFX1170-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1170-GISEL-LABEL: test_fminimum_v4f64_ss:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], s[2:3], s[10:11]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], s[4:5], s[12:13]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], s[6:7], s[14:15]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s2, v2
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s3, v3
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s4, v4
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s5, v5
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s6, v6
+; GFX1170-GISEL-NEXT: v_readfirstlane_b32 s7, v7
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
+; GFX1170-GISEL-NEXT: ; return to shader part epilog
+;
; GFX12-SDAG-LABEL: test_fminimum_v4f64_ss:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_minimum_f64 v[0:1], s[0:1], s[8:9]
@@ -1063,6 +1330,21 @@ define amdgpu_kernel void @fminimumi_f32_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
+; GFX1170-LABEL: fminimumi_f32_move_to_valu:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_clause 0x1
+; GFX1170-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v2
+; GFX1170-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1170-NEXT: s_endpgm
+;
; GFX12-SDAG-LABEL: fminimumi_f32_move_to_valu:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_clause 0x1
@@ -1143,6 +1425,69 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
+; GFX1170-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: s_clause 0x1
+; GFX1170-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-SDAG-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-SDAG-FAKE16-LABEL: fminimum_f16_move_to_valu:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: s_clause 0x1
+; GFX1170-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-SDAG-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v1, v1, v2
+; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-TRUE16-LABEL: fminimum_f16_move_to_valu:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: s_clause 0x1
+; GFX1170-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-GISEL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_b16 v0, v1, s[2:3] glc dlc
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: global_load_d16_hi_b16 v0, v1, s[4:5] glc dlc
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-FAKE16-LABEL: fminimum_f16_move_to_valu:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: s_clause 0x1
+; GFX1170-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1170-GISEL-FAKE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1170-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s3, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, s2, s3
+; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
+;
; GFX12-SDAG-TRUE16-LABEL: fminimum_f16_move_to_valu:
; GFX12-SDAG-TRUE16: ; %bb.0:
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
@@ -1226,6 +1571,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_on(float %a, float %b) #0 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f32_ieee_on:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f32_ieee_on:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
@@ -1243,6 +1593,11 @@ define amdgpu_ps float @test_fminimum_f32_ieee_off(float %a, float %b) #1 {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_fminimum_f32_ieee_off:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_fminimum_f32_ieee_off:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index 4506fd649a5ff..0cd4293a16116 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
@@ -15,6 +17,12 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -50,6 +58,12 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v2, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -84,6 +98,14 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre
; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_fminimum3_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_mov_b32_e32 v0, s2
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum3_f32 v0, s0, s1, v0
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_fminimum3_f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -126,6 +148,12 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fabs0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -162,6 +190,12 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fabs1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, |v1|, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +232,12 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fabs2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, |v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -234,6 +274,12 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -272,6 +318,12 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -310,6 +362,12 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fneg_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -351,6 +409,12 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fneg0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, -v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -387,6 +451,12 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fneg1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, -v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -423,6 +493,12 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_fneg2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, -v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -459,6 +535,12 @@ define float @v_fminimum3_f32_const0(float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_const0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -495,6 +577,12 @@ define float @v_fminimum3_f32__const2(float %a, float %b) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32__const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -531,6 +619,12 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_inlineimm0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, 4.0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -566,6 +660,12 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32__inlineimm:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v1, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -603,6 +703,14 @@ define float @v_fminimum3_f32_const1_const2(float %a) {
; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f32_const1_const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: s_mov_b32 s0, 0x41000000
+; GFX1170-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f32_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -641,6 +749,13 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float
; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v4, v0, v2
+; GFX1170-NEXT: v_minimum3_f32 v1, v5, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -686,6 +801,13 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v2, v4
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, v3, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -731,6 +853,13 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v4|
+; GFX1170-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -779,6 +908,13 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v2, -v4
+; GFX1170-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -827,6 +963,13 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c
; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, 2.0, v2
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, 2.0, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -872,6 +1015,13 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b
; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f32__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v2, 4.0
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, v3, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -918,6 +1068,14 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float
; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v6, v0, v3
+; GFX1170-NEXT: v_minimum3_f32 v1, v7, v1, v4
+; GFX1170-NEXT: v_minimum3_f32 v2, v8, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -973,6 +1131,14 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v3, v6
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, v4, v7
+; GFX1170-NEXT: v_minimum3_f32 v2, v2, v5, v8
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1028,6 +1194,14 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v6|
+; GFX1170-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v7|
+; GFX1170-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1086,6 +1260,14 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b,
; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, -v0, -v3, -v6
+; GFX1170-NEXT: v_minimum3_f32 v1, -v1, -v4, -v7
+; GFX1170-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1144,6 +1326,14 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c
; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, 2.0, v3
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, 2.0, v4
+; GFX1170-NEXT: v_minimum3_f32 v2, v2, 2.0, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1199,6 +1389,14 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b
; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f32__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum3_f32 v0, v0, v3, 4.0
+; GFX1170-NEXT: v_minimum3_f32 v1, v1, v4, 4.0
+; GFX1170-NEXT: v_minimum3_f32 v2, v2, v5, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1263,6 +1461,18 @@ define half @v_fminimum3_f16(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1308,6 +1518,18 @@ define half @v_fminimum3_f16_commute(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v2, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_commute:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v2.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_commute:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v2, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1355,6 +1577,26 @@ define amdgpu_ps i32 @s_fminimum3_f16(half inreg %a, half inreg %b, half inreg %
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: ; return to shader part epilog
;
+; GFX1170-TRUE16-LABEL: s_fminimum3_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, s0, s1, v0.l
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-FAKE16-LABEL: s_fminimum3_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, s0, s1, v0
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_fminimum3_f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -1411,6 +1653,18 @@ define half @v_fminimum3_f16_fabs0(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fabs0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1458,6 +1712,18 @@ define half @v_fminimum3_f16_fabs1(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, |v1|, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, |v1.l|, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, |v1|, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fabs1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1505,6 +1771,18 @@ define half @v_fminimum3_f16_fabs2(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, |v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, |v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fabs2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1552,6 +1830,18 @@ define half @v_fminimum3_f16_fabs_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, |v1|, |v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fabs_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, |v0.l|, |v1.l|, |v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fabs_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, |v0|, |v1|, |v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1603,6 +1893,18 @@ define half @v_fminimum3_f16_fneg_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, -v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, -v1.l, -v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -v0, -v1, -v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1654,6 +1956,18 @@ define half @v_fminimum3_f16_fneg_fabs_all(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -|v0|, -|v1|, -|v2|
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg_fabs_all:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -|v0.l|, -|v1.l|, -|v2.l|
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg_fabs_all:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -|v0|, -|v1|, -|v2|
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fneg_fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1708,6 +2022,18 @@ define half @v_fminimum3_f16_fneg0(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, -v0, v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, -v0.l, v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, -v0, v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fneg0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1755,6 +2081,18 @@ define half @v_fminimum3_f16_fneg1(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, -v1, v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, -v1.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, -v1, v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fneg1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1802,6 +2140,18 @@ define half @v_fminimum3_f16_fneg2(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, -v2
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_fneg2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, -v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_fneg2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, -v2
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_fneg2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1849,6 +2199,18 @@ define half @v_fminimum3_f16_const0(half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 0x4800, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_const0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 0x4800, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_const0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, 0x4800, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_const0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1895,6 +2257,18 @@ define half @v_fminimum3_f16__const2(half %a, half %b) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 0x4800
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16__const2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4800
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16__const2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 0x4800
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16__const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1941,6 +2315,18 @@ define half @v_fminimum3_f16_inlineimm0(half %b, half %c) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, 4.0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_inlineimm0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 4.0, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_inlineimm0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, 4.0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_inlineimm0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1986,6 +2372,18 @@ define half @v_fminimum3_f16__inlineimm(half %a, half %b) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 4.0
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16__inlineimm:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 4.0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16__inlineimm:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, v1, 4.0
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16__inlineimm:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2035,6 +2433,22 @@ define half @v_fminimum3_f16_const1_const2(half %a) {
; GFX12-FAKE16-NEXT: v_minimum3_f16 v0, v0, s0, 0x4c00
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_fminimum3_f16_const1_const2:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4c00
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_fminimum3_f16_const1_const2:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: s_movk_i32 s0, 0x4800
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1170-FAKE16-NEXT: v_minimum3_f16 v0, v0, s0, 0x4c00
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_f16_const1_const2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2074,6 +2488,14 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c
; GFX12-NEXT: v_pk_minimum_f16 v0, v2, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v2, v0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2120,6 +2542,14 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2169,6 +2599,17 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2222,6 +2663,14 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2271,6 +2720,14 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) {
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2316,6 +2773,14 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) {
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v2f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2364,6 +2829,16 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c
; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v4, v0
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v5, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2426,6 +2901,16 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2495,6 +2980,23 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2571,6 +3073,16 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2636,6 +3148,16 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) {
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 2.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2693,6 +3215,16 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) {
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v3f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2755,6 +3287,16 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c
; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v4, v0
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v5, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2820,6 +3362,16 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16_commute:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2892,6 +3444,23 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16__fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX1170-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX1170-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX1170-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX1170-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v5
+; GFX1170-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v4
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16__fabs_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2969,6 +3538,16 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16__fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16__fneg_all:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3037,6 +3616,16 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) {
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16__inlineimm1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 2.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3099,6 +3688,16 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) {
; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_v4f16__inlineimm2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3162,6 +3761,14 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3195,6 +3802,14 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[4:5], v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_commute:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[4:5], v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3227,6 +3842,16 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do
; GFX12-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_fminimum3_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], s[4:5]
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX9-LABEL: s_fminimum3_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
@@ -3269,6 +3894,14 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fabs0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fabs0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3303,6 +3936,14 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fabs1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fabs1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3337,6 +3978,14 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fabs2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fabs2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3371,6 +4020,14 @@ define double @v_fminimum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], |v[0:1]|, |v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], |v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3407,6 +4064,14 @@ define double @v_fminimum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fneg_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], -v[0:1], -v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3443,6 +4108,14 @@ define double @v_fminimum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -|v[4:5]|
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fneg_fabs_all:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], -|v[0:1]|, -|v[2:3]|
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -|v[4:5]|
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fneg_fabs_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3482,6 +4155,14 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fneg0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], -v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fneg0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3516,6 +4197,14 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fneg1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fneg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3550,6 +4239,14 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_fneg2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], -v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_fneg2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3584,6 +4281,14 @@ define double @v_fminimum3_f64_const0(double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_const0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_const0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3619,6 +4324,14 @@ define double @v_fminimum3_f64__const2(double %a, double %b) {
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64__const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64__const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3653,6 +4366,14 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_inlineimm0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_inlineimm0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3686,6 +4407,14 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) {
; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64__inlineimm:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], 4.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64__inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3719,6 +4448,14 @@ define double @v_fminimum3_f64_const1_const2(double %a) {
; GFX12-NEXT: v_minimum_f64 v[0:1], 0x40300000, v[0:1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_fminimum3_f64_const1_const2:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40200000, v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], 0x40300000, v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_fminimum3_f64_const1_const2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3755,6 +4492,14 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c)
; GFX12-NEXT: v_minimum_f32 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fminimum3_f32__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f32 v1, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fminimum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3790,6 +4535,15 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float
; GFX12-NEXT: s_minimum_f32 s1, s0, s2
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: s_no_fminimum3_f32__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minimum_f32 v0, s0, s1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-NEXT: v_minimum_f32 v1, v0, s2
+; GFX1170-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_no_fminimum3_f32__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -3852,6 +4606,23 @@ define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) {
; GFX12-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_no_fminimum3_f16__multi_use:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.h, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_no_fminimum3_f16__multi_use:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v1, v0, v2
+; GFX1170-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fminimum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3893,6 +4664,30 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half in
; GFX12-NEXT: s_and_b32 s1, 0xffff, s1
; GFX12-NEXT: ; return to shader part epilog
;
+; GFX1170-TRUE16-LABEL: s_no_fminimum3_f16__multi_use:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v1.l, v0.l, s2
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-TRUE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1170-FAKE16-LABEL: s_no_fminimum3_f16__multi_use:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v1, v0, s2
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1170-FAKE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; GFX942-LABEL: s_no_fminimum3_f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v0, s1
@@ -3948,6 +4743,14 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b,
; GFX12-NEXT: v_pk_minimum_f16 v1, v0, v2
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fminimum3_v2f16__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v0, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3997,6 +4800,14 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
; GFX12-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_no_fminimum3_f64__multi_use:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX9-LABEL: v_no_fminimum3_f64__multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index 48f6c96df139d..f192ee28dcec5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -1,13 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_maximum_f16(half %src0, half %src1) {
; GFX7-LABEL: v_maximum_f16:
@@ -72,6 +74,18 @@ define half @v_maximum_f16(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -135,6 +149,18 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -221,6 +247,18 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16__nsz:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16__nsz:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16__nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -284,6 +322,18 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -379,6 +429,22 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -479,6 +545,22 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -601,6 +683,28 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: s_maximum_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: ;;#ASMSTART
+; GFX1170-TRUE16-NEXT: ; use v0
+; GFX1170-TRUE16-NEXT: ;;#ASMEND
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: s_maximum_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: ;;#ASMSTART
+; GFX1170-FAKE16-NEXT: ; use v0
+; GFX1170-FAKE16-NEXT: ;;#ASMEND
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -720,6 +824,12 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -777,6 +887,12 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -886,6 +1002,12 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -943,6 +1065,12 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1089,6 +1217,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_maximum_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, s0, s1
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v0
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1227,6 +1364,13 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1293,6 +1437,13 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1427,6 +1578,13 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1493,6 +1651,13 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1658,6 +1823,13 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1734,6 +1906,13 @@ define <4 x half> @v_maximum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1899,6 +2078,13 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1975,6 +2161,13 @@ define <4 x half> @v_maximum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2251,6 +2444,15 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v8f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v6
+; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2755,6 +2957,19 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v16f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_maximum_f16 v0, v0, v8
+; GFX1170-NEXT: v_pk_maximum_f16 v1, v1, v9
+; GFX1170-NEXT: v_pk_maximum_f16 v2, v2, v10
+; GFX1170-NEXT: v_pk_maximum_f16 v3, v3, v11
+; GFX1170-NEXT: v_pk_maximum_f16 v4, v4, v12
+; GFX1170-NEXT: v_pk_maximum_f16 v5, v5, v13
+; GFX1170-NEXT: v_pk_maximum_f16 v6, v6, v14
+; GFX1170-NEXT: v_pk_maximum_f16 v7, v7, v15
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v16f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2774,5 +2989,3 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
%op = call <16 x half> @llvm.maximum.v16f16(<16 x half> %src0, <16 x half> %src1)
ret <16 x half> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 97eafd07d4b37..505e8c36b56d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define float @v_maximum_f32(float %src0, float %src1) {
; GFX7-LABEL: v_maximum_f32:
@@ -59,6 +59,12 @@ define float @v_maximum_f32(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -103,6 +109,12 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -167,6 +179,12 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -211,6 +229,12 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -281,6 +305,14 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32__nnan_src0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -354,6 +386,14 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f32__nnan_src1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f32__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -443,6 +483,15 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_maximum_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, s0, s1
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v0
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -527,6 +576,13 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -576,6 +632,13 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -656,6 +719,13 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -705,6 +775,13 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v2
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -812,6 +889,16 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_maximum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v1, s1, s3
+; GFX1170-NEXT: v_maximum_f32 v0, s0, s2
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:1]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -913,6 +1000,14 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -968,6 +1063,14 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1065,6 +1168,14 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1120,6 +1231,14 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v3
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v4
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1233,6 +1352,15 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1293,6 +1421,15 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1407,6 +1544,15 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1467,6 +1613,15 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v4
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v5
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v6
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1643,6 +1798,19 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v8f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v8
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v9
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v10
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v11
+; GFX1170-NEXT: v_maximum_f32 v4, v4, v12
+; GFX1170-NEXT: v_maximum_f32 v5, v5, v13
+; GFX1170-NEXT: v_maximum_f32 v6, v6, v14
+; GFX1170-NEXT: v_maximum_f32 v7, v7, v15
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v8f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1960,6 +2128,29 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v16f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v16
+; GFX1170-NEXT: v_maximum_f32 v1, v1, v17
+; GFX1170-NEXT: v_maximum_f32 v2, v2, v18
+; GFX1170-NEXT: v_maximum_f32 v3, v3, v19
+; GFX1170-NEXT: v_maximum_f32 v4, v4, v20
+; GFX1170-NEXT: v_maximum_f32 v5, v5, v21
+; GFX1170-NEXT: v_maximum_f32 v6, v6, v22
+; GFX1170-NEXT: v_maximum_f32 v7, v7, v23
+; GFX1170-NEXT: v_maximum_f32 v8, v8, v24
+; GFX1170-NEXT: v_maximum_f32 v9, v9, v25
+; GFX1170-NEXT: v_maximum_f32 v10, v10, v26
+; GFX1170-NEXT: v_maximum_f32 v11, v11, v27
+; GFX1170-NEXT: v_maximum_f32 v12, v12, v28
+; GFX1170-NEXT: v_maximum_f32 v13, v13, v29
+; GFX1170-NEXT: v_maximum_f32 v14, v14, v30
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v15, v15, v31
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1989,5 +2180,3 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
%op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1)
ret <16 x float> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index 3280d7aa9ddfe..a98f22fdf72f2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define double @v_maximum_f64(double %src0, double %src1) {
; GFX7-LABEL: v_maximum_f64:
@@ -69,6 +69,12 @@ define double @v_maximum_f64(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -113,6 +119,12 @@ define double @v_maximum_f64__nnan(double %src0, double %src1) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -187,6 +199,12 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -231,6 +249,12 @@ define double @v_maximum_f64__nnan_nsz(double %src0, double %src1) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -312,6 +336,14 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64__nnan_src0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -396,6 +428,14 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_f64__nnan_src1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_f64__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -498,6 +538,15 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_maximum_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:1]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -601,6 +650,13 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -651,6 +707,13 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -751,6 +814,13 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -801,6 +871,13 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v2f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -932,6 +1009,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_maximum_v2f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[2:3], s[2:3], s[18:19]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], s[0:1], s[16:17]
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:3]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_maximum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1060,6 +1147,14 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1116,6 +1211,14 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1241,6 +1344,14 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1297,6 +1408,14 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v3f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1447,6 +1566,15 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1509,6 +1637,15 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1660,6 +1797,15 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1722,6 +1868,15 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v4f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1982,6 +2137,21 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v8f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[18:19]
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[20:21]
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[22:23]
+; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[24:25]
+; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[26:27]
+; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[28:29]
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[30:31]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v8f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2757,6 +2927,79 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximum_v16f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: s_clause 0x1b
+; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120
+; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104
+; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24
+; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20
+; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32
+; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28
+; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36
+; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48
+; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56
+; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52
+; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64
+; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60
+; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72
+; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68
+; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80
+; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76
+; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88
+; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84
+; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96
+; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92
+; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100
+; GFX1170-NEXT: s_waitcnt vmcnt(26)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[32:33]
+; GFX1170-NEXT: s_clause 0x2
+; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112
+; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108
+; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116
+; GFX1170-NEXT: s_waitcnt vmcnt(27)
+; GFX1170-NEXT: v_maximum_f64 v[2:3], v[2:3], v[34:35]
+; GFX1170-NEXT: s_clause 0x1
+; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128
+; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124
+; GFX1170-NEXT: s_waitcnt vmcnt(24)
+; GFX1170-NEXT: v_maximum_f64 v[4:5], v[4:5], v[48:49]
+; GFX1170-NEXT: s_waitcnt vmcnt(22)
+; GFX1170-NEXT: v_maximum_f64 v[6:7], v[6:7], v[50:51]
+; GFX1170-NEXT: s_waitcnt vmcnt(20)
+; GFX1170-NEXT: v_maximum_f64 v[8:9], v[8:9], v[52:53]
+; GFX1170-NEXT: s_waitcnt vmcnt(18)
+; GFX1170-NEXT: v_maximum_f64 v[10:11], v[10:11], v[54:55]
+; GFX1170-NEXT: s_waitcnt vmcnt(16)
+; GFX1170-NEXT: v_maximum_f64 v[12:13], v[12:13], v[64:65]
+; GFX1170-NEXT: s_waitcnt vmcnt(14)
+; GFX1170-NEXT: v_maximum_f64 v[14:15], v[14:15], v[66:67]
+; GFX1170-NEXT: s_waitcnt vmcnt(12)
+; GFX1170-NEXT: v_maximum_f64 v[16:17], v[16:17], v[68:69]
+; GFX1170-NEXT: s_waitcnt vmcnt(10)
+; GFX1170-NEXT: v_maximum_f64 v[18:19], v[18:19], v[70:71]
+; GFX1170-NEXT: s_waitcnt vmcnt(8)
+; GFX1170-NEXT: v_maximum_f64 v[20:21], v[20:21], v[80:81]
+; GFX1170-NEXT: s_waitcnt vmcnt(6)
+; GFX1170-NEXT: v_maximum_f64 v[22:23], v[22:23], v[82:83]
+; GFX1170-NEXT: s_waitcnt vmcnt(5)
+; GFX1170-NEXT: v_maximum_f64 v[24:25], v[24:25], v[38:39]
+; GFX1170-NEXT: s_waitcnt vmcnt(3)
+; GFX1170-NEXT: v_maximum_f64 v[26:27], v[26:27], v[32:33]
+; GFX1170-NEXT: s_waitcnt vmcnt(2)
+; GFX1170-NEXT: v_maximum_f64 v[28:29], v[28:29], v[36:37]
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[30:31], v[30:31], v[34:35]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximum_v16f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2834,5 +3077,3 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
%op = call <16 x double> @llvm.maximum.v16f64(<16 x double> %src0, <16 x double> %src1)
ret <16 x double> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index b5dab396f0bf1..9a1a51cc5dfc9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -1,14 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; xUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_minimum_f16(half %src0, half %src1) {
; GFX8-LABEL: v_minimum_f16:
@@ -61,6 +62,18 @@ define half @v_minimum_f16(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -115,6 +128,18 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -189,6 +214,18 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16__nsz:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16__nsz:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16__nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -243,6 +280,18 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -323,6 +372,22 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -408,6 +473,22 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src1:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -515,6 +596,28 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: s_minimum_f16:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_minimum_f16 v0.l, s0, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-TRUE16-NEXT: ;;#ASMSTART
+; GFX1170-TRUE16-NEXT: ; use v0
+; GFX1170-TRUE16-NEXT: ;;#ASMEND
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: s_minimum_f16:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_minimum_f16 v0, s0, s1
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1170-FAKE16-NEXT: ;;#ASMSTART
+; GFX1170-FAKE16-NEXT: ; use v0
+; GFX1170-FAKE16-NEXT: ;;#ASMEND
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -612,6 +715,12 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -652,6 +761,12 @@ define <2 x half> @v_minimum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -739,6 +854,12 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -779,6 +900,12 @@ define <2 x half> @v_minimum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -900,6 +1027,15 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
; GFX11-FAKE16-NEXT: ;;#ASMEND
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_minimum_v2f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, s0, s1
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v0
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_v2f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1010,6 +1146,13 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1055,6 +1198,13 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1161,6 +1311,13 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1206,6 +1363,13 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1333,6 +1497,13 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1380,6 +1551,13 @@ define <4 x half> @v_minimum_v4f16__nnan(<4 x half> %src0, <4 x half> %src1) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f16__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f16__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1507,6 +1685,13 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f16__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f16__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1554,6 +1739,13 @@ define <4 x half> @v_minimum_v4f16__nnan_nsz(<4 x half> %src0, <4 x half> %src1)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f16__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f16__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1760,6 +1952,15 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v3, v9, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v8f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v6
+; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v8f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2130,6 +2331,19 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v4, v14, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v16f16:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_minimum_f16 v0, v0, v8
+; GFX1170-NEXT: v_pk_minimum_f16 v1, v1, v9
+; GFX1170-NEXT: v_pk_minimum_f16 v2, v2, v10
+; GFX1170-NEXT: v_pk_minimum_f16 v3, v3, v11
+; GFX1170-NEXT: v_pk_minimum_f16 v4, v4, v12
+; GFX1170-NEXT: v_pk_minimum_f16 v5, v5, v13
+; GFX1170-NEXT: v_pk_minimum_f16 v6, v6, v14
+; GFX1170-NEXT: v_pk_minimum_f16 v7, v7, v15
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v16f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2149,5 +2363,3 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
%op = call <16 x half> @llvm.minimum.v16f16(<16 x half> %src0, <16 x half> %src1)
ret <16 x half> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 3e98599fc4c7f..269f10823ddb8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define float @v_minimum_f32(float %src0, float %src1) {
; GFX7-LABEL: v_minimum_f32:
@@ -59,6 +59,12 @@ define float @v_minimum_f32(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -103,6 +109,12 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -167,6 +179,12 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -211,6 +229,12 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -281,6 +305,14 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32__nnan_src0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -354,6 +386,14 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f32__nnan_src1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f32_e32 v1, 1.0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f32__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -443,6 +483,15 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_minimum_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, s0, s1
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v0
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -527,6 +576,13 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -576,6 +632,13 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1)
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -656,6 +719,13 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -705,6 +775,13 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -812,6 +889,16 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_minimum_v2f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v1, s1, s3
+; GFX1170-NEXT: v_minimum_f32 v0, s0, s2
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:1]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_v2f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -913,6 +1000,14 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -968,6 +1063,14 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1)
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1065,6 +1168,14 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1120,6 +1231,14 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v3
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v4
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1233,6 +1352,15 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1293,6 +1421,15 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1)
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f32__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f32__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1407,6 +1544,15 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f32__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f32__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1467,6 +1613,15 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f32__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v4
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v5
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v6
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f32__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1643,6 +1798,19 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v8f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v8
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v9
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v10
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v11
+; GFX1170-NEXT: v_minimum_f32 v4, v4, v12
+; GFX1170-NEXT: v_minimum_f32 v5, v5, v13
+; GFX1170-NEXT: v_minimum_f32 v6, v6, v14
+; GFX1170-NEXT: v_minimum_f32 v7, v7, v15
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v8f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1960,6 +2128,29 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v16f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v16
+; GFX1170-NEXT: v_minimum_f32 v1, v1, v17
+; GFX1170-NEXT: v_minimum_f32 v2, v2, v18
+; GFX1170-NEXT: v_minimum_f32 v3, v3, v19
+; GFX1170-NEXT: v_minimum_f32 v4, v4, v20
+; GFX1170-NEXT: v_minimum_f32 v5, v5, v21
+; GFX1170-NEXT: v_minimum_f32 v6, v6, v22
+; GFX1170-NEXT: v_minimum_f32 v7, v7, v23
+; GFX1170-NEXT: v_minimum_f32 v8, v8, v24
+; GFX1170-NEXT: v_minimum_f32 v9, v9, v25
+; GFX1170-NEXT: v_minimum_f32 v10, v10, v26
+; GFX1170-NEXT: v_minimum_f32 v11, v11, v27
+; GFX1170-NEXT: v_minimum_f32 v12, v12, v28
+; GFX1170-NEXT: v_minimum_f32 v13, v13, v29
+; GFX1170-NEXT: v_minimum_f32 v14, v14, v30
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v15, v15, v31
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v16f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1989,5 +2180,3 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
%op = call <16 x float> @llvm.minimum.v16f32(<16 x float> %src0, <16 x float> %src1)
ret <16 x float> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index d07bd6c8dd902..1d7678779b8be 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
define double @v_minimum_f64(double %src0, double %src1) {
; GFX7-LABEL: v_minimum_f64:
@@ -69,6 +69,12 @@ define double @v_minimum_f64(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -113,6 +119,12 @@ define double @v_minimum_f64__nnan(double %src0, double %src1) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -187,6 +199,12 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -231,6 +249,12 @@ define double @v_minimum_f64__nnan_nsz(double %src0, double %src1) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -312,6 +336,14 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64__nnan_src0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64__nnan_src0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -396,6 +428,14 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_f64__nnan_src1:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_add_f64 v[2:3], v[2:3], 1.0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_f64__nnan_src1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -498,6 +538,15 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_minimum_f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[2:3]
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:1]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -601,6 +650,13 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -651,6 +707,13 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -751,6 +814,13 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -801,6 +871,13 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v2f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -932,6 +1009,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1)
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: s_minimum_v2f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[2:3], s[2:3], s[18:19]
+; GFX1170-NEXT: v_minimum_f64 v[0:1], s[0:1], s[16:17]
+; GFX1170-NEXT: ;;#ASMSTART
+; GFX1170-NEXT: ; use v[0:3]
+; GFX1170-NEXT: ;;#ASMEND
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: s_minimum_v2f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1060,6 +1147,14 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1116,6 +1211,14 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1241,6 +1344,14 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1297,6 +1408,14 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v3f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1447,6 +1566,15 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1509,6 +1637,15 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f64__nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f64__nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1660,6 +1797,15 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f64__nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f64__nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1722,6 +1868,15 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v4f64__nnan_nsz:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1982,6 +2137,21 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v8f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17]
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[18:19]
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[20:21]
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[22:23]
+; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[24:25]
+; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[26:27]
+; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[28:29]
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[30:31]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v8f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2757,6 +2927,79 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimum_v16f64:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: s_clause 0x1b
+; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: scratch_load_b32 v37, off, s32 offset:120
+; GFX1170-NEXT: scratch_load_b32 v39, off, s32 offset:104
+; GFX1170-NEXT: scratch_load_b32 v49, off, s32 offset:24
+; GFX1170-NEXT: scratch_load_b32 v48, off, s32 offset:20
+; GFX1170-NEXT: scratch_load_b32 v51, off, s32 offset:32
+; GFX1170-NEXT: scratch_load_b32 v50, off, s32 offset:28
+; GFX1170-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX1170-NEXT: scratch_load_b32 v52, off, s32 offset:36
+; GFX1170-NEXT: scratch_load_b32 v55, off, s32 offset:48
+; GFX1170-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX1170-NEXT: scratch_load_b32 v65, off, s32 offset:56
+; GFX1170-NEXT: scratch_load_b32 v64, off, s32 offset:52
+; GFX1170-NEXT: scratch_load_b32 v67, off, s32 offset:64
+; GFX1170-NEXT: scratch_load_b32 v66, off, s32 offset:60
+; GFX1170-NEXT: scratch_load_b32 v69, off, s32 offset:72
+; GFX1170-NEXT: scratch_load_b32 v68, off, s32 offset:68
+; GFX1170-NEXT: scratch_load_b32 v71, off, s32 offset:80
+; GFX1170-NEXT: scratch_load_b32 v70, off, s32 offset:76
+; GFX1170-NEXT: scratch_load_b32 v81, off, s32 offset:88
+; GFX1170-NEXT: scratch_load_b32 v80, off, s32 offset:84
+; GFX1170-NEXT: scratch_load_b32 v83, off, s32 offset:96
+; GFX1170-NEXT: scratch_load_b32 v82, off, s32 offset:92
+; GFX1170-NEXT: scratch_load_b32 v38, off, s32 offset:100
+; GFX1170-NEXT: s_waitcnt vmcnt(26)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[32:33]
+; GFX1170-NEXT: s_clause 0x2
+; GFX1170-NEXT: scratch_load_b32 v33, off, s32 offset:112
+; GFX1170-NEXT: scratch_load_b32 v32, off, s32 offset:108
+; GFX1170-NEXT: scratch_load_b32 v36, off, s32 offset:116
+; GFX1170-NEXT: s_waitcnt vmcnt(27)
+; GFX1170-NEXT: v_minimum_f64 v[2:3], v[2:3], v[34:35]
+; GFX1170-NEXT: s_clause 0x1
+; GFX1170-NEXT: scratch_load_b32 v35, off, s32 offset:128
+; GFX1170-NEXT: scratch_load_b32 v34, off, s32 offset:124
+; GFX1170-NEXT: s_waitcnt vmcnt(24)
+; GFX1170-NEXT: v_minimum_f64 v[4:5], v[4:5], v[48:49]
+; GFX1170-NEXT: s_waitcnt vmcnt(22)
+; GFX1170-NEXT: v_minimum_f64 v[6:7], v[6:7], v[50:51]
+; GFX1170-NEXT: s_waitcnt vmcnt(20)
+; GFX1170-NEXT: v_minimum_f64 v[8:9], v[8:9], v[52:53]
+; GFX1170-NEXT: s_waitcnt vmcnt(18)
+; GFX1170-NEXT: v_minimum_f64 v[10:11], v[10:11], v[54:55]
+; GFX1170-NEXT: s_waitcnt vmcnt(16)
+; GFX1170-NEXT: v_minimum_f64 v[12:13], v[12:13], v[64:65]
+; GFX1170-NEXT: s_waitcnt vmcnt(14)
+; GFX1170-NEXT: v_minimum_f64 v[14:15], v[14:15], v[66:67]
+; GFX1170-NEXT: s_waitcnt vmcnt(12)
+; GFX1170-NEXT: v_minimum_f64 v[16:17], v[16:17], v[68:69]
+; GFX1170-NEXT: s_waitcnt vmcnt(10)
+; GFX1170-NEXT: v_minimum_f64 v[18:19], v[18:19], v[70:71]
+; GFX1170-NEXT: s_waitcnt vmcnt(8)
+; GFX1170-NEXT: v_minimum_f64 v[20:21], v[20:21], v[80:81]
+; GFX1170-NEXT: s_waitcnt vmcnt(6)
+; GFX1170-NEXT: v_minimum_f64 v[22:23], v[22:23], v[82:83]
+; GFX1170-NEXT: s_waitcnt vmcnt(5)
+; GFX1170-NEXT: v_minimum_f64 v[24:25], v[24:25], v[38:39]
+; GFX1170-NEXT: s_waitcnt vmcnt(3)
+; GFX1170-NEXT: v_minimum_f64 v[26:27], v[26:27], v[32:33]
+; GFX1170-NEXT: s_waitcnt vmcnt(2)
+; GFX1170-NEXT: v_minimum_f64 v[28:29], v[28:29], v[36:37]
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[30:31], v[30:31], v[34:35]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimum_v16f64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2834,5 +3077,3 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
%op = call <16 x double> @llvm.minimum.v16f64(<16 x double> %src0, <16 x double> %src1)
ret <16 x double> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 806d941ac8730..54262139da7c3 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -20,6 +20,12 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-SDAG,GFX1170-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-GISEL,GFX1170-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-SDAG,GFX1170-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-GISEL,GFX1170-FAKE16-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s
@@ -129,6 +135,42 @@ define half @v_maximumnum_f16(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -233,6 +275,18 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximumnum_f16_nnan:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximumnum_f16_nnan:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximumnum_f16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -302,6 +356,22 @@ define half @v_maximumnum_f16_1.0(half %x) {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, 1.0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximumnum_f16_1.0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, 1.0, v0.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximumnum_f16_1.0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, 1.0, v0
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximumnum_f16_1.0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -410,6 +480,22 @@ define float @v_maximumnum_f32(float %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -468,6 +554,12 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -564,6 +656,24 @@ define double @v_maximumnum_f64(double %x, double %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -624,6 +734,12 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -674,6 +790,14 @@ define float @v_maximumnum_f32_1.0(float %x) {
; GFX11-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_1.0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, 1.0, v0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -730,6 +854,14 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -787,6 +919,14 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -844,6 +984,14 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -898,6 +1046,14 @@ define double @v_maximumnum_f64_1.0(double %x) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f64_1.0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], 1.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f64_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1032,6 +1188,42 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v1, v0
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_v:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_v:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_v:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1206,6 +1398,42 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_v_s:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_v_s:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_v_s:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1380,6 +1608,42 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.h, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_s_s:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v1, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_s_s:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_s_s:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1534,6 +1798,24 @@ define float @v_maximumnum_f32_s_v(float inreg %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_s_v:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_s_v:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_s_v:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1662,6 +1944,24 @@ define float @v_maximumnum_f32_v_s(float %x, float inreg %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_v_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_v_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_v_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1790,6 +2090,24 @@ define float @v_maximumnum_f32_s_s(float inreg %x, float inreg %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_s_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v1, v0
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_s_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_s_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1901,6 +2219,15 @@ define double @v_maximumnum_f64_s_v(double inreg %x, double %y) {
; GFX11-NEXT: v_max_f64 v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f64_s_v:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[2:3], v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f64_s_v:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1999,6 +2326,15 @@ define double @v_maximumnum_f64_v_s(double %x, double inreg %y) {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f64_v_s:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f64_v_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2114,6 +2450,24 @@ define double @v_maximumnum_f64_s_s(double inreg %x, double inreg %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f64_s_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], s[2:3], s[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[2:3], v[0:1]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f64_s_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], s[0:1], s[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], s[2:3], s[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f64_s_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2226,6 +2580,24 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2339,6 +2711,24 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2453,6 +2843,24 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_fabs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2567,6 +2975,24 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_fneg:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2701,6 +3127,42 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2860,6 +3322,42 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3020,6 +3518,42 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fabs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3180,6 +3714,42 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_fneg:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3320,6 +3890,24 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f64_fneg:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3478,6 +4066,24 @@ define <2 x half> @v_maximumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v2f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v2f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v2f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3576,6 +4182,12 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v2f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v2f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3742,6 +4354,30 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v3f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v3f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3866,6 +4502,13 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v3f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v3f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4069,6 +4712,30 @@ define <4 x half> @v_maximumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v4f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v4f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v4f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4206,6 +4873,13 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v4f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v4f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4471,6 +5145,36 @@ define <6 x half> @v_maximumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v2, v2, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v6f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v5
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v6f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v5
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v6f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4825,6 +5529,42 @@ define <8 x half> @v_maximumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v3, v3, v7
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v8f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v6
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v8f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v6
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v7
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v8f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -5431,6 +6171,64 @@ define <16 x half> @v_maximumnum_v16f16(<16 x half> %x, <16 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v7, v7, v12
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v16f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v11
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v12
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v16f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v11
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v12
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v16f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6616,6 +7414,118 @@ define <32 x half> @v_maximumnum_v32f16(<32 x half> %x, <32 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_max_f16 v15, v15, v16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v32f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v16
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v17
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v18
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v19
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v20
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v21
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v22
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v23
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v24
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v25
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v26
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v27
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v28
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v29
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v30
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v16
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v32f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v16
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v17
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v18
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v19
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v20
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v21
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v22
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v23
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v24
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v25
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v26
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v27
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v28
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v29
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v30
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v16
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v32f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6846,6 +7756,24 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX11-GISEL-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v2f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6910,6 +7838,12 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v2f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v2f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7058,6 +7992,28 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v2, v2, v5
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v2, v2, v5
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v3f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7131,6 +8087,13 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX11-NEXT: v_max_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v3f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v3 :: v_dual_max_num_f32 v1, v1, v4
+; GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v3f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7306,6 +8269,30 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX11-GISEL-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v4f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7385,6 +8372,13 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v4f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v4 :: v_dual_max_num_f32 v1, v1, v5
+; GFX1170-NEXT: v_dual_max_num_f32 v2, v2, v6 :: v_dual_max_num_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v4f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7534,6 +8528,30 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v2f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v2f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v2f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7605,6 +8623,13 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v2f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v2f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7790,6 +8815,36 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v3f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v3f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v3f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7872,6 +8927,14 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v3f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_max_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8094,6 +9157,42 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX11-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v4f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v4f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v4f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8187,6 +9286,15 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v4f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_max_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_max_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_max_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v4f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8252,6 +9360,42 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_maximumnum_f16_no_ieee:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_maximumnum_f16_no_ieee:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8356,6 +9500,18 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_maximumnum_f16_nan_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8410,6 +9566,22 @@ define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f32_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f32_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f32_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8468,6 +9640,12 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX11-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f32_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f32_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8512,6 +9690,24 @@ define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_f64_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_f64_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_f64_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8572,6 +9768,12 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_f64_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_f64_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8654,6 +9856,24 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_maximumnum_v2f16_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8752,6 +9972,12 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8854,6 +10080,13 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8970,6 +10203,13 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX11-NEXT: v_pk_max_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
index c246b9d97e75d..8489193b1f580 100644
--- a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
@@ -1,42 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-FAKE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-TRUE16 %s
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,TRUE16,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) {
-; GFX12-LABEL: test_minmax_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; GCN-LABEL: test_minmax_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
+; GCN-NEXT: ; return to shader part epilog
%max = call float @llvm.maximum.f32(float %a, float %b)
%minmax = call float @llvm.minimum.f32(float %max, float %c)
ret float %minmax
}
define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
-; SDAG-LABEL: s_test_minmax_f32:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: s_maximum_f32 s0, s0, s1
-; SDAG-NEXT: s_mov_b32 s5, s4
-; SDAG-NEXT: s_mov_b32 s4, s3
-; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; SDAG-NEXT: s_minimum_f32 s0, s0, s2
-; SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
-; SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
-; SDAG-NEXT: s_endpgm
-;
-; GISEL-LABEL: s_test_minmax_f32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_maximum_f32 s0, s0, s1
-; GISEL-NEXT: s_mov_b32 s6, s3
-; GISEL-NEXT: s_mov_b32 s7, s4
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-NEXT: s_minimum_f32 s0, s0, s2
-; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
-; GISEL-NEXT: s_endpgm
+; GFX1170-SDAG-LABEL: s_test_minmax_f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: v_maximum_f32 v0, s0, s1
+; GFX1170-SDAG-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-SDAG-NEXT: s_mov_b32 s5, s4
+; GFX1170-SDAG-NEXT: s_mov_b32 s4, s3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, s2
+; GFX1170-SDAG-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX1170-SDAG-NEXT: s_endpgm
+;
+; GFX1170-GISEL-LABEL: s_test_minmax_f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1170-GISEL-NEXT: s_mov_b32 s6, s3
+; GFX1170-GISEL-NEXT: s_mov_b32 s7, s4
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_maximumminimum_f32 v0, s0, s1, v0
+; GFX1170-GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
+; GFX1170-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: s_test_minmax_f32:
+; GFX12-SDAG: ; %bb.0:
+; GFX12-SDAG-NEXT: s_maximum_f32 s0, s0, s1
+; GFX12-SDAG-NEXT: s_mov_b32 s5, s4
+; GFX12-SDAG-NEXT: s_mov_b32 s4, s3
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-SDAG-NEXT: s_minimum_f32 s0, s0, s2
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[4:5]
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: s_test_minmax_f32:
+; GFX12-GISEL: ; %bb.0:
+; GFX12-GISEL-NEXT: s_maximum_f32 s0, s0, s1
+; GFX12-GISEL-NEXT: s_mov_b32 s6, s3
+; GFX12-GISEL-NEXT: s_mov_b32 s7, s4
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GISEL-NEXT: s_minimum_f32 s0, s0, s2
+; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[6:7]
+; GFX12-GISEL-NEXT: s_endpgm
%smax = call float @llvm.maximum.f32(float %a, float %b)
%sminmax = call float @llvm.minimum.f32(float %smax, float %c)
store float %sminmax, ptr addrspace(1) %out
@@ -44,157 +69,230 @@ define amdgpu_ps void @s_test_minmax_f32(float inreg %a, float inreg %b, float i
}
define amdgpu_ps float @test_minmax_commuted_f32(float %a, float %b, float %c) {
-; GFX12-LABEL: test_minmax_commuted_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; GCN-LABEL: test_minmax_commuted_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_maximumminimum_f32 v0, v0, v1, v2
+; GCN-NEXT: ; return to shader part epilog
%max = call float @llvm.maximum.f32(float %a, float %b)
%minmax = call float @llvm.minimum.f32(float %c, float %max)
ret float %minmax
}
define amdgpu_ps float @test_maxmin_f32(float %a, float %b, float %c) {
-; GFX12-LABEL: test_maxmin_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; GCN-LABEL: test_maxmin_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
+; GCN-NEXT: ; return to shader part epilog
%min = call float @llvm.minimum.f32(float %a, float %b)
%maxmin = call float @llvm.maximum.f32(float %min, float %c)
ret float %maxmin
}
define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) {
-; GFX12-LABEL: test_maxmin_commuted_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; GCN-LABEL: test_maxmin_commuted_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_minimummaximum_f32 v0, v0, v1, v2
+; GCN-NEXT: ; return to shader part epilog
%min = call float @llvm.minimum.f32(float %a, float %b)
%maxmin = call float @llvm.maximum.f32(float %c, float %min)
ret float %maxmin
}
define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) {
-; SDAG-TRUE16-LABEL: test_minmax_f16:
-; SDAG-TRUE16: ; %bb.0:
-; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
-; SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; SDAG-FAKE16-LABEL: test_minmax_f16:
-; SDAG-FAKE16: ; %bb.0:
-; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GISEL-TRUE16-LABEL: test_minmax_f16:
-; GISEL-TRUE16: ; %bb.0:
-; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
-; GISEL-TRUE16-NEXT: ; return to shader part epilog
-;
-; GISEL-FAKE16-LABEL: test_minmax_f16:
-; GISEL-FAKE16: ; %bb.0:
-; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; GISEL-FAKE16-NEXT: ; return to shader part epilog
+; TRUE16-LABEL: test_minmax_f16:
+; TRUE16: ; %bb.0:
+; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; TRUE16-NEXT: ; return to shader part epilog
+;
+; FAKE16-LABEL: test_minmax_f16:
+; FAKE16: ; %bb.0:
+; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-TRUE16-LABEL: test_minmax_f16:
+; GFX12-SDAG-TRUE16: ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-FAKE16-LABEL: test_minmax_f16:
+; GFX12-SDAG-FAKE16: ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-TRUE16-LABEL: test_minmax_f16:
+; GFX12-GISEL-TRUE16: ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-FAKE16-LABEL: test_minmax_f16:
+; GFX12-GISEL-FAKE16: ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %max, half %c)
ret half %minmax
}
define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) {
-; SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
-; SDAG-TRUE16: ; %bb.0:
-; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
-; SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
-; SDAG-FAKE16: ; %bb.0:
-; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
-; GISEL-TRUE16: ; %bb.0:
-; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
-; GISEL-TRUE16-NEXT: ; return to shader part epilog
-;
-; GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
-; GISEL-FAKE16: ; %bb.0:
-; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; GISEL-FAKE16-NEXT: ; return to shader part epilog
+; TRUE16-LABEL: test_minmax_commuted_f16:
+; TRUE16: ; %bb.0:
+; TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; TRUE16-NEXT: ; return to shader part epilog
+;
+; FAKE16-LABEL: test_minmax_commuted_f16:
+; FAKE16: ; %bb.0:
+; FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
+; GFX12-SDAG-TRUE16: ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
+; GFX12-SDAG-FAKE16: ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
+; GFX12-GISEL-TRUE16: ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
+; GFX12-GISEL-FAKE16: ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %c, half %max)
ret half %minmax
}
define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) {
-; SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
-; SDAG-TRUE16: ; %bb.0:
-; SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
-; SDAG-TRUE16-NEXT: ; return to shader part epilog
-;
-; SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
-; SDAG-FAKE16: ; %bb.0:
-; SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
-; SDAG-FAKE16-NEXT: ; return to shader part epilog
-;
-; GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
-; GISEL-TRUE16: ; %bb.0:
-; GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
-; GISEL-TRUE16-NEXT: ; return to shader part epilog
-;
-; GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
-; GISEL-FAKE16: ; %bb.0:
-; GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
-; GISEL-FAKE16-NEXT: ; return to shader part epilog
+; TRUE16-LABEL: test_maxmin_commuted_f16:
+; TRUE16: ; %bb.0:
+; TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
+; TRUE16-NEXT: ; return to shader part epilog
+;
+; FAKE16-LABEL: test_maxmin_commuted_f16:
+; FAKE16: ; %bb.0:
+; FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
+; FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
+; GFX12-SDAG-TRUE16: ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
+; GFX12-SDAG-FAKE16: ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
+; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
+; GFX12-GISEL-TRUE16: ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
+; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
+; GFX12-GISEL-FAKE16: ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
+; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
%min = call half @llvm.minimum.f16(half %a, half %b)
%maxmin = call half @llvm.maximum.f16(half %c, half %min)
ret half %maxmin
}
define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
-; SDAG-TRUE16-LABEL: s_test_minmax_f16:
-; SDAG-TRUE16: ; %bb.0:
-; SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
-; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
-; SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
-; SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
-; SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
-; SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
-; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
-; SDAG-TRUE16-NEXT: s_endpgm
-;
-; SDAG-FAKE16-LABEL: s_test_minmax_f16:
-; SDAG-FAKE16: ; %bb.0:
-; SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
-; SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
-; SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
-; SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
-; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
-; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5]
-; SDAG-FAKE16-NEXT: s_endpgm
-;
-; GISEL-TRUE16-LABEL: s_test_minmax_f16:
-; GISEL-TRUE16: ; %bb.0:
-; GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
-; GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
-; GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
-; GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
-; GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
-; GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
-; GISEL-TRUE16-NEXT: s_endpgm
-;
-; GISEL-FAKE16-LABEL: s_test_minmax_f16:
-; GISEL-FAKE16: ; %bb.0:
-; GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
-; GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
-; GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
-; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
-; GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
-; GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
-; GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
-; GISEL-FAKE16-NEXT: s_endpgm
+; GFX1170-SDAG-TRUE16-LABEL: s_test_minmax_f16:
+; GFX1170-SDAG-TRUE16: ; %bb.0:
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, s0, s1
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
+; GFX1170-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, s2
+; GFX1170-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1170-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-SDAG-FAKE16-LABEL: s_test_minmax_f16:
+; GFX1170-SDAG-FAKE16: ; %bb.0:
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, s0, s1
+; GFX1170-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
+; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, s2
+; GFX1170-SDAG-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX1170-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-TRUE16-LABEL: s_test_minmax_f16:
+; GFX1170-GISEL-TRUE16: ; %bb.0:
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GFX1170-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
+; GFX1170-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, s0, s1, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GFX1170-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX1170-GISEL-FAKE16-LABEL: s_test_minmax_f16:
+; GFX1170-GISEL-FAKE16: ; %bb.0:
+; GFX1170-GISEL-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
+; GFX1170-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, s0, s1, v0
+; GFX1170-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GFX1170-GISEL-FAKE16-NEXT: s_endpgm
+;
+; GFX12-SDAG-TRUE16-LABEL: s_test_minmax_f16:
+; GFX12-SDAG-TRUE16: ; %bb.0:
+; GFX12-SDAG-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s5, s4
+; GFX12-SDAG-TRUE16-NEXT: s_mov_b32 s4, s3
+; GFX12-SDAG-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
+; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX12-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; GFX12-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX12-SDAG-FAKE16-LABEL: s_test_minmax_f16:
+; GFX12-SDAG-FAKE16: ; %bb.0:
+; GFX12-SDAG-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
+; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s5, s4
+; GFX12-SDAG-FAKE16-NEXT: s_mov_b32 s4, s3
+; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-SDAG-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
+; GFX12-SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
+; GFX12-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[4:5]
+; GFX12-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX12-GISEL-TRUE16-LABEL: s_test_minmax_f16:
+; GFX12-GISEL-TRUE16: ; %bb.0:
+; GFX12-GISEL-TRUE16-NEXT: s_maximum_f16 s0, s0, s1
+; GFX12-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s6, s3
+; GFX12-GISEL-TRUE16-NEXT: s_mov_b32 s7, s4
+; GFX12-GISEL-TRUE16-NEXT: s_minimum_f16 s0, s0, s2
+; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX12-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GFX12-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX12-GISEL-FAKE16-LABEL: s_test_minmax_f16:
+; GFX12-GISEL-FAKE16: ; %bb.0:
+; GFX12-GISEL-FAKE16-NEXT: s_maximum_f16 s0, s0, s1
+; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s6, s3
+; GFX12-GISEL-FAKE16-NEXT: s_mov_b32 s7, s4
+; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-GISEL-FAKE16-NEXT: s_minimum_f16 s0, s0, s2
+; GFX12-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GFX12-GISEL-FAKE16-NEXT: s_endpgm
%smax = call half @llvm.maximum.f16(half %a, half %b)
%sminmax = call half @llvm.minimum.f16(half %smax, half %c)
store half %sminmax, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index 8c98931b02933..87f76bab79ed0 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -20,6 +20,12 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-SDAG,GFX11-FAKE16-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16,GFX11-GISEL,GFX11-FAKE16-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-SDAG,GFX1170-TRUE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16,GFX1170-GISEL,GFX1170-TRUE16-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-SDAG,GFX1170-FAKE16-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16,GFX1170-GISEL,GFX1170-FAKE16-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-SDAG,GFX12-TRUE16-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16,GFX12-GISEL,GFX12-TRUE16-GISEL %s
@@ -129,6 +135,42 @@ define half @v_minimumnum_f16(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -233,6 +275,18 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimumnum_f16_nnan:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimumnum_f16_nnan:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -302,6 +356,22 @@ define half @v_minimumnum_f16_1.0(half %x) {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimumnum_f16_1.0:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, 1.0, v0.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimumnum_f16_1.0:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, 1.0, v0
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_1.0:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -410,6 +480,22 @@ define float @v_minimumnum_f32(float %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -468,6 +554,12 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -564,6 +656,24 @@ define double @v_minimumnum_f64(double %x, double %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -624,6 +734,12 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -674,6 +790,14 @@ define float @v_minimumnum_f32_1.0(float %x) {
; GFX11-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_1.0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, 1.0, v0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -730,6 +854,14 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -787,6 +919,14 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -844,6 +984,14 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -898,6 +1046,14 @@ define double @v_minimumnum_f64_1.0(double %x) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f64_1.0:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], 1.0
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f64_1.0:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1032,6 +1188,42 @@ define half @v_minimumnum_f16_v_s(half %x, half inreg %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_v_s:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_v_s:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_v_s:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1206,6 +1398,42 @@ define half @v_minimumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, s1, s1
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, s0, s0
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.h, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, s0, s0
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, s1, s1
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_s_s:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, s1, s1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, s0, s0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v1, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_s_s:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, s0, s0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, s1, s1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_s_s:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1360,6 +1588,24 @@ define float @v_minimumnum_f32_s_v(float inreg %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_s_v:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_s_v:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_s_v:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1488,6 +1734,24 @@ define float @v_minimumnum_f32_v_s(float %x, float inreg %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_v_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_v_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_v_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1616,6 +1880,24 @@ define float @v_minimumnum_f32_s_s(float inreg %x, float inreg %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_s_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, s1, s1
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, s0, s0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v1, v0
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_s_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, s0, s0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, s1, s1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_s_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1727,6 +2009,15 @@ define double @v_minimumnum_f64_s_v(double inreg %x, double %y) {
; GFX11-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f64_s_v:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f64_s_v:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1825,6 +2116,15 @@ define double @v_minimumnum_f64_v_s(double %x, double inreg %y) {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f64_v_s:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f64_v_s:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1940,6 +2240,24 @@ define double @v_minimumnum_f64_s_s(double inreg %x, double inreg %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f64_s_s:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], s[2:3], s[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], s[0:1], s[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[2:3], v[0:1]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f64_s_s:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], s[0:1], s[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], s[2:3], s[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f64_s_s:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2052,6 +2370,24 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2165,6 +2501,24 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2279,6 +2633,24 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, |v0|, |v0|
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_fabs:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2393,6 +2765,24 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX1170-SDAG-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v0, -v0, -v0
+; GFX1170-GISEL-NEXT: v_max_num_f32_e64 v1, -v1, -v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_fneg:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2527,6 +2917,42 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2686,6 +3112,42 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -|v1.l|, -|v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -|v1|, -|v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2846,6 +3308,42 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, |v0.l|, |v0.l|
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, |v1.l|, |v1.l|
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, |v0|, |v0|
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, |v1|, |v1|
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fabs:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3006,6 +3504,42 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) {
; GFX11-FAKE16-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.l, -v0.l, -v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e64 v0.h, -v1.l, -v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v0, -v0, -v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e64 v1, -v1, -v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_fneg:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3146,6 +3680,24 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f64_fneg:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3304,6 +3856,24 @@ define <2 x half> @v_minimumnum_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v2f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v2f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v2f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3402,6 +3972,12 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v2f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v2f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3568,6 +4144,30 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v3f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v3f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3692,6 +4292,13 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v3f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v3f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3895,6 +4502,30 @@ define <4 x half> @v_minimumnum_v4f16(<4 x half> %x, <4 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v4f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v4f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v4f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4032,6 +4663,13 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v4f16_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v4f16_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4297,6 +4935,36 @@ define <6 x half> @v_minimumnum_v6f16(<6 x half> %x, <6 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v2, v2, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v6f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v4
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v5
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v6f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v4
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v5
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v6f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -4651,6 +5319,42 @@ define <8 x half> @v_minimumnum_v8f16(<8 x half> %x, <8 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v3, v3, v7
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v8f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v4
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v5
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v6
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v8f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v4
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v5
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v6
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v7
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v8f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -5257,6 +5961,64 @@ define <16 x half> @v_minimumnum_v16f16(<16 x half> %x, <16 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v7, v7, v12
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v16f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v8
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v9
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v10
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v11
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v12
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v16f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v11, v11
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v12, v12
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v13, v13
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v14, v14
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v15, v15
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v8
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v9
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v10
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v11
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v12
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v16f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6442,6 +7204,118 @@ define <32 x half> @v_minimumnum_v32f16(<32 x half> %x, <32 x half> %y) {
; GFX11-GISEL-NEXT: v_pk_min_f16 v15, v15, v16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v32f16:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v16
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v1, v1, v17
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v2, v2, v18
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v3, v3, v19
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v4, v4, v20
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v5, v5, v21
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v6, v6, v22
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v7, v7, v23
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v8, v8, v24
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v9, v9, v25
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v10, v10, v26
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v11, v11, v27
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v12, v12, v28
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v13, v13, v29
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v14, v14, v30
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v15, v15, v16
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v32f16:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v16, v16
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v17, v17, v17
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v18, v18, v18
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v19, v19, v19
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v20, v20, v20
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v21, v21, v21
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v6, v6, v6
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v22, v22, v22
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v7, v7, v7
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v23, v23, v23
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v8, v8, v8
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v24, v24, v24
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v9, v9, v9
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v25, v25, v25
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v10, v10, v10
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v26, v26, v26
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v11, v11, v11
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v27, v27, v27
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v12, v12, v12
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v28, v28, v28
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v13, v13, v13
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v29, v29, v29
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v14, v14, v14
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v30, v30, v30
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v15, v15, v15
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v16
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v1, v1, v17
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v2, v2, v18
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v3, v3, v19
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v4, v4, v20
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v5, v5, v21
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v6, v6, v22
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v7, v7, v23
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v8, v8, v24
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v9, v9, v25
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v10, v10, v26
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v11, v11, v27
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v12, v12, v28
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v13, v13, v29
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v14, v14, v30
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v16, v31, v31
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v15, v15, v16
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v32f16:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6672,6 +7546,24 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX11-GISEL-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v2f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6736,6 +7628,12 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX11-NEXT: v_dual_min_f32 v0, v0, v2 :: v_dual_min_f32 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v2f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v2f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6884,6 +7782,28 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v3, v3, v3 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v5, v5, v5 :: v_dual_max_num_f32 v2, v2, v2
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v2, v2, v5
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v4, v4, v4
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v2, v2, v5
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v3f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -6957,6 +7877,13 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX11-NEXT: v_min_f32_e32 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v3f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v3 :: v_dual_min_num_f32 v1, v1, v4
+; GFX1170-NEXT: v_min_num_f32_e32 v2, v2, v5
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v3f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7132,6 +8059,30 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX11-GISEL-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
+; GFX1170-SDAG-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v4f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7211,6 +8162,13 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX11-NEXT: v_dual_min_f32 v2, v2, v6 :: v_dual_min_f32 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v4f32_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v4 :: v_dual_min_num_f32 v1, v1, v5
+; GFX1170-NEXT: v_dual_min_num_f32 v2, v2, v6 :: v_dual_min_num_f32 v3, v3, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v4f32_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7360,6 +8318,30 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v2f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v2f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v2f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7431,6 +8413,13 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX11-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v2f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v2f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7616,6 +8605,36 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v3f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v3f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v3f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7698,6 +8717,14 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX11-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v3f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[8:9]
+; GFX1170-NEXT: v_min_num_f64 v[4:5], v[4:5], v[10:11]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v3f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -7920,6 +8947,42 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX11-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v4f64:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v4f64:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v4f64:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8013,6 +9076,15 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX11-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v4f64_nnan:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_min_num_f64 v[2:3], v[2:3], v[10:11]
+; GFX1170-NEXT: v_min_num_f64 v[4:5], v[4:5], v[12:13]
+; GFX1170-NEXT: v_min_num_f64 v[6:7], v[6:7], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v4f64_nnan:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8078,6 +9150,42 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX1170-TRUE16-SDAG: ; %bb.0:
+; GFX1170-TRUE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-SDAG-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-SDAG-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-TRUE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX1170-TRUE16-GISEL: ; %bb.0:
+; GFX1170-TRUE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-TRUE16-GISEL-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-TRUE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-TRUE16-GISEL-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-TRUE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
+; GFX1170-FAKE16-SDAG: ; %bb.0:
+; GFX1170-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-SDAG-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-SDAG-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-GISEL-LABEL: v_minimumnum_f16_no_ieee:
+; GFX1170-FAKE16-GISEL: ; %bb.0:
+; GFX1170-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-FAKE16-GISEL-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-GISEL-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-SDAG-LABEL: v_minimumnum_f16_no_ieee:
; GFX12-TRUE16-SDAG: ; %bb.0:
; GFX12-TRUE16-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8182,6 +9290,18 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX11-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8236,6 +9356,22 @@ define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f32_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f32_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f32_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8294,6 +9430,12 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX11-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f32_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8338,6 +9480,24 @@ define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_f64_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_f64_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_f64_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8398,6 +9558,12 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_f64_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8480,6 +9646,24 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX1170-SDAG: ; %bb.0:
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX1170-GISEL: ; %bb.0:
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-GISEL-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8578,6 +9762,12 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX11-NEXT: v_pk_min_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8680,6 +9870,13 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8796,6 +9993,13 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX11-NEXT: v_pk_min_f16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 456db08bda06b..95592ed50387b 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -3,6 +3,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11,SDAG-GFX11-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,SDAG,SDAG-GFX1170,SDAG-GFX1170-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1170,GISEL,GISEL-GFX1170,GISEL-GFX1170-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12,GISEL-GFX12-TRUE16 %s
@@ -19,6 +23,12 @@ define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_minmax_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_minmax_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -61,6 +71,26 @@ define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
+; SDAG-GFX1170-LABEL: s_test_minmax_i32:
+; SDAG-GFX1170: ; %bb.0:
+; SDAG-GFX1170-NEXT: s_max_i32 s0, s0, s1
+; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX1170-NEXT: s_min_i32 s0, s0, s2
+; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
+; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5]
+; SDAG-GFX1170-NEXT: s_endpgm
+;
+; GISEL-GFX1170-LABEL: s_test_minmax_i32:
+; GISEL-GFX1170: ; %bb.0:
+; GISEL-GFX1170-NEXT: s_max_i32 s0, s0, s1
+; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1170-NEXT: s_min_i32 s0, s0, s2
+; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
+; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-GFX1170-NEXT: s_endpgm
+;
; SDAG-GFX12-LABEL: s_test_minmax_i32:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_max_i32 s0, s0, s1
@@ -115,6 +145,12 @@ define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_minmax_commuted_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_i32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_minmax_commuted_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -143,6 +179,12 @@ define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_maxmin_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_maxmin_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -171,6 +213,12 @@ define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_maxmin_commuted_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_i32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_maxmin_commuted_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -200,6 +248,13 @@ define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_smed3_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_i32 v2, v2, v3, v4
+; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_smed3_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -233,6 +288,12 @@ define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_minmax_u32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_minmax_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -275,6 +336,26 @@ define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
+; SDAG-GFX1170-LABEL: s_test_minmax_u32:
+; SDAG-GFX1170: ; %bb.0:
+; SDAG-GFX1170-NEXT: s_max_u32 s0, s0, s1
+; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX1170-NEXT: s_min_u32 s0, s0, s2
+; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
+; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX1170-NEXT: global_store_b32 v0, v1, s[4:5]
+; SDAG-GFX1170-NEXT: s_endpgm
+;
+; GISEL-GFX1170-LABEL: s_test_minmax_u32:
+; GISEL-GFX1170: ; %bb.0:
+; GISEL-GFX1170-NEXT: s_max_u32 s0, s0, s1
+; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1170-NEXT: s_min_u32 s0, s0, s2
+; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX1170-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
+; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-GFX1170-NEXT: s_endpgm
+;
; SDAG-GFX12-LABEL: s_test_minmax_u32:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_max_u32 s0, s0, s1
@@ -329,6 +410,12 @@ define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_minmax_commuted_u32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maxmin_u32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_minmax_commuted_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -357,6 +444,12 @@ define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_maxmin_u32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_maxmin_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -385,6 +478,12 @@ define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-NEXT: v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_maxmin_commuted_u32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minmax_u32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_maxmin_commuted_u32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -414,6 +513,13 @@ define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_umed3_i32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_u32 v2, v2, v3, v4
+; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_umed3_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -457,6 +563,22 @@ define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
; GISEL-GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX1170-LABEL: test_minmax_f32_ieee_true:
+; SDAG-GFX1170: ; %bb.0:
+; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
+; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
+; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-LABEL: test_minmax_f32_ieee_true:
+; GISEL-GFX1170: ; %bb.0:
+; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
+; GISEL-GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
+; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-GFX12-LABEL: test_minmax_f32_ieee_true:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -522,6 +644,26 @@ define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg
; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[6:7]
; GISEL-GFX11-NEXT: s_endpgm
;
+; SDAG-GFX1170-LABEL: s_test_minmax_f32_ieee_false:
+; SDAG-GFX1170: ; %bb.0:
+; SDAG-GFX1170-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; SDAG-GFX1170-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX1170-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX1170-NEXT: v_maxmin_num_f32 v0, s0, s1, v0
+; SDAG-GFX1170-NEXT: global_store_b32 v1, v0, s[4:5]
+; SDAG-GFX1170-NEXT: s_endpgm
+;
+; GISEL-GFX1170-LABEL: s_test_minmax_f32_ieee_false:
+; GISEL-GFX1170: ; %bb.0:
+; GISEL-GFX1170-NEXT: s_max_f32 s0, s0, s1
+; GISEL-GFX1170-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1170-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX1170-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX1170-NEXT: s_min_f32 s0, s0, s2
+; GISEL-GFX1170-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX1170-NEXT: global_store_b32 v1, v0, s[6:7]
+; GISEL-GFX1170-NEXT: s_endpgm
+;
; SDAG-GFX12-LABEL: s_test_minmax_f32_ieee_false:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
@@ -575,6 +717,11 @@ define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b,
; GFX11-NEXT: v_maxmin_f32 v0, v0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_minmax_commuted_f32_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_maxmin_num_f32 v0, v0, v1, v2
@@ -607,6 +754,22 @@ define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
; GISEL-GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX1170-LABEL: test_maxmin_f32_ieee_true:
+; SDAG-GFX1170: ; %bb.0:
+; SDAG-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; SDAG-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
+; SDAG-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
+; SDAG-GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-LABEL: test_maxmin_f32_ieee_true:
+; GISEL-GFX1170: ; %bb.0:
+; GISEL-GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GISEL-GFX1170-NEXT: v_max_num_f32_e32 v2, v2, v2
+; GISEL-GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
+; GISEL-GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-GFX12-LABEL: test_maxmin_f32_ieee_true:
; SDAG-GFX12: ; %bb.0:
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -659,6 +822,11 @@ define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b,
; GFX11-NEXT: v_minmax_f32 v0, v0, v1, v2
; GFX11-NEXT: ; return to shader part epilog
;
+; GFX1170-LABEL: test_maxmin_commuted_f32_ieee_false:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: v_minmax_num_f32 v0, v0, v1, v2
+; GFX1170-NEXT: ; return to shader part epilog
+;
; GFX12-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_minmax_num_f32 v0, v0, v1, v2
@@ -682,6 +850,13 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_med3_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4
+; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_med3_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -716,6 +891,13 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_med3_num_f32 v2, v2, v3, v4
+; GFX1170-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_med3_minimumnum_maximumnum_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -763,6 +945,26 @@ define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
;
+; SDAG-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
+; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX1170-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX1170-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; SDAG-GFX12-TRUE16-LABEL: test_minmax_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
@@ -850,6 +1052,47 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX11-FAKE16-NEXT: s_endpgm
;
+; SDAG-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; SDAG-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX1170-TRUE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l
+; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX1170-TRUE16-NEXT: s_endpgm
+;
+; SDAG-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX1170-FAKE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, s0, s1, v0
+; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX1170-FAKE16-NEXT: s_endpgm
+;
+; GISEL-GFX1170-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: s_max_f16 s0, s0, s1
+; GISEL-GFX1170-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1170-TRUE16-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX1170-TRUE16-NEXT: s_min_f16 s0, s0, s2
+; GISEL-GFX1170-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GISEL-GFX1170-TRUE16-NEXT: s_endpgm
+;
+; GISEL-GFX1170-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: s_max_f16 s0, s0, s1
+; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1170-FAKE16-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX1170-FAKE16-NEXT: s_min_f16 s0, s0, s2
+; GISEL-GFX1170-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GISEL-GFX1170-FAKE16-NEXT: s_endpgm
+;
; SDAG-GFX12-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
@@ -978,6 +1221,42 @@ define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; SDAG-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; SDAG-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX1170-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GISEL-GFX1170-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-GFX12-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1095,6 +1374,26 @@ define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
;
+; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
+; SDAG-GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; SDAG-GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX1170-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; GISEL-GFX1170-FAKE16-NEXT: ; return to shader part epilog
+;
; SDAG-GFX12-TRUE16-LABEL: test_maxmin_f16_ieee_false:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
@@ -1180,6 +1479,42 @@ define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; SDAG-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; SDAG-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; SDAG-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX1170-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GISEL-GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GISEL-GFX1170-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-GFX12-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1305,6 +1640,34 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX1170-TRUE16-LABEL: test_med3_f16:
+; SDAG-GFX1170-TRUE16: ; %bb.0:
+; SDAG-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
+; SDAG-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX1170-FAKE16-LABEL: test_med3_f16:
+; SDAG-GFX1170-FAKE16: ; %bb.0:
+; SDAG-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
+; SDAG-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-TRUE16-LABEL: test_med3_f16:
+; GISEL-GFX1170-TRUE16: ; %bb.0:
+; GISEL-GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
+; GISEL-GFX1170-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1170-FAKE16-LABEL: test_med3_f16:
+; GISEL-GFX1170-FAKE16: ; %bb.0:
+; GISEL-GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1170-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
+; GISEL-GFX1170-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-GFX12-TRUE16-LABEL: test_med3_f16:
; SDAG-GFX12-TRUE16: ; %bb.0:
; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
index 59c0f1cc7782f..344cf2dc77b68 100644
--- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
@@ -23,6 +25,13 @@ define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_safe:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -53,6 +62,13 @@ define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -83,6 +99,13 @@ define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -112,6 +135,12 @@ define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -140,6 +169,13 @@ define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_safe:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -170,6 +206,13 @@ define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -200,6 +243,13 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -229,6 +279,12 @@ define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) {
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -260,6 +316,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -296,6 +361,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -332,6 +406,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x f
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -366,6 +449,12 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -397,6 +486,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -433,6 +531,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -469,6 +576,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x f
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
+; GFX1170-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
+; GFX1170-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -503,6 +619,12 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -534,6 +656,20 @@ define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -579,6 +715,20 @@ define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -624,6 +774,20 @@ define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -668,6 +832,18 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -709,6 +885,20 @@ define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -754,6 +944,20 @@ define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -799,6 +1003,20 @@ define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -843,6 +1061,18 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -899,6 +1129,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -971,6 +1224,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x ha
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1043,6 +1319,29 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x hal
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1107,6 +1406,12 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1153,6 +1458,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1225,6 +1553,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x ha
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1297,6 +1648,29 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x hal
; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.h, v1.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.l, v1.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1361,6 +1735,12 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1427,6 +1807,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1531,6 +1947,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x ha
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1635,6 +2087,42 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x hal
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_ngt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1724,6 +2212,13 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1791,6 +2286,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_safe:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1895,6 +2426,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x ha
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1999,6 +2566,42 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x hal
; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
+; GFX1170-TRUE16: ; %bb.0:
+; GFX1170-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.h, v3.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v0.h, v2.h
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, v0.l, v2.l
+; GFX1170-TRUE16-NEXT: v_cmp_nlt_f16_e64 s2, v1.l, v3.l
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.h, v3.h, v1.h, vcc_lo
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX1170-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1170-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX1170-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-FAKE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
+; GFX1170-FAKE16: ; %bb.0:
+; GFX1170-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1170-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3
+; GFX1170-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1170-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
+; GFX1170-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
+; GFX1170-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-TRUE16-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2088,6 +2691,13 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2120,6 +2730,14 @@ define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2155,6 +2773,14 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
+; GFX1170: ; %bb.0:
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
index f1d1ddf49bbff..6b2be6b05b120 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
@@ -11,6 +11,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -123,6 +127,44 @@ define half @test_vector_reduce_fmax_v2half(<2 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v2half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v2half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -307,6 +349,54 @@ define half @test_vector_reduce_fmax_v3half(<3 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v3half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v3half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -535,6 +625,59 @@ define half @test_vector_reduce_fmax_v4half(<4 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v4half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v4half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmax_v8half(<8 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v8half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v8half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v6
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmax_v16half(<16 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_max_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v1.l, v1.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v2.l, v2.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v3.l, v3.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v4.l, v4.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v5.l, v5.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v6.l, v6.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max3_num_f16 v0.l, v0.l, v7.l, v7.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmax_v16half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v1, v9
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v2, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v3, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v4, v2
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v5, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v6, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max3_num_f16 v0, v0, v7, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.h, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v3.h, v4.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v5.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.h, v6.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmax_v16half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v11
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v12
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v4, v5
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v6, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmax_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmax_v2float(<2 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmax_v3float(<3 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v1, v2
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmax_v4float(<4 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmax_v8float(<8 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v4, v5 :: v_dual_max_num_f32 v3, v6, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmax_v16float(<16 x float> %v) {
; GFX11-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v8, v9
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v10, v11
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v12, v13
+; GFX1170-SDAG-NEXT: v_max3_num_f32 v0, v0, v14, v15
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_max_num_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v3, v4 :: v_dual_max_num_f32 v4, v5, v6
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v5, v7, v8 :: v_dual_max_num_f32 v6, v9, v10
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v7, v11, v12 :: v_dual_max_num_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v1, v2, v3 :: v_dual_max_num_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v7 :: v_dual_max_num_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v1, v2, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2523,6 +3000,24 @@ define double @test_vector_reduce_fmax_v2double(<2 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v2double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v2double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v2double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2656,6 +3151,28 @@ define double @test_vector_reduce_fmax_v3double(<3 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v3double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v3double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v3double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2815,6 +3332,34 @@ define double @test_vector_reduce_fmax_v4double(<4 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v4double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v4double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3062,6 +3607,52 @@ define double @test_vector_reduce_fmax_v8double(<8 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v8double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v8double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3511,6 +4102,92 @@ define double @test_vector_reduce_fmax_v16double(<16 x double> %v) {
; GFX11-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmax_v16double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmax_v16double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[16:17], v[18:19]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[20:21], v[22:23]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[24:25], v[26:27]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[28:29], v[30:31]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmax_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3627,6 +4304,7 @@ declare double @llvm.vector.reduce.fmax.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
+; GFX1170: {{.*}}
; GFX12: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
index a20b5de786271..e77b44d3ee3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll
@@ -5,6 +5,8 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
@@ -69,6 +71,20 @@ define half @test_vector_reduce_fmaximum_v2half(<2 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v2half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -180,6 +196,27 @@ define half @test_vector_reduce_fmaximum_v3half(<3 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0xfc00
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v3half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: s_mov_b32 s0, 0xfc00
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -326,6 +363,23 @@ define half @test_vector_reduce_fmaximum_v4half(<4 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v4half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -560,6 +614,28 @@ define half @test_vector_reduce_fmaximum_v8half(<8 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v8half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1002,6 +1078,38 @@ define half @test_vector_reduce_fmaximum_v16half(<16 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v3, v3, v7
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v2, v2, v6
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmaximum_v16half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v3, v3, v7
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v5
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v2, v2, v6
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v4
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmaximum_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1092,6 +1200,12 @@ define float @test_vector_reduce_fmaximum_v2float(<2 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v2float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v2float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1169,6 +1283,12 @@ define float @test_vector_reduce_fmaximum_v3float(<3 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v3float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v1, v2
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v3float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1263,6 +1383,14 @@ define float @test_vector_reduce_fmaximum_v4float(<4 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v4float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v4float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1427,6 +1555,17 @@ define float @test_vector_reduce_fmaximum_v8float(<8 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v8float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v8float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1730,6 +1869,23 @@ define float @test_vector_reduce_fmaximum_v16float(<16 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v16float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f32 v0, v0, v1
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v2, v3
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v4, v5
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v6, v7
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v8, v9
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v10, v11
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v12, v13
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum3_f32 v0, v0, v14, v15
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v16float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1807,6 +1963,12 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v2double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v2double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1895,6 +2057,14 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v3double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v3double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2007,6 +2177,15 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v4double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v4double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2210,6 +2389,21 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v8double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v8double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2609,6 +2803,35 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fmaximum_v16double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[10:11]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[12:13]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[14:15]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[16:17]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[18:19]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[20:21]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[22:23]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[24:25]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[26:27]
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[28:29]
+; GFX1170-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_maximum_f64 v[0:1], v[0:1], v[30:31]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fmaximum_v16double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2664,6 +2887,7 @@ declare double @llvm.vector.reduce.fmaximum.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10-SDAG: {{.*}}
; GFX11-SDAG: {{.*}}
+; GFX1170-SDAG: {{.*}}
; GFX12-SDAG: {{.*}}
; GFX7-SDAG: {{.*}}
; GFX8-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
index 9b26912e659e6..efb210d6f07f1 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
@@ -11,6 +11,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -123,6 +127,44 @@ define half @test_vector_reduce_fmin_v2half(<2 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v2half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v2half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -307,6 +349,54 @@ define half @test_vector_reduce_fmin_v3half(<3 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7e00
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v3half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0x7e00
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v3half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -535,6 +625,59 @@ define half @test_vector_reduce_fmin_v4half(<4 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v4half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v4half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -871,6 +1014,86 @@ define half @test_vector_reduce_fmin_v8half(<8 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v8half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v3, v3, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v1, v1, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v2, v2, v2
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_max_num_f16 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_min_num_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v3.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v3.h, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v8half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v6
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1438,6 +1661,138 @@ define half @test_vector_reduce_fmin_v16half(<16 x half> %v) {
; GFX11-GISEL-FAKE16-NEXT: v_min_f16_e32 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v1.l, v1.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v2.l, v2.h
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v3.l, v3.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v4.l, v4.h
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v5.l, v5.h
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v6.l, v6.h
+; GFX1170-SDAG-TRUE16-NEXT: v_min3_num_f16 v0.l, v0.l, v7.l, v7.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fmin_v16half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v1, v9
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v2, v8
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v3, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v5
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v4, v2
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v5, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v7
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v6, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_min3_num_f16 v0, v0, v7, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v1.h, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v2.l, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v2.h, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v1.h, v3.l, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.l, v3.h, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v2.h, v4.l, v4.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.l, v4.h, v4.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v3.h, v5.l, v5.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.h, v5.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v6.l, v6.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v6.h, v6.h
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v5.h, v7.l, v7.l
+; GFX1170-GISEL-TRUE16-NEXT: v_max_num_f16_e32 v6.l, v7.h, v7.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v1.h, v2.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.l, v2.h, v3.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v2.h, v3.h, v4.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.l, v4.h, v5.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v3.h, v5.h, v6.l
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fmin_v16half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v8, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v9, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v10, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v1, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v2, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v3, v3, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v8, v11, v11
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v9, v12, v12
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v5, v5, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v10, v13, v13
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v6, v6, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v11, v14, v14
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v7, v7, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_max_num_f16_e32 v12, v15, v15
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v3, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v4, v4, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v5, v5, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v6, v6, v11
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v7, v7, v12
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v2, v4, v5
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v3, v6, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fmin_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1671,6 +2026,22 @@ define float @test_vector_reduce_fmin_v2float(<2 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1779,6 +2150,21 @@ define float @test_vector_reduce_fmin_v3float(<3 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v1, v2
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_max_num_f32 v1, v2, v2
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1910,6 +2296,25 @@ define float @test_vector_reduce_fmin_v4float(<4 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2093,6 +2498,33 @@ define float @test_vector_reduce_fmin_v8float(<8 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v2, v4, v5 :: v_dual_min_num_f32 v3, v6, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2382,6 +2814,51 @@ define float @test_vector_reduce_fmin_v16float(<16 x float> %v) {
; GFX11-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v8, v9
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v10, v11
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v12, v13
+; GFX1170-SDAG-NEXT: v_min3_num_f32 v0, v0, v14, v15
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v0, v0, v1 :: v_dual_min_num_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v3, v6, v6 :: v_dual_min_num_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v4, v7, v7 :: v_dual_max_num_f32 v5, v8, v8
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v6, v9, v9 :: v_dual_max_num_f32 v7, v10, v10
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v8, v11, v11 :: v_dual_max_num_f32 v9, v12, v12
+; GFX1170-GISEL-NEXT: v_dual_max_num_f32 v10, v13, v13 :: v_dual_max_num_f32 v11, v14, v14
+; GFX1170-GISEL-NEXT: v_max_num_f32_e32 v12, v15, v15
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v3, v4 :: v_dual_min_num_f32 v4, v5, v6
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v5, v7, v8 :: v_dual_min_num_f32 v6, v9, v10
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v7, v11, v12 :: v_dual_min_num_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v1, v2, v3 :: v_dual_min_num_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_dual_min_num_f32 v3, v6, v7 :: v_dual_min_num_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v1, v2, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2522,6 +2999,24 @@ define double @test_vector_reduce_fmin_v2double(<2 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v2double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v2double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v2double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2655,6 +3150,28 @@ define double @test_vector_reduce_fmin_v3double(<3 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v3double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v3double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v3double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2814,6 +3331,34 @@ define double @test_vector_reduce_fmin_v4double(<4 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v4double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v4double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3061,6 +3606,52 @@ define double @test_vector_reduce_fmin_v8double(<8 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v8double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v8double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3510,6 +4101,92 @@ define double @test_vector_reduce_fmin_v16double(<16 x double> %v) {
; GFX11-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fmin_v16double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[6:7], v[6:7]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[8:9], v[8:9]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[10:11], v[10:11]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[12:13], v[12:13]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[14:15], v[14:15]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[16:17], v[16:17]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[18:19], v[18:19]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[20:21], v[20:21]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[22:23], v[22:23]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[24:25], v[24:25]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[26:27], v[26:27]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[4:5], v[28:29], v[28:29]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-NEXT: v_max_num_f64 v[2:3], v[30:31], v[30:31]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fmin_v16double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[0:1], v[0:1], v[0:1]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[2:3], v[2:3], v[2:3]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[4:5], v[4:5], v[4:5]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[6:7], v[6:7], v[6:7]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[8:9], v[8:9], v[8:9]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[10:11], v[10:11], v[10:11]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[12:13], v[12:13], v[12:13]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[14:15], v[14:15], v[14:15]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[16:17], v[16:17], v[16:17]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[18:19], v[18:19], v[18:19]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[20:21], v[20:21], v[20:21]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[22:23], v[22:23], v[22:23]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[24:25], v[24:25], v[24:25]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[26:27], v[26:27], v[26:27]
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[28:29], v[28:29], v[28:29]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[8:9], v[16:17], v[18:19]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[10:11], v[20:21], v[22:23]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[12:13], v[24:25], v[26:27]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-NEXT: v_max_num_f64 v[30:31], v[30:31], v[30:31]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[14:15], v[28:29], v[30:31]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_min_num_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fmin_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -3626,6 +4303,7 @@ declare double @llvm.vector.reduce.fmin.v16double(<16 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
+; GFX1170: {{.*}}
; GFX12: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
index deeac90a952c5..f331a33364424 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
@@ -5,6 +5,10 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-SDAG,GFX1170-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1170,GFX1170-GISEL,GFX1170-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
@@ -71,6 +75,34 @@ define half @test_vector_reduce_fminimum_v2half(<2 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v2half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v2half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v2half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -204,6 +236,44 @@ define half @test_vector_reduce_fminimum_v3half(<3 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7c00
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v3half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x7c00
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_perm_b32 v1, s0, v1, 0x5040100
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v3half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v3half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -375,6 +445,44 @@ define half @test_vector_reduce_fminimum_v4half(<4 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v1, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v4half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v4half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v4half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -638,6 +746,62 @@ define half @test_vector_reduce_fminimum_v8half(<8 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v8half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v8half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v5
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v7
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v8half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1122,6 +1286,90 @@ define half @test_vector_reduce_fminimum_v16half(<16 x half> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
+; GFX1170-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v3, v3, v7
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v2, v2, v6
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-TRUE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-SDAG-FAKE16-LABEL: test_vector_reduce_fminimum_v16half:
+; GFX1170-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1170-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v3, v3, v7
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v5
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v2, v2, v6
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v4
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v1, v1, v3
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v2
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_pk_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1170-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
+; GFX1170-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.l, v4.l, v4.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v2.h, v5.l, v5.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.l, v6.l, v6.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v3.h, v7.l, v7.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.l, v2.l, v2.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v1.h, v3.l, v3.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.h, v1.l, v1.h
+; GFX1170-GISEL-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v0.h
+; GFX1170-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-FAKE16-LABEL: test_vector_reduce_fminimum_v16half:
+; GFX1170-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1170-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v4
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v5
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v6
+; GFX1170-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v8
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v1, v9
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v2, v10
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v3, v11
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v4, v4, v12
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v5, v5, v13
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v6, v6, v14
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v7, v7, v15
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v2, v4, v5
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v3, v6, v7
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v1, v2, v3
+; GFX1170-GISEL-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
+; GFX1170-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-TRUE16-LABEL: test_vector_reduce_fminimum_v16half:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX12-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1272,6 +1520,12 @@ define float @test_vector_reduce_fminimum_v2float(<2 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fminimum_v2float:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fminimum_v2float:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1349,6 +1603,20 @@ define float @test_vector_reduce_fminimum_v3float(<3 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v3float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v1, v2
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v3float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v2
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v3float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1455,6 +1723,23 @@ define float @test_vector_reduce_fminimum_v4float(<4 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1632,6 +1917,31 @@ define float @test_vector_reduce_fminimum_v8float(<8 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1953,6 +2263,45 @@ define float @test_vector_reduce_fminimum_v16float(<16 x float> %v) {
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16float:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v2, v3
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v4, v5
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v6, v7
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v8, v9
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v10, v11
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v12, v13
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum3_f32 v0, v0, v14, v15
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16float:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
+; GFX1170-GISEL-NEXT: v_minimum_f32 v4, v8, v9
+; GFX1170-GISEL-NEXT: v_minimum_f32 v5, v10, v11
+; GFX1170-GISEL-NEXT: v_minimum_f32 v6, v12, v13
+; GFX1170-GISEL-NEXT: v_minimum_f32 v7, v14, v15
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_minimum_f32 v2, v4, v5
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v3, v6, v7
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f32 v1, v2, v3
+; GFX1170-GISEL-NEXT: v_minimum_f32 v0, v0, v1
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16float:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2056,6 +2405,12 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fminimum_v2double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fminimum_v2double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2144,6 +2499,14 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-LABEL: test_vector_reduce_fminimum_v3double:
+; GFX1170: ; %bb.0: ; %entry
+; GFX1170-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-LABEL: test_vector_reduce_fminimum_v3double:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2256,6 +2619,24 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v4double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2472,6 +2853,35 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v8double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2889,6 +3299,60 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1170-SDAG-LABEL: test_vector_reduce_fminimum_v16double:
+; GFX1170-SDAG: ; %bb.0: ; %entry
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-SDAG-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[4:5]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[6:7]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[8:9]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[10:11]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[12:13]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[14:15]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[16:17]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[18:19]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[20:21]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[22:23]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[24:25]
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[26:27]
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[28:29]
+; GFX1170-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-SDAG-NEXT: v_minimum_f64 v[0:1], v[0:1], v[30:31]
+; GFX1170-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1170-GISEL-LABEL: test_vector_reduce_fminimum_v16double:
+; GFX1170-GISEL: ; %bb.0: ; %entry
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1170-GISEL-NEXT: scratch_load_b32 v31, off, s32
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[8:9], v[16:17], v[18:19]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[10:11], v[20:21], v[22:23]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[12:13], v[24:25], v[26:27]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[4:5], v[8:9], v[10:11]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[14:15], v[28:29], v[30:31]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[6:7], v[12:13], v[14:15]
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[2:3], v[4:5], v[6:7]
+; GFX1170-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-GISEL-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3]
+; GFX1170-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
More information about the llvm-commits
mailing list