[llvm] true16 fpminmax pat (PR #125107)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 11:04:12 PST 2025
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/125107
None
From 4cf0cf55e6b29240f8401b2e5e97190857d361a1 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 30 Jan 2025 12:48:48 -0500
Subject: [PATCH] true16 fpminmax pat
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 14 +
llvm/test/CodeGen/AMDGPU/minimummaximum.ll | 84 +++-
llvm/test/CodeGen/AMDGPU/minmax.ll | 554 +++++++++++++++------
3 files changed, 487 insertions(+), 165 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5af46989aca97b..40dfca6ced77d9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3767,6 +3767,13 @@ def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
}
+let True16Predicate = UseRealTrue16Insts in {
+def : FPMinMaxPat<V_MINMAX_F16_t16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F16_t16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F16_t16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F16_t16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+}
+
let True16Predicate = UseFakeTrue16Insts in {
def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
@@ -3796,6 +3803,13 @@ def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}
+let True16Predicate = UseRealTrue16Insts, SubtargetPredicate = isGFX12Plus in {
+def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_t16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_t16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+}
+
let True16Predicate = UseFakeTrue16Insts, SubtargetPredicate = isGFX12Plus in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
diff --git a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
index c375b16ee3809c..7e867a5372986d 100644
--- a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-FAKE16 %s
define amdgpu_ps float @test_minmax_f32(float %a, float %b, float %c) {
; GFX12-LABEL: test_minmax_f32:
@@ -72,30 +74,84 @@ define amdgpu_ps float @test_maxmin_commuted_f32(float %a, float %b, float %c) {
}
define amdgpu_ps half @test_minmax_f16(half %a, half %b, half %c) {
-; GFX12-LABEL: test_minmax_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; SDAG-TRUE16-LABEL: test_minmax_f16:
+; SDAG-TRUE16: ; %bb.0:
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-FAKE16-LABEL: test_minmax_f16:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-TRUE16-LABEL: test_minmax_f16:
+; GISEL-TRUE16: ; %bb.0:
+; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-FAKE16-LABEL: test_minmax_f16:
+; GISEL-FAKE16: ; %bb.0:
+; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %max, half %c)
ret half %minmax
}
define amdgpu_ps half @test_minmax_commuted_f16(half %a, half %b, half %c) {
-; GFX12-LABEL: test_minmax_commuted_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; SDAG-TRUE16-LABEL: test_minmax_commuted_f16:
+; SDAG-TRUE16: ; %bb.0:
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-FAKE16-LABEL: test_minmax_commuted_f16:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-TRUE16-LABEL: test_minmax_commuted_f16:
+; GISEL-TRUE16: ; %bb.0:
+; GISEL-TRUE16-NEXT: v_maximumminimum_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-FAKE16-LABEL: test_minmax_commuted_f16:
+; GISEL-FAKE16: ; %bb.0:
+; GISEL-FAKE16-NEXT: v_maximumminimum_f16 v0, v0, v1, v2
+; GISEL-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maximum.f16(half %a, half %b)
%minmax = call half @llvm.minimum.f16(half %c, half %max)
ret half %minmax
}
define amdgpu_ps half @test_maxmin_commuted_f16(half %a, half %b, half %c) {
-; GFX12-LABEL: test_maxmin_commuted_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; SDAG-TRUE16-LABEL: test_maxmin_commuted_f16:
+; SDAG-TRUE16: ; %bb.0:
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-FAKE16-LABEL: test_maxmin_commuted_f16:
+; SDAG-FAKE16: ; %bb.0:
+; SDAG-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
+; SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-TRUE16-LABEL: test_maxmin_commuted_f16:
+; GISEL-TRUE16: ; %bb.0:
+; GISEL-TRUE16-NEXT: v_minimummaximum_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-FAKE16-LABEL: test_maxmin_commuted_f16:
+; GISEL-FAKE16: ; %bb.0:
+; GISEL-FAKE16-NEXT: v_minimummaximum_f16 v0, v0, v1, v2
+; GISEL-FAKE16-NEXT: ; return to shader part epilog
%min = call half @llvm.minimum.f16(half %a, half %b)
%maxmin = call half @llvm.maximum.f16(half %c, half %min)
ret half %maxmin
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 774a22fb907db9..954dab3d0fc6fb 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11,SDAG-GFX11-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG,SDAG-GFX11,SDAG-GFX11-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL,GISEL-GFX11,GISEL-GFX11-FAKE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,SDAG,SDAG-GFX12,SDAG-GFX12-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12,GISEL-GFX12-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GISEL,GISEL-GFX12,GISEL-GFX12-FAKE16 %s
define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_i32:
@@ -467,47 +471,111 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
}
define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
-; GFX11-LABEL: test_minmax_f16_ieee_false:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: test_minmax_f16_ieee_false:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; SDAG-GFX11-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX11-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
+; SDAG-GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX11-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX11-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
+; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX12-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX12-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; SDAG-GFX12-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX12-TRUE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX12-TRUE16: ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX12-FAKE16-LABEL: test_minmax_f16_ieee_false:
+; GISEL-GFX12-FAKE16: ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; GISEL-GFX12-FAKE16-NEXT: ; return to shader part epilog
%max = call half @llvm.maxnum.f16(half %a, half %b)
%minmax = call half @llvm.minnum.f16(half %max, half %c)
ret half %minmax
}
define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
-; SDAG-GFX11-LABEL: s_test_minmax_f16_ieee_false:
-; SDAG-GFX11: ; %bb.0:
-; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; SDAG-GFX11-NEXT: s_mov_b32 s5, s4
-; SDAG-GFX11-NEXT: s_mov_b32 s4, s3
-; SDAG-GFX11-NEXT: v_maxmin_f16 v0, s0, s1, v0
-; SDAG-GFX11-NEXT: global_store_b16 v1, v0, s[4:5]
-; SDAG-GFX11-NEXT: s_endpgm
-;
-; GISEL-GFX11-LABEL: s_test_minmax_f16_ieee_false:
-; GISEL-GFX11: ; %bb.0:
-; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GISEL-GFX11-NEXT: s_mov_b32 s6, s3
-; GISEL-GFX11-NEXT: s_mov_b32 s7, s4
-; GISEL-GFX11-NEXT: v_maxmin_f16 v0, s0, s1, v0
-; GISEL-GFX11-NEXT: global_store_b16 v1, v0, s[6:7]
-; GISEL-GFX11-NEXT: s_endpgm
-;
-; SDAG-GFX12-LABEL: s_test_minmax_f16_ieee_false:
-; SDAG-GFX12: ; %bb.0:
-; SDAG-GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; SDAG-GFX12-NEXT: s_mov_b32 s5, s4
-; SDAG-GFX12-NEXT: s_mov_b32 s4, s3
-; SDAG-GFX12-NEXT: v_maxmin_num_f16 v0, s0, s1, v0
-; SDAG-GFX12-NEXT: global_store_b16 v1, v0, s[4:5]
-; SDAG-GFX12-NEXT: s_endpgm
+; SDAG-GFX11-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX11-TRUE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX11-TRUE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, s0, s1, v0.l
+; SDAG-GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX11-TRUE16-NEXT: s_endpgm
+;
+; SDAG-GFX11-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; SDAG-GFX11-FAKE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX11-FAKE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, s0, s1, v0
+; SDAG-GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX11-FAKE16-NEXT: s_endpgm
+;
+; GISEL-GFX11-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GISEL-GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-GFX11-TRUE16-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX11-TRUE16-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, s0, s1, v0.l
+; GISEL-GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GISEL-GFX11-TRUE16-NEXT: s_endpgm
+;
+; GISEL-GFX11-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; GISEL-GFX11-FAKE16-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX11-FAKE16-NEXT: s_mov_b32 s7, s4
+; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, s0, s1, v0
+; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v1, v0, s[6:7]
+; GISEL-GFX11-FAKE16-NEXT: s_endpgm
+;
+; SDAG-GFX12-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-GFX12-TRUE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX12-TRUE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l
+; SDAG-GFX12-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX12-TRUE16-NEXT: s_endpgm
+;
+; SDAG-GFX12-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
+; SDAG-GFX12-FAKE16-NEXT: s_mov_b32 s5, s4
+; SDAG-GFX12-FAKE16-NEXT: s_mov_b32 s4, s3
+; SDAG-GFX12-FAKE16-NEXT: v_maxmin_num_f16 v0, s0, s1, v0
+; SDAG-GFX12-FAKE16-NEXT: global_store_b16 v1, v0, s[4:5]
+; SDAG-GFX12-FAKE16-NEXT: s_endpgm
;
; GISEL-GFX12-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX12: ; %bb.0:
@@ -526,136 +594,320 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
}
define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
-; SDAG-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
-; SDAG-GFX11: ; %bb.0:
-; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
-; SDAG-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
-; SDAG-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
-; SDAG-GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
-; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX11-LABEL: test_minmax_commuted_f16_ieee_true:
-; GISEL-GFX11: ; %bb.0:
-; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
-; GISEL-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
-; GISEL-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
-; GISEL-GFX11-NEXT: v_maxmin_f16 v0, v0, v1, v2
-; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
-; SDAG-GFX12: ; %bb.0:
-; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
-; SDAG-GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
-; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX12-LABEL: test_minmax_commuted_f16_ieee_true:
-; GISEL-GFX12: ; %bb.0:
-; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
-; GISEL-GFX12-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
-; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v1.l, v1.l, v1.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v0.h, v0.h
+; SDAG-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, v0.l, v1.l, v0.h
+; SDAG-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v2, v2, v2
+; SDAG-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
+; SDAG-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX11-TRUE16-NEXT: v_maxmin_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v2, v2, v2
+; GISEL-GFX11-FAKE16-NEXT: v_maxmin_f16 v0, v0, v1, v2
+; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; SDAG-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v1.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; SDAG-GFX12-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; SDAG-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-TRUE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX12-TRUE16: ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX12-TRUE16-NEXT: v_maxmin_num_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: test_minmax_commuted_f16_ieee_true:
+; GISEL-GFX12-FAKE16: ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GISEL-GFX12-FAKE16-NEXT: v_maxmin_num_f16 v0, v0, v1, v2
+; GISEL-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%max = call half @llvm.maxnum.f16(half %a, half %b)
%minmax = call half @llvm.minnum.f16(half %c, half %max)
ret half %minmax
}
define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
-; GFX11-LABEL: test_maxmin_f16_ieee_false:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
-; GFX11-NEXT: ; return to shader part epilog
-;
-; GFX12-LABEL: test_maxmin_f16_ieee_false:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
-; GFX12-NEXT: ; return to shader part epilog
+; SDAG-GFX11-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-GFX11-TRUE16-NEXT: v_minmax_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX11-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
+; SDAG-GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX11-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: v_minmax_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX11-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
+; GISEL-GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX12-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; SDAG-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
+; SDAG-GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; SDAG-GFX12-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; SDAG-GFX12-FAKE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX12-TRUE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX12-TRUE16: ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v2.l
+; GISEL-GFX12-TRUE16-NEXT: ; return to shader part epilog
+;
+; GISEL-GFX12-FAKE16-LABEL: test_maxmin_f16_ieee_false:
+; GISEL-GFX12-FAKE16: ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; GISEL-GFX12-FAKE16-NEXT: ; return to shader part epilog
%min = call half @llvm.minnum.f16(half %a, half %b)
%maxmin = call half @llvm.maxnum.f16(half %min, half %c)
ret half %maxmin
}
define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
-; SDAG-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
-; SDAG-GFX11: ; %bb.0:
-; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
-; SDAG-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
-; SDAG-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
-; SDAG-GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
-; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX11-LABEL: test_maxmin_commuted_f16_ieee_true:
-; GISEL-GFX11: ; %bb.0:
-; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT: v_max_f16_e32 v0, v0, v0
-; GISEL-GFX11-NEXT: v_max_f16_e32 v1, v1, v1
-; GISEL-GFX11-NEXT: v_max_f16_e32 v2, v2, v2
-; GISEL-GFX11-NEXT: v_minmax_f16 v0, v0, v1, v2
-; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; SDAG-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
-; SDAG-GFX12: ; %bb.0:
-; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
-; SDAG-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
-; SDAG-GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
-; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
-;
-; GISEL-GFX12-LABEL: test_maxmin_commuted_f16_ieee_true:
-; GISEL-GFX12: ; %bb.0:
-; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
-; GISEL-GFX12-NEXT: v_max_num_f16_e32 v2, v2, v2
-; GISEL-GFX12-NEXT: v_minmax_num_f16 v0, v0, v1, v2
-; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v1.l, v1.l, v1.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v0.h, v0.h
+; SDAG-GFX11-TRUE16-NEXT: v_minmax_f16 v0.l, v0.l, v1.l, v0.h
+; SDAG-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
+; SDAG-GFX11-FAKE16-NEXT: v_max_f16_e32 v2, v2, v2
+; SDAG-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
+; SDAG-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX11-TRUE16-NEXT: v_max_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX11-TRUE16-NEXT: v_minmax_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v0, v0, v0
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v1, v1, v1
+; GISEL-GFX11-FAKE16-NEXT: v_max_f16_e32 v2, v2, v2
+; GISEL-GFX11-FAKE16-NEXT: v_minmax_f16 v0, v0, v1, v2
+; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v1.l, v1.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; SDAG-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v0.h, v0.h
+; SDAG-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v1.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; SDAG-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; SDAG-GFX12-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; SDAG-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-TRUE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX12-TRUE16: ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v0.l
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.h, v1.l, v1.l
+; GISEL-GFX12-TRUE16-NEXT: v_max_num_f16_e32 v1.l, v2.l, v2.l
+; GISEL-GFX12-TRUE16-NEXT: v_minmax_num_f16 v0.l, v0.l, v0.h, v1.l
+; GISEL-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: test_maxmin_commuted_f16_ieee_true:
+; GISEL-GFX12-FAKE16: ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v0
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v1, v1, v1
+; GISEL-GFX12-FAKE16-NEXT: v_max_num_f16_e32 v2, v2, v2
+; GISEL-GFX12-FAKE16-NEXT: v_minmax_num_f16 v0, v0, v1, v2
+; GISEL-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half %a, half %b)
%maxmin = call half @llvm.maxnum.f16(half %c, half %min)
ret half %maxmin
}
define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
-; GFX11-LABEL: test_med3_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_med3_f16 v2, v2, v3, v4
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: test_med3_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_med3_num_f16 v2, v2, v3, v4
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-TRUE16-LABEL: test_med3_f16:
+; SDAG-GFX11-TRUE16: ; %bb.0:
+; SDAG-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; SDAG-GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v4.l
+; SDAG-GFX11-TRUE16-NEXT: v_med3_f16 v2.l, v2.l, v2.h, v3.l
+; SDAG-GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-FAKE16-LABEL: test_med3_f16:
+; SDAG-GFX11-FAKE16: ; %bb.0:
+; SDAG-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-FAKE16-NEXT: v_med3_f16 v2, v2, v3, v4
+; SDAG-GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-TRUE16-LABEL: test_med3_f16:
+; GISEL-GFX11-TRUE16: ; %bb.0:
+; GISEL-GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-TRUE16-NEXT: v_med3_f16 v2.l, v2.l, v3.l, v4.l
+; GISEL-GFX11-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-FAKE16-LABEL: test_med3_f16:
+; GISEL-GFX11-FAKE16: ; %bb.0:
+; GISEL-GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-FAKE16-NEXT: v_med3_f16 v2, v2, v3, v4
+; GISEL-GFX11-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-TRUE16-LABEL: test_med3_f16:
+; SDAG-GFX12-TRUE16: ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; SDAG-GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v4.l
+; SDAG-GFX12-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v2.h, v3.l
+; SDAG-GFX12-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: test_med3_f16:
+; SDAG-GFX12-FAKE16: ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
+; SDAG-GFX12-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; SDAG-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-TRUE16-LABEL: test_med3_f16:
+; GISEL-GFX12-TRUE16: ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
+; GISEL-GFX12-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: test_med3_f16:
+; GISEL-GFX12-FAKE16: ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT: v_med3_num_f16 v2, v2, v3, v4
+; GISEL-GFX12-FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; GISEL-GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%tmp0 = call half @llvm.minnum.f16(half %x, half %y)
%tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
%tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
More information about the llvm-commits
mailing list