[llvm-branch-commits] [llvm] AMDGPU: Test more subtargets in minimumnum/maximumnum tests (PR #139144)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 8 13:11:11 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
The patch is 137.50 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/139144.diff
2 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1043-204)
- (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1022-199)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 718a266f49f5d..df79534a0844e 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
@@ -8,6 +10,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
define half @v_maximumnum_f16(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,6 +92,16 @@ define half @v_maximumnum_f16(half %x, half %y) {
}
define half @v_maximumnum_f16_nnan(half %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +156,14 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
}
define half @v_maximumnum_f16_1.0(half %x) {
+; GFX7-LABEL: v_maximumnum_f16_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,6 +229,17 @@ define half @v_maximumnum_f16_1.0(half %x) {
}
define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,36 +272,67 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v1
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-NEXT: v_bfe_u32 v3, v2, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v3, v3, v2, s4
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v2
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-NEXT: v_bfe_u32 v3, v2, 16, 1
+; GFX900-NEXT: s_movk_i32 s4, 0x7fff
+; GFX900-NEXT: v_add3_u32 v3, v3, v2, s4
+; GFX900-NEXT: v_or_b32_e32 v4, 0x400000, v2
+; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
+; GFX950-NEXT: v_cmp_gt_f32_e32 vcc, v2, v3
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16:
; GFX10: ; %bb.0:
@@ -474,6 +546,17 @@ define bfloat @v_maximumnum_bf16(bfloat %x, bfloat %y) {
}
define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
+; GFX7-LABEL: v_maximumnum_bf16_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -490,21 +573,41 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_bf16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v2
-; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_bf16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_bf16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX950-NEXT: v_cmp_gt_f32_e32 vcc, v3, v2
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v2
+; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_bf16_nnan:
; GFX10: ; %bb.0:
@@ -618,6 +721,14 @@ define bfloat @v_maximumnum_bf16_nnan(bfloat %x, bfloat %y) {
}
define float @v_maximumnum_f32(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -666,6 +777,12 @@ define float @v_maximumnum_f32(float %x, float %y) {
}
define float @v_maximumnum_f32_nnan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -704,6 +821,14 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
}
define double @v_maximumnum_f64(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -754,6 +879,12 @@ define double @v_maximumnum_f64(double %x, double %y) {
}
define double @v_maximumnum_f64_nnan(double %x, double %y) {
+; GFX7-LABEL: v_maximumnum_f64_nnan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -792,6 +923,13 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
}
define float @v_maximumnum_f32_1.0(float %x) {
+; GFX7-LABEL: v_maximumnum_f32_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -837,6 +975,14 @@ define float @v_maximumnum_f32_1.0(float %x) {
}
define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -886,6 +1032,14 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
}
define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -935,6 +1089,14 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
}
define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
+; GFX7-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -985,6 +1147,13 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
}
define double @v_maximumnum_f64_1.0(double %x) {
+; GFX7-LABEL: v_maximumnum_f64_1.0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f64_1.0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1030,6 +1199,16 @@ define double @v_maximumnum_f64_1.0(double %x) {
}
define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_max_f32_e32 v0, v1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1038,13 +1217,21 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_s_v:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v1, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_s_v:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_s_v:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v1, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_s_v:
; GFX10: ; %bb.0:
@@ -1102,6 +1289,16 @@ define half @v_maximumnum_f16_s_v(half inreg %x, half %y) {
}
define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_v_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s16
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_v_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1110,13 +1307,21 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_v_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v1, s16, s16
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_v_s:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e64 v1, s16, s16
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_v_s:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e64 v1, s0, s0
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_v_s:
; GFX10: ; %bb.0:
@@ -1174,6 +1379,16 @@ define half @v_maximumnum_f16_v_s(half %x, half inreg %y) {
}
define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
+; GFX7-LABEL: v_maximumnum_f16_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, s16
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, s17
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
; GFX8-LABEL: v_maximumnum_f16_s_s:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1182,13 +1397,21 @@ define half @v_maximumnum_f16_s_s(half inreg %x, half inreg %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_s_s:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e64 v0, s17, s17
-; GFX9-NEXT: v_max_f16_e64 v1...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/139144
More information about the llvm-branch-commits mailing list