[llvm] [AMDGPU] Specialize gfx1250 codegen tests for fake and real t16. NFC. (PR #190390)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 3 12:15:33 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
This is preparation for turning on real true16, so that we can easily
apply or revert that change.
---
Patch is 505.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/190390.diff
32 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll (+102-49)
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn-sin-cos-f16-f32.ll (+30-13)
- (modified) llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll (+610-305)
- (modified) llvm/test/CodeGen/AMDGPU/bf16-conversions.ll (+32-13)
- (modified) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+30-13)
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.bf16.ll (+40-19)
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll (+835-387)
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.v2f16.ll (+27-12)
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.v2f16.ll (+27-12)
- (modified) llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll (+146-72)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll (+505-259)
- (modified) llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll (+1325-124)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll (+64-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+38-17)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.bf16.ll (+64-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.bf16.ll (+64-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll (+111-54)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.bf16.ll (+64-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.bf16.ll (+64-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll (+75-38)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll (+71-33)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll (+71-33)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+460-214)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll (+100-45)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll (+158-64)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo-bf16.ll (+313-136)
- (modified) llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll (+19-9)
- (modified) llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll (+283-133)
- (modified) llvm/test/CodeGen/AMDGPU/scale-offset-flat.ll (+22-8)
- (modified) llvm/test/CodeGen/AMDGPU/scale-offset-global.ll (+22-8)
- (modified) llvm/test/CodeGen/AMDGPU/scale-offset-scratch.ll (+24-8)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir (+2-2)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 1ca67c4acf7a4..239408d31482f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -2,7 +2,8 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-FAKE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-REAL16
declare i16 @llvm.abs.i16(i16, i1)
declare i32 @llvm.abs.i32(i32, i1)
@@ -170,14 +171,23 @@ define i16 @abs_vgpr_i16(i16 %arg) {
; GFX10-NEXT: v_max_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: abs_vgpr_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_sub_nc_u16 v1, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_max_i16 v0, v0, v1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-FAKE16-LABEL: abs_vgpr_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v1, 0, v0
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-FAKE16-NEXT: v_max_i16 v0, v0, v1
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-REAL16-LABEL: abs_vgpr_i16:
+; GFX1250-REAL16: ; %bb.0:
+; GFX1250-REAL16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-REAL16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v0.h, 0, v0.l
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-REAL16-NEXT: v_max_i16 v0.l, v0.l, v0.h
+; GFX1250-REAL16-NEXT: s_set_pc_i64 s[30:31]
%res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
ret i16 %res
}
@@ -390,19 +400,33 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
; GFX10-NEXT: v_max_i16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: abs_vgpr_v2i8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_sub_nc_u16 v2, 0, v0
-; GFX1250-NEXT: v_sub_nc_u16 v3, 0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_max_i16 v0, v0, v2
-; GFX1250-NEXT: v_max_i16 v1, v1, v3
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-FAKE16-LABEL: abs_vgpr_v2i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v2, 0, v0
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v3, 0, v1
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-FAKE16-NEXT: v_max_i16 v0, v0, v2
+; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v3
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-REAL16-LABEL: abs_vgpr_v2i8:
+; GFX1250-REAL16: ; %bb.0:
+; GFX1250-REAL16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-REAL16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-REAL16-NEXT: v_bfe_i32 v2, v0, 0, 8
+; GFX1250-REAL16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v0.l, 0, v2.l
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v0.h, 0, v1.l
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-REAL16-NEXT: v_max_i16 v0.l, v2.l, v0.l
+; GFX1250-REAL16-NEXT: v_max_i16 v1.l, v1.l, v0.h
+; GFX1250-REAL16-NEXT: s_set_pc_i64 s[30:31]
%res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
ret <2 x i8> %res
}
@@ -493,23 +517,41 @@ define <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) {
; GFX10-NEXT: v_max_i16 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: abs_vgpr_v3i8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX1250-NEXT: v_bfe_i32 v2, v2, 0, 8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_sub_nc_u16 v3, 0, v0
-; GFX1250-NEXT: v_sub_nc_u16 v4, 0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_sub_nc_u16 v5, 0, v2
-; GFX1250-NEXT: v_max_i16 v0, v0, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_max_i16 v1, v1, v4
-; GFX1250-NEXT: v_max_i16 v2, v2, v5
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-FAKE16-LABEL: abs_vgpr_v3i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX1250-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v3, 0, v0
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v4, 0, v1
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v5, 0, v2
+; GFX1250-FAKE16-NEXT: v_max_i16 v0, v0, v3
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v4
+; GFX1250-FAKE16-NEXT: v_max_i16 v2, v2, v5
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-REAL16-LABEL: abs_vgpr_v3i8:
+; GFX1250-REAL16: ; %bb.0:
+; GFX1250-REAL16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-REAL16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-REAL16-NEXT: v_bfe_i32 v3, v0, 0, 8
+; GFX1250-REAL16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX1250-REAL16-NEXT: v_bfe_i32 v2, v2, 0, 8
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v0.l, 0, v3.l
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v0.h, 0, v1.l
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v1.h, 0, v2.l
+; GFX1250-REAL16-NEXT: v_max_i16 v0.l, v3.l, v0.l
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250-REAL16-NEXT: v_max_i16 v1.l, v1.l, v0.h
+; GFX1250-REAL16-NEXT: v_max_i16 v2.l, v2.l, v1.h
+; GFX1250-REAL16-NEXT: s_set_pc_i64 s[30:31]
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
ret <3 x i8> %res
}
@@ -694,16 +736,27 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
; GFX10-NEXT: v_max_i16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: abs_vgpr_v3i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_pk_sub_i16 v2, 0, v0
-; GFX1250-NEXT: v_sub_nc_u16 v3, 0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_pk_max_i16 v0, v0, v2
-; GFX1250-NEXT: v_max_i16 v1, v1, v3
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-FAKE16-LABEL: abs_vgpr_v3i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_pk_sub_i16 v2, 0, v0
+; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v3, 0, v1
+; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-FAKE16-NEXT: v_pk_max_i16 v0, v0, v2
+; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v3
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-REAL16-LABEL: abs_vgpr_v3i16:
+; GFX1250-REAL16: ; %bb.0:
+; GFX1250-REAL16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-REAL16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-REAL16-NEXT: v_pk_sub_i16 v2, 0, v0
+; GFX1250-REAL16-NEXT: v_sub_nc_u16 v1.h, 0, v1.l
+; GFX1250-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-REAL16-NEXT: v_pk_max_i16 v0, v0, v2
+; GFX1250-REAL16-NEXT: v_max_i16 v1.l, v1.l, v1.h
+; GFX1250-REAL16-NEXT: s_set_pc_i64 s[30:31]
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
ret <3 x i16> %res
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-sin-cos-f16-f32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-sin-cos-f16-f32.ll
index 802a9722c237a..97572e995f155 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-sin-cos-f16-f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-sin-cos-f16-f32.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --filter-out "shader" --version 6
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,FAKE16 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,REAL16 %s
define amdgpu_ps float @v_sin_f32(float %src) #1 {
; GCN-LABEL: v_sin_f32:
@@ -18,17 +19,25 @@ define amdgpu_ps float @s_sin_f32(float inreg %src) #1 {
}
define amdgpu_ps half @v_sin_f16(half %src) #1 {
-; GCN-LABEL: v_sin_f16:
-; GCN: ; %bb.0:
-; GCN: v_sin_f16_e32 v0, v0
+; FAKE16-LABEL: v_sin_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_sin_f16_e32 v0, v0
+;
+; REAL16-LABEL: v_sin_f16:
+; REAL16: ; %bb.0:
+; REAL16: v_sin_f16_e32 v0.l, v0.l
%sin = call half @llvm.amdgcn.sin.f16(half %src) #0
ret half %sin
}
define amdgpu_ps half @s_sin_f16(half inreg %src) #1 {
-; GCN-LABEL: s_sin_f16:
-; GCN: ; %bb.0:
-; GCN: v_sin_f16_e32 v0, s0
+; FAKE16-LABEL: s_sin_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_sin_f16_e32 v0, s0
+;
+; REAL16-LABEL: s_sin_f16:
+; REAL16: ; %bb.0:
+; REAL16: v_sin_f16_e32 v0.l, s0
%sin = call half @llvm.amdgcn.sin.f16(half %src) #0
ret half %sin
}
@@ -50,17 +59,25 @@ define amdgpu_ps float @s_cos_f32(float inreg %src) #1 {
}
define amdgpu_ps half @v_cos_f16(half %src) #1 {
-; GCN-LABEL: v_cos_f16:
-; GCN: ; %bb.0:
-; GCN: v_cos_f16_e32 v0, v0
+; FAKE16-LABEL: v_cos_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cos_f16_e32 v0, v0
+;
+; REAL16-LABEL: v_cos_f16:
+; REAL16: ; %bb.0:
+; REAL16: v_cos_f16_e32 v0.l, v0.l
%cos = call half @llvm.amdgcn.cos.f16(half %src) #0
ret half %cos
}
define amdgpu_ps half @s_cos_f16(half inreg %src) #1 {
-; GCN-LABEL: s_cos_f16:
-; GCN: ; %bb.0:
-; GCN: v_cos_f16_e32 v0, s0
+; FAKE16-LABEL: s_cos_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cos_f16_e32 v0, s0
+;
+; REAL16-LABEL: s_cos_f16:
+; REAL16: ; %bb.0:
+; REAL16: v_cos_f16_e32 v0.l, s0
%cos = call half @llvm.amdgcn.cos.f16(half %src) #0
ret half %cos
}
diff --git a/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
index 54871a622189b..fe9bad1de22d4 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN:llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GFX1250 %s
+; RUN:llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX1250,FAKE16 %s
+; RUN:llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX1250,REAL16 %s
define float @global_system_atomic_fadd_f32(ptr addrspace(1) %ptr, float %val) {
; GFX1250-LABEL: global_system_atomic_fadd_f32:
@@ -338,173 +339,325 @@ define i64 @global_system_atomic_umax_i64(ptr addrspace(1) %ptr, i64 %val) {
}
define i16 @global_one_as_atomic_min_i16(ptr addrspace(1) %ptr, i16 %val) {
-; GFX1250-LABEL: global_one_as_atomic_min_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v3, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_and_b32_e32 v0, -4, v3
-; GFX1250-NEXT: v_and_b32_e32 v3, 3, v3
-; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 3, v3
-; GFX1250-NEXT: global_load_b32 v5, v[0:1], off
-; GFX1250-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_not_b32_e32 v4, v4
-; GFX1250-NEXT: .LBB28_1: ; %atomicrmw.start
-; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v7, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshrrev_b32_e32 v5, v3, v7
-; GFX1250-NEXT: v_min_i16 v5, v5, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
-; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
-; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
-; GFX1250-NEXT: s_cbranch_execnz .LBB28_1
-; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
-; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v0, v3, v5
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; FAKE16-LABEL: global_one_as_atomic_min_i16:
+; FAKE16: ; %bb.0:
+; FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; FAKE16-NEXT: s_wait_kmcnt 0x0
+; FAKE16-NEXT: v_mov_b32_e32 v3, v0
+; FAKE16-NEXT: s_mov_b32 s0, 0
+; FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; FAKE16-NEXT: v_and_b32_e32 v0, -4, v3
+; FAKE16-NEXT: v_and_b32_e32 v3, 3, v3
+; FAKE16-NEXT: v_lshlrev_b32_e32 v3, 3, v3
+; FAKE16-NEXT: global_load_b32 v5, v[0:1], off
+; FAKE16-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
+; FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; FAKE16-NEXT: v_not_b32_e32 v4, v4
+; FAKE16-NEXT: .LBB28_1: ; %atomicrmw.start
+; FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; FAKE16-NEXT: s_wait_loadcnt 0x0
+; FAKE16-NEXT: v_mov_b32_e32 v7, v5
+; FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; FAKE16-NEXT: v_lshrrev_b32_e32 v5, v3, v7
+; FAKE16-NEXT: v_min_i16 v5, v5, v2
+; FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; FAKE16-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; FAKE16-NEXT: v_lshlrev_b32_e32 v5, v3, v5
+; FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; FAKE16-NEXT: v_and_or_b32 v6, v7, v4, v5
+; FAKE16-NEXT: s_wait_xcnt 0x0
+; FAKE16-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; FAKE16-NEXT: s_wait_loadcnt 0x0
+; FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; FAKE16-NEXT: s_or_b32 s0, vcc_lo, s0
+; FAKE16-NEXT: s_wait_xcnt 0x0
+; FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; FAKE16-NEXT: s_cbranch_execnz .LBB28_1
+; FAKE16-NEXT: ; %bb.2: ; %atomicrmw.end
+; FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; FAKE16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
+; FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; REAL16-LABEL: global_one_as_atomic_min_i16:
+; REAL16: ; %bb.0:
+; REAL16-NEXT: s_wait_loadcnt_dscnt 0x0
+; REAL16-NEXT: s_wait_kmcnt 0x0
+; REAL16-NEXT: v_mov_b32_e32 v3, v0
+; REAL16-NEXT: s_mov_b32 s0, 0
+; REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; REAL16-NEXT: v_and_b32_e32 v0, -4, v3
+; REAL16-NEXT: v_and_b32_e32 v3, 3, v3
+; REAL16-NEXT: v_lshlrev_b32_e32 v3, 3, v3
+; REAL16-NEXT: global_load_b32 v5, v[0:1], off
+; REAL16-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
+; REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; REAL16-NEXT: v_not_b32_e32 v4, v4
+; REAL16-NEXT: .LBB28_1: ; %atomicrmw.start
+; REAL16-NEXT: ; =>This Inner Loop Header: Depth=1
+; REAL16-NEXT: s_wait_loadcnt 0x0
+; REAL16-NEXT: v_mov_b32_e32 v7, v5
+; REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; REAL16-NEXT: v_lshrrev_b32_e32 v5, v3, v7
+; REAL16-NEXT: v_mov_b16_e32 v5.h, 0
+; REAL16-NEXT: v_min_i16 v5.l, v5.l, v2.l
+; REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; REAL16-NEXT: v_lshlrev_b32_e32 v5, v3, v5
+; REAL16-NEXT: v_and_or_b32 v6, v7, v4, v5
+; REAL16-NEXT: s_wait_xcnt 0x0
+; REAL16-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; REAL16-NEXT: s_wait_loadcnt 0x0
+; REAL16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; REAL16-NEXT: s_or_b32 s0, vcc_lo, s0
+; REAL16-NEXT: s_wait_xcnt 0x0
+; REAL16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; REAL16-NEXT: s_cbranch_execnz .LBB28_1
+; REAL16-NEXT: ; %bb.2: ; %atomicrmw.end
+; REAL16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; REAL16-NEXT: v_lshrrev_b32_e32 v0, v3, v5
+; REAL16-NEXT: s_set_pc_i64 s[30:31]
%result = atomicrmw min ptr addrspace(1) %ptr, i16 %val syncscope("one-as") monotonic
ret i16 %result
}
define i16 @global_one_as_atomic_umin_i16(ptr addrspace(1) %ptr, i16 %val) {
-; GFX1250-LABEL: global_one_as_atomic_umin_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v3, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_and_b32_e32 v0, -4, v3
-; GFX1250-NEXT: v_and_b32_e32 v3, 3, v3
-; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 3, v3
-; GFX1250-NEXT: global_load_b32 v5, v[0:1], off
-; GFX1250-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_not_b32_e32 v4, v4
-; GFX1250-NEXT: .LBB29_1: ; %atomicrmw.start
-; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v7, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshrrev_b32_e32 v5, v3, v7
-; GFX1250-NEXT: v_min_u16 v5, v5, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
-; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0
-; GFX1250-NEXT: s_wait_xcnt 0...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/190390
More information about the llvm-commits mailing list