[llvm] [AMDGPU][NFC] Test autogenerated llc tests for COV5 (PR #74339)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 08:51:45 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Saiyedul Islam (saiislam)

<details>
<summary>Changes</summary>

Regenerate a few llc tests to test for COV5 instead of the default ABI version.

---

Patch is 847.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74339.diff


22 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (+77-35) 
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll (+49-20) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+33-30) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+19-12) 
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+15-7) 
- (modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+35-35) 
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+400-379) 
- (modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1222-1461) 
- (modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (+45-47) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+11-12) 
- (modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+147-160) 
- (modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+48-45) 
- (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+13-10) 
- (modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+31-32) 
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+23-20) 
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+24-25) 
- (modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+8-7) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+20-19) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+553-605) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+333-363) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+333-363) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+552-605) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index c7bc584d42d5a..121656b6a30f2 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -276,16 +276,16 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
 ; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
 ; FIXEDABI-SDAG:       ; %bb.0:
 ; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x40
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
-; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v2, s5
-; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
 ; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
-; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
-; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
-; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v0, vcc
 ; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v4, s4
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
+; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
 ; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
 ; FIXEDABI-SDAG-NEXT:    flat_store_dword v[2:3], v0
 ; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
@@ -297,12 +297,15 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
 ; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
 ; FIXEDABI-GISEL:       ; %bb.0:
 ; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x40
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc0
+; FIXEDABI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc4
+; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
 ; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
 ; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
 ; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v0, s5
-; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v0, vcc
+; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v3, s6
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
 ; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
@@ -322,16 +325,29 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
 }
 
 define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
-; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
-; FIXEDABI:       ; %bb.0:
-; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x40
-; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
-; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
-; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
-; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
-; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+; FIXEDABI-SDAG-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc4
+; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
   %zext = zext i1 %is.shared to i32
   store volatile i32 %zext, ptr addrspace(1) undef
@@ -339,16 +355,29 @@ define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
 }
 
 define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
-; FIXEDABI-LABEL: is_private_requires_queue_ptr:
-; FIXEDABI:       ; %bb.0:
-; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x44
-; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
-; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
-; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
-; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
-; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+; FIXEDABI-SDAG-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc0
+; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
   %zext = zext i1 %is.private to i32
   store volatile i32 %zext, ptr addrspace(1) undef
@@ -356,11 +385,21 @@ define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
 }
 
 define void @trap_requires_queue() #0 {
-; FIXEDABI-LABEL: trap_requires_queue:
-; FIXEDABI:       ; %bb.0:
-; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FIXEDABI-NEXT:    s_mov_b64 s[0:1], s[6:7]
-; FIXEDABI-NEXT:    s_trap 2
+; FIXEDABI-SDAG-LABEL: trap_requires_queue:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; FIXEDABI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_trap 2
+;
+; FIXEDABI-GISEL-LABEL: trap_requires_queue:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc8
+; FIXEDABI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_trap 2
   call void @llvm.trap()
   unreachable
 }
@@ -390,3 +429,6 @@ declare void @llvm.trap()
 declare void @llvm.debugtrap()
 
 attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
index 2d60faaf8c8c0..e962df69b45c1 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
@@ -21,16 +21,29 @@ define ptr addrspace(1) @flat_to_gobal_addrspacecast(ptr %ptr) {
 }
 
 define ptr @group_to_flat_addrspacecast(ptr addrspace(3) %ptr) {
-; CHECK-LABEL: group_to_flat_addrspacecast:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_load_dword s4, s[6:7], 0x10
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v1, s4
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: group_to_flat_addrspacecast:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: group_to_flat_addrspacecast:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0xc4
+; GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v1, s4
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %stof = addrspacecast ptr addrspace(3) %ptr to ptr
   ret ptr %stof
 }
@@ -47,16 +60,29 @@ define ptr addrspace(3) @flat_to_group_addrspacecast(ptr %ptr) {
 }
 
 define ptr @private_to_flat_addrspacecast(ptr addrspace(5) %ptr) {
-; CHECK-LABEL: private_to_flat_addrspacecast:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_load_dword s4, s[6:7], 0x11
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v1, s4
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-LABEL: private_to_flat_addrspacecast:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    s_mov_b64 s[4:5], 0
+; SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
+; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: private_to_flat_addrspacecast:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b64 s[4:5], 0xc0
+; GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v1, s4
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
   %stof = addrspacecast ptr addrspace(5) %ptr to ptr
   ret ptr %stof
 }
@@ -204,3 +230,6 @@ define ptr addrspace(6) @addrspacecast_flat_null_to_constant32bit() {
 }
 
 attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index 8c64ab5952def..e53fa9650f9ec 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -146,13 +146,13 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v31
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v42, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
@@ -163,9 +163,9 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43
@@ -285,13 +285,13 @@ define double @test_powr_fast_f64(double %x, double %y) {
 ; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT:    v_mov_b32_e32 v43, v31
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v42, v3
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
@@ -302,9 +302,9 @@ define double @test_powr_fast_f64(double %x, double %y) {
 ; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d at gotpcrel32@hi+12
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43
@@ -430,13 +430,13 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v31
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v42, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
@@ -447,9 +447,9 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43
@@ -571,13 +571,13 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
 ; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v31
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v42, 1, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
@@ -588,9 +588,9 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43
@@ -715,13 +715,13 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_writelane_b32 v40, s45, 13
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v31
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
 ; CHECK-NEXT:    v_or_b32_e32 v43, 1, v2
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
@@ -732,9 +732,9 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
 ; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43
@@ -773,3 +773,6 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
   %call = tail call fast double @_Z4powndi(double %x, i32 %y)
   ret double %call
 }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 1f90c0d03a856..1396dab69c13a 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -458,7 +458,7 @@ define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR8]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -477,7 +477,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTR...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/74339


More information about the llvm-commits mailing list