[llvm-branch-commits] [llvm] AMDGPU: Avoid default subtarget in generated codegen tests (3/9) (PR #205786)

Thu Jun 25 05:02:25 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

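Another batch of tests updated by script: RUN lines that previously relied on the default subtarget now pass an explicit `-mcpu` (in the hunks shown below, `gfx600` for `-mtriple=amdgcn` and `gfx700` for `-mtriple=amdgcn-amd-amdhsa`), and the autogenerated check lines are regenerated where the output changed.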

Co-Authored-By: Claude <noreply@anthropic.com> (Claude-Opus-4.8)

---

Patch is 20.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/205786.diff


8 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/valu-i1.ll (+25-26) 
- (modified) llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/vselect.ll (+51-45) 
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll (+26-26) 
- (modified) llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir (+1-1) 


``````````diff

diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 1ad335a5a2d13..9df052d6910e8 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
@@ -30,14 +30,13 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
 ; SI-NEXT:    s_and_b64 s[2:3], s[2:3], exec
 ; SI-NEXT:  .LBB0_3: ; %Flow6
 ; SI-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
-; SI-NEXT:    s_cbranch_execz .LBB0_5
 ; SI-NEXT:  ; %bb.4: ; %LeafBlock
-; SI-NEXT:    s_mov_b64 s[10:11], exec
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
 ; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
 ; SI-NEXT:    s_and_b64 s[6:7], vcc, exec
+; SI-NEXT:    s_mov_b64 s[10:11], exec
 ; SI-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
-; SI-NEXT:  .LBB0_5: ; %Flow8
+; SI-NEXT:  ; %bb.5: ; %Flow8
 ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SI-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
 ; SI-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
@@ -49,12 +48,12 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
 ; SI-NEXT:  ; %bb.7: ; %case1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_ashr_i32 s9, s8, 31
-; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, 0
 ; SI-NEXT:    s_lshl_b64 s[4:5], s[8:9], 2
-; SI-NEXT:    v_mov_b32_e32 v2, 13
 ; SI-NEXT:    s_waitcnt expcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, 0
+; SI-NEXT:    v_mov_b32_e32 v2, 13
 ; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-NEXT:  .LBB0_8: ; %end
@@ -62,21 +61,21 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
 ; SI-NEXT:  .LBB0_9: ; %case2
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_ashr_i32 s9, s8, 31
+; SI-NEXT:    s_lshl_b64 s[12:13], s[8:9], 2
+; SI-NEXT:    v_mov_b32_e32 v1, s12
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, 0
-; SI-NEXT:    s_lshl_b64 s[12:13], s[8:9], 2
 ; SI-NEXT:    v_mov_b32_e32 v3, 17
-; SI-NEXT:    v_mov_b32_e32 v1, s12
 ; SI-NEXT:    v_mov_b32_e32 v2, s13
 ; SI-NEXT:    buffer_store_dword v3, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_xor_b64 s[2:3], exec, -1
 ; SI-NEXT:    s_branch .LBB0_2
 ; SI-NEXT:  .LBB0_10: ; %default
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_ashr_i32 s9, s8, 31
 ; SI-NEXT:    s_lshl_b64 s[4:5], s[8:9], 2
 ; SI-NEXT:    s_add_u32 s4, s0, s4
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
 ; SI-NEXT:    s_addc_u32 s5, s1, s5
 ; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SI-NEXT:    s_xor_b64 s[12:13], exec, s[6:7]
@@ -247,18 +246,18 @@ define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspa
 ; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; SI-NEXT:    s_cbranch_execz .LBB4_3
 ; SI-NEXT:  ; %bb.1: ; %loop.preheader
-; SI-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x9
+; SI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT:    s_mov_b32 s8, 64
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v1, s13
-; SI-NEXT:    v_add_i32_e32 v0, vcc, s12, v0
-; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    s_mov_b32 s4, s14
-; SI-NEXT:    s_mov_b32 s5, s15
 ; SI-NEXT:    s_mov_b32 s3, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v1, s9
+; SI-NEXT:    v_add_i32_e32 v0, vcc, s8, v0
+; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; SI-NEXT:    s_mov_b32 s8, 64
+; SI-NEXT:    s_mov_b32 s4, s10
+; SI-NEXT:    s_mov_b32 s5, s11
 ; SI-NEXT:    s_mov_b32 s0, s2
 ; SI-NEXT:    s_mov_b32 s1, s2
 ; SI-NEXT:  .LBB4_2: ; %loop
@@ -314,7 +313,6 @@ define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %
 ; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; SI-NEXT:    s_cbranch_execz .LBB5_5
 ; SI-NEXT:  ; %bb.1: ; %bb10.preheader
-; SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; SI-NEXT:    v_mov_b32_e32 v3, s1
 ; SI-NEXT:    v_add_i32_e32 v2, vcc, s0, v6
 ; SI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
@@ -323,37 +321,38 @@ define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %
 ; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
 ; SI-NEXT:    v_mov_b32_e32 v7, s3
 ; SI-NEXT:    v_add_i32_e32 v6, vcc, s2, v6
+; SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; SI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
 ; SI-NEXT:    s_mov_b64 s[2:3], 0
 ; SI-NEXT:    s_mov_b32 s8, s10
 ; SI-NEXT:    s_mov_b32 s9, s10
-; SI-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; SI-NEXT:    s_mov_b64 s[6:7], 0
+; SI-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; SI-NEXT:  .LBB5_2: ; %bb10
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    s_waitcnt expcnt(0)
 ; SI-NEXT:    buffer_load_dword v8, v[6:7], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_dword v9, v[4:5], s[8:11], 0 addr64
+; SI-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 ; SI-NEXT:    s_waitcnt vmcnt(1)
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v8
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ne_u32_e64 s[0:1], -1, v9
 ; SI-NEXT:    s_and_b64 s[12:13], vcc, s[0:1]
-; SI-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 ; SI-NEXT:    s_and_saveexec_b64 s[0:1], s[12:13]
 ; SI-NEXT:    s_cbranch_execz .LBB5_4
 ; SI-NEXT:  ; %bb.3: ; %bb20
 ; SI-NEXT:    ; in Loop: Header=BB5_2 Depth=1
 ; SI-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; SI-NEXT:    s_add_u32 s6, s6, 1
+; SI-NEXT:    buffer_store_dword v8, v[2:3], s[8:11], 0 addr64
+; SI-NEXT:    v_add_i32_e32 v2, vcc, 4, v2
+; SI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; SI-NEXT:    v_add_i32_e32 v4, vcc, 4, v4
 ; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; SI-NEXT:    s_add_u32 s6, s6, 1
 ; SI-NEXT:    v_add_i32_e32 v6, vcc, 4, v6
-; SI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; SI-NEXT:    buffer_store_dword v8, v[2:3], s[8:11], 0 addr64
 ; SI-NEXT:    s_addc_u32 s7, s7, 0
-; SI-NEXT:    v_add_i32_e32 v2, vcc, 4, v2
-; SI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; SI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
 ; SI-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
 ; SI-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
 ; SI-NEXT:    s_and_b64 s[12:13], vcc, exec
diff --git a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
index 8c634934947a4..c414acf927a1d 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s
 
 ; Tests for a bug in SelectionDAG::UpdateNodeOperands exposed by VectorLegalizer
 ; where divergence information is not updated.
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir b/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
index 5c6ddec8e56ea..a9d7cb495cfb3 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa --global-isel=0 --run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+# RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --global-isel=0 --run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
 
 ---
 name:            test_64imm
diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index fa0922590712a..f181ad9a19633 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck %s
 
 # The undef copy of %4 is allocated to $vgpr3, and the identity copy
 # was deleted, and $vgpr3 was considered undef. The code to replace
diff --git a/llvm/test/CodeGen/AMDGPU/vselect.ll b/llvm/test/CodeGen/AMDGPU/vselect.ll
index 6291600a4c00c..2e37b103c7db4 100644
--- a/llvm/test/CodeGen/AMDGPU/vselect.ll
+++ b/llvm/test/CodeGen/AMDGPU/vselect.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-;RUN: llc < %s -mtriple=amdgcn | FileCheck --check-prefixes=SI %s
+;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx600 | FileCheck --check-prefixes=SI %s
 ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=VI %s
 ;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefixes=EG %s
 
@@ -11,14 +11,14 @@ define amdgpu_kernel void @test_select_v2i32(ptr addrspace(1) %out, ptr addrspac
 ; SI-NEXT:    s_load_dwordx2 s[8:9], s[2:3], 0x0
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_cmp_gt_i32 s9, s5
 ; SI-NEXT:    s_cselect_b32 s5, s7, s9
 ; SI-NEXT:    s_cmp_gt_i32 s8, s4
 ; SI-NEXT:    s_cselect_b32 s4, s6, s8
-; SI-NEXT:    s_mov_b32 s2, -1
-; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -73,23 +73,25 @@ entry:
 define amdgpu_kernel void @test_select_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
 ; SI-LABEL: test_select_v2f32:
 ; SI:       ; %bb.0: ; %entry
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x0
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[8:9], 0x0
+; SI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_mov_b32_e32 v1, s5
-; SI-NEXT:    v_mov_b32_e32 v2, s7
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s7, v1
+; SI-NEXT:    v_mov_b32_e32 v1, s9
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    v_mov_b32_e32 v2, s3
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s3, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; SI-NEXT:    v_mov_b32_e32 v2, s6
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v0
+; SI-NEXT:    v_mov_b32_e32 v2, s2
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s2, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_select_v2f32:
@@ -149,27 +151,29 @@ define amdgpu_kernel void @test_select_v4i32(ptr addrspace(1) %out, ptr addrspac
 ; SI-LABEL: test_select_v4i32:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
+; SI-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x0
-; SI-NEXT:    s_load_dwordx4 s[12:15], s[6:7], 0x0
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x11
-; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
+; SI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x11
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_cmp_gt_i32 s10, s14
-; SI-NEXT:    s_cselect_b32 s6, s6, s10
+; SI-NEXT:    s_cselect_b32 s0, s18, s10
 ; SI-NEXT:    s_cmp_gt_i32 s9, s13
-; SI-NEXT:    s_cselect_b32 s5, s5, s9
+; SI-NEXT:    s_cselect_b32 s1, s17, s9
 ; SI-NEXT:    s_cmp_gt_i32 s11, s15
-; SI-NEXT:    s_cselect_b32 s7, s7, s11
+; SI-NEXT:    s_cselect_b32 s2, s19, s11
 ; SI-NEXT:    s_cmp_gt_i32 s8, s12
-; SI-NEXT:    s_cselect_b32 s4, s4, s8
-; SI-NEXT:    s_mov_b32 s2, -1
-; SI-NEXT:    v_mov_b32_e32 v2, s6
-; SI-NEXT:    v_mov_b32_e32 v1, s5
-; SI-NEXT:    v_mov_b32_e32 v3, s7
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT:    s_cselect_b32 s3, s16, s8
+; SI-NEXT:    v_mov_b32_e32 v0, s3
+; SI-NEXT:    v_mov_b32_e32 v1, s1
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    v_mov_b32_e32 v3, s2
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_select_v4i32:
@@ -237,31 +241,33 @@ entry:
 define amdgpu_kernel void @test_select_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
 ; SI-LABEL: test_select_v4f32:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
 ; SI-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
-; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s8
-; SI-NEXT:    v_mov_b32_e32 v1, s9
-; SI-NEXT:    v_mov_b32_e32 v2, s10
 ; SI-NEXT:    v_mov_b32_e32 v3, s11
-; SI-NEXT:    v_mov_b32_e32 v4, s7
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s7, v3
+; SI-NEXT:    v_mov_b32_e32 v2, s10
+; SI-NEXT:    v_mov_b32_e32 v1, s9
+; SI-NEXT:    v_mov_b32_e32 v4, s3
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s3, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; SI-NEXT:    v_mov_b32_e32 v4, s6
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v2
+; SI-NEXT:    v_mov_b32_e32 v4, s2
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s2, v2
+; SI-NEXT:    v_mov_b32_e32 v0, s8
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; SI-NEXT:    v_mov_b32_e32 v4, s5
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s5, v1
+; SI-NEXT:    v_mov_b32_e32 v4, s1
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s1, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; SI-NEXT:    v_mov_b32_e32 v4, s4
-; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s4, v0
+; SI-NEXT:    v_mov_b32_e32 v4, s0
+; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s0, v0
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_select_v4f32:
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
index 9dbc22b485447..736ede64ceba2 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
 
 ---
 name: waitcnt-debug-non-first-terminators
diff --git a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
index ecb0c8eb9e0be..e00e9c42ea63d 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to
 ; create vselects with i64 condition masks.
@@ -9,26 +9,26 @@ define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
 ; GCN-LABEL: widen_vselect_and_mask_v4f64:
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s6, -1
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_mov_b64 s[4:5], 16
-; GCN-NEXT:    s_mov_b32 s7, 0xf000
-; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    v_mov_b32_e32 v2, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; GCN-NEXT:    v_cmp_u_f64_e64 s[2:3], s[0:1], s[0:1]
-; GCN-NEXT:    s_waitcnt expcnt(0)
-; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GCN-NEXT:    v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
 ; GCN-NEXT:    v_cmp_neq_f64_e64 s[0:1], s[0:1], 0
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
 ; GCN-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
 ; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s0, 0x3ff00000, 0
-; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    v_mov_b32_e32 v1, s0
-; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT:    s_cselect_b32 s4, 0x3ff00000, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    s_mov_b64 s[0:1], 16
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NEXT:    s_waitcnt expcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v1, s4
+; GCN-NEXT:    s_mov_b64 s[0:1], 0
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 bb:
   %tmp = extractelement <4 x double> %arg, i64 0
@@ -51,23 +51,23 @@ define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-NEXT:    s_mov_b64 s[8:9], 16
-; GCN-NEXT:    s_mov_b32 s11, 0xf000
-; GCN-NEXT:    v_mov_b32_e32 v2, v1
-; GCN-NEXT:    v_mov_b32_e32 v3, v1
-; GCN-NEXT:    v_mov_b32_e32 v4, v1
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[0:1], 0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b64 s[4:5], 0
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
 ; GCN-NEXT:    v_cmp_ne_u64_e64 s[0:1], s[0:1], 0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[6:7]
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
 ; GCN-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GCN-NEXT:    buffer_store_dwordx4 v[1:4], off, s[8:11], 0
-; GCN-NEXT:    s_mov_b32 s6, s10
-; GCN-NEXT:    s_mov_b32 s7, s11
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    v_mov_b32_e32 v4, v1
+; GCN-NEXT:    s_mov_b64 s[0:1], 16
+; GCN-NEXT:    s_mov_b32 s6, s2
+; GCN-NEXT:    s_mov_b32 s7, s3
+; GCN-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0
 ; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GCN-NEXT:    s_endpgm
 bb:
diff --gi...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/205786