[llvm-branch-commits] [llvm] AMDGPU: Avoid default subtarget in generated codegen tests (3/9) (PR #205786)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 25 05:02:25 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Another batch of tests updated by script.
Co-Authored-By: Claude <noreply@anthropic.com> (Claude-Opus-4.8)
---
Patch is 20.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/205786.diff
8 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/valu-i1.ll (+25-26)
- (modified) llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vselect.ll (+51-45)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll (+26-26)
- (modified) llvm/test/CodeGen/AMDGPU/wqm-debug-instr.mir (+1-1)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 1ad335a5a2d13..9df052d6910e8 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -30,14 +30,13 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
; SI-NEXT: .LBB0_3: ; %Flow6
; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; SI-NEXT: s_cbranch_execz .LBB0_5
; SI-NEXT: ; %bb.4: ; %LeafBlock
-; SI-NEXT: s_mov_b64 s[10:11], exec
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_and_b64 s[6:7], vcc, exec
+; SI-NEXT: s_mov_b64 s[10:11], exec
; SI-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
-; SI-NEXT: .LBB0_5: ; %Flow8
+; SI-NEXT: ; %bb.5: ; %Flow8
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
@@ -49,12 +48,12 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
; SI-NEXT: ; %bb.7: ; %case1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_ashr_i32 s9, s8, 31
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2
-; SI-NEXT: v_mov_b32_e32 v2, 13
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, 0
+; SI-NEXT: v_mov_b32_e32 v2, 13
; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-NEXT: .LBB0_8: ; %end
@@ -62,21 +61,21 @@ define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(
; SI-NEXT: .LBB0_9: ; %case2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_ashr_i32 s9, s8, 31
+; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], 2
+; SI-NEXT: v_mov_b32_e32 v1, s12
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], 2
; SI-NEXT: v_mov_b32_e32 v3, 17
-; SI-NEXT: v_mov_b32_e32 v1, s12
; SI-NEXT: v_mov_b32_e32 v2, s13
; SI-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64
; SI-NEXT: s_xor_b64 s[2:3], exec, -1
; SI-NEXT: s_branch .LBB0_2
; SI-NEXT: .LBB0_10: ; %default
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_ashr_i32 s9, s8, 31
; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2
; SI-NEXT: s_add_u32 s4, s0, s4
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; SI-NEXT: s_addc_u32 s5, s1, s5
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
@@ -247,18 +246,18 @@ define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspa
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB4_3
; SI-NEXT: ; %bb.1: ; %loop.preheader
-; SI-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT: s_mov_b32 s8, 64
; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v1, s13
-; SI-NEXT: v_add_i32_e32 v0, vcc, s12, v0
-; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s4, s14
-; SI-NEXT: s_mov_b32 s5, s15
; SI-NEXT: s_mov_b32 s3, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s9
+; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0
+; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; SI-NEXT: s_mov_b32 s8, 64
+; SI-NEXT: s_mov_b32 s4, s10
+; SI-NEXT: s_mov_b32 s5, s11
; SI-NEXT: s_mov_b32 s0, s2
; SI-NEXT: s_mov_b32 s1, s2
; SI-NEXT: .LBB4_2: ; %loop
@@ -314,7 +313,6 @@ define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_cbranch_execz .LBB5_5
; SI-NEXT: ; %bb.1: ; %bb10.preheader
-; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; SI-NEXT: v_mov_b32_e32 v3, s1
; SI-NEXT: v_add_i32_e32 v2, vcc, s0, v6
; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
@@ -323,37 +321,38 @@ define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %
; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; SI-NEXT: v_mov_b32_e32 v7, s3
; SI-NEXT: v_add_i32_e32 v6, vcc, s2, v6
+; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_mov_b32 s8, s10
; SI-NEXT: s_mov_b32 s9, s10
-; SI-NEXT: ; implicit-def: $sgpr4_sgpr5
; SI-NEXT: s_mov_b64 s[6:7], 0
+; SI-NEXT: ; implicit-def: $sgpr4_sgpr5
; SI-NEXT: .LBB5_2: ; %bb10
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: buffer_load_dword v8, v[6:7], s[8:11], 0 addr64
; SI-NEXT: buffer_load_dword v9, v[4:5], s[8:11], 0 addr64
+; SI-NEXT: s_or_b64 s[4:5], s[4:5], exec
; SI-NEXT: s_waitcnt vmcnt(1)
; SI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v8
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], -1, v9
; SI-NEXT: s_and_b64 s[12:13], vcc, s[0:1]
-; SI-NEXT: s_or_b64 s[4:5], s[4:5], exec
; SI-NEXT: s_and_saveexec_b64 s[0:1], s[12:13]
; SI-NEXT: s_cbranch_execz .LBB5_4
; SI-NEXT: ; %bb.3: ; %bb20
; SI-NEXT: ; in Loop: Header=BB5_2 Depth=1
; SI-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; SI-NEXT: s_add_u32 s6, s6, 1
+; SI-NEXT: buffer_store_dword v8, v[2:3], s[8:11], 0 addr64
+; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v2
+; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; SI-NEXT: v_add_i32_e32 v4, vcc, 4, v4
; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; SI-NEXT: s_add_u32 s6, s6, 1
; SI-NEXT: v_add_i32_e32 v6, vcc, 4, v6
-; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; SI-NEXT: buffer_store_dword v8, v[2:3], s[8:11], 0 addr64
; SI-NEXT: s_addc_u32 s7, s7, 0
-; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v2
-; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[6:7], v[0:1]
; SI-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; SI-NEXT: s_and_b64 s[12:13], vcc, exec
diff --git a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
index 8c634934947a4..c414acf927a1d 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s
; Tests for a bug in SelectionDAG::UpdateNodeOperands exposed by VectorLegalizer
; where divergence information is not updated.
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir b/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
index 5c6ddec8e56ea..a9d7cb495cfb3 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant64_to_sgpr.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa --global-isel=0 --run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+# RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --global-isel=0 --run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
---
name: test_64imm
diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index fa0922590712a..f181ad9a19633 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck %s
# The undef copy of %4 is allocated to $vgpr3, and the identity copy
# was deleted, and $vgpr3 was considered undef. The code to replace
diff --git a/llvm/test/CodeGen/AMDGPU/vselect.ll b/llvm/test/CodeGen/AMDGPU/vselect.ll
index 6291600a4c00c..2e37b103c7db4 100644
--- a/llvm/test/CodeGen/AMDGPU/vselect.ll
+++ b/llvm/test/CodeGen/AMDGPU/vselect.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-;RUN: llc < %s -mtriple=amdgcn | FileCheck --check-prefixes=SI %s
+;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx600 | FileCheck --check-prefixes=SI %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=VI %s
;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefixes=EG %s
@@ -11,14 +11,14 @@ define amdgpu_kernel void @test_select_v2i32(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x0
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_gt_i32 s9, s5
; SI-NEXT: s_cselect_b32 s5, s7, s9
; SI-NEXT: s_cmp_gt_i32 s8, s4
; SI-NEXT: s_cselect_b32 s4, s6, s8
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mov_b32_e32 v1, s5
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@@ -73,23 +73,25 @@ entry:
define amdgpu_kernel void @test_select_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: test_select_v2f32:
; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
-; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0
+; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mov_b32_e32 v1, s5
-; SI-NEXT: v_mov_b32_e32 v2, s7
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s7, v1
+; SI-NEXT: v_mov_b32_e32 v1, s9
+; SI-NEXT: v_mov_b32_e32 v0, s8
+; SI-NEXT: v_mov_b32_e32 v2, s3
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v1
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; SI-NEXT: v_mov_b32_e32 v2, s6
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s6, v0
+; SI-NEXT: v_mov_b32_e32 v2, s2
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v0
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_select_v2f32:
@@ -149,27 +151,29 @@ define amdgpu_kernel void @test_select_v4i32(ptr addrspace(1) %out, ptr addrspac
; SI-LABEL: test_select_v4i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
+; SI-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
-; SI-NEXT: s_load_dwordx4 s[12:15], s[6:7], 0x0
-; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x11
-; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_load_dwordx4 s[12:15], s[12:13], 0x0
+; SI-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x11
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_gt_i32 s10, s14
-; SI-NEXT: s_cselect_b32 s6, s6, s10
+; SI-NEXT: s_cselect_b32 s0, s18, s10
; SI-NEXT: s_cmp_gt_i32 s9, s13
-; SI-NEXT: s_cselect_b32 s5, s5, s9
+; SI-NEXT: s_cselect_b32 s1, s17, s9
; SI-NEXT: s_cmp_gt_i32 s11, s15
-; SI-NEXT: s_cselect_b32 s7, s7, s11
+; SI-NEXT: s_cselect_b32 s2, s19, s11
; SI-NEXT: s_cmp_gt_i32 s8, s12
-; SI-NEXT: s_cselect_b32 s4, s4, s8
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_mov_b32_e32 v2, s6
-; SI-NEXT: v_mov_b32_e32 v1, s5
-; SI-NEXT: v_mov_b32_e32 v3, s7
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: s_cselect_b32 s3, s16, s8
+; SI-NEXT: v_mov_b32_e32 v0, s3
+; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_mov_b32_e32 v2, s0
+; SI-NEXT: v_mov_b32_e32 v3, s2
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_select_v4i32:
@@ -237,31 +241,33 @@ entry:
define amdgpu_kernel void @test_select_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: test_select_v4f32:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, s8
-; SI-NEXT: v_mov_b32_e32 v1, s9
-; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
-; SI-NEXT: v_mov_b32_e32 v4, s7
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s7, v3
+; SI-NEXT: v_mov_b32_e32 v2, s10
+; SI-NEXT: v_mov_b32_e32 v1, s9
+; SI-NEXT: v_mov_b32_e32 v4, s3
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v3
; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; SI-NEXT: v_mov_b32_e32 v4, s6
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s6, v2
+; SI-NEXT: v_mov_b32_e32 v4, s2
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v2
+; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; SI-NEXT: v_mov_b32_e32 v4, s5
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s5, v1
+; SI-NEXT: v_mov_b32_e32 v4, s1
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s1, v1
; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
-; SI-NEXT: v_mov_b32_e32 v4, s4
-; SI-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
+; SI-NEXT: v_mov_b32_e32 v4, s0
+; SI-NEXT: v_cmp_neq_f32_e32 vcc, s0, v0
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
-; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_select_v4f32:
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
index 9dbc22b485447..736ede64ceba2 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-non-first-terminators.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
---
name: waitcnt-debug-non-first-terminators
diff --git a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
index ecb0c8eb9e0be..e00e9c42ea63d 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to
; create vselects with i64 condition masks.
@@ -9,26 +9,26 @@ define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
; GCN-LABEL: widen_vselect_and_mask_v4f64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_mov_b64 s[4:5], 16
-; GCN-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NEXT: v_mov_b32_e32 v1, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: v_mov_b32_e32 v3, v0
-; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; GCN-NEXT: v_cmp_u_f64_e64 s[2:3], s[0:1], s[0:1]
-; GCN-NEXT: s_waitcnt expcnt(0)
-; GCN-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3]
+; GCN-NEXT: v_cmp_u_f64_e64 s[4:5], s[0:1], s[0:1]
; GCN-NEXT: v_cmp_neq_f64_e64 s[0:1], s[0:1], 0
+; GCN-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -1, v1
; GCN-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec
-; GCN-NEXT: s_cselect_b32 s0, 0x3ff00000, 0
-; GCN-NEXT: s_mov_b64 s[4:5], 0
-; GCN-NEXT: v_mov_b32_e32 v1, s0
-; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; GCN-NEXT: v_mov_b32_e32 v1, v0
+; GCN-NEXT: s_mov_b64 s[0:1], 16
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, s4
+; GCN-NEXT: s_mov_b64 s[0:1], 0
+; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NEXT: s_endpgm
bb:
%tmp = extractelement <4 x double> %arg, i64 0
@@ -51,23 +51,23 @@ define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b64 s[4:5], 0
-; GCN-NEXT: s_mov_b32 s10, -1
+; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_mov_b64 s[8:9], 16
-; GCN-NEXT: s_mov_b32 s11, 0xf000
-; GCN-NEXT: v_mov_b32_e32 v2, v1
-; GCN-NEXT: v_mov_b32_e32 v3, v1
-; GCN-NEXT: v_mov_b32_e32 v4, v1
-; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[0:1], 0
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b64 s[4:5], 0
+; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
; GCN-NEXT: v_cmp_ne_u64_e64 s[0:1], s[0:1], 0
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7]
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -1, v0
; GCN-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GCN-NEXT: buffer_store_dwordx4 v[1:4], off, s[8:11], 0
-; GCN-NEXT: s_mov_b32 s6, s10
-; GCN-NEXT: s_mov_b32 s7, s11
+; GCN-NEXT: v_mov_b32_e32 v2, v1
+; GCN-NEXT: v_mov_b32_e32 v3, v1
+; GCN-NEXT: v_mov_b32_e32 v4, v1
+; GCN-NEXT: s_mov_b64 s[0:1], 16
+; GCN-NEXT: s_mov_b32 s6, s2
+; GCN-NEXT: s_mov_b32 s7, s3
+; GCN-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NEXT: s_endpgm
bb:
diff --gi...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/205786
More information about the llvm-branch-commits mailing list