[llvm-branch-commits] [llvm] AMDGPU: Avoid default subtarget in generated codegen tests (2/9) (PR #205785)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 25 05:02:25 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

Continue migrating tests away from the default (dummy) subtarget and onto
real targets that approximate the old behavior. The changes were made by script.

Co-Authored-By: Claude <noreply@<!-- -->anthropic.com> (Claude-Opus-4.8)

---

Patch is 1.11 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/205785.diff


96 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/icmp.i16.ll (+91-91) 
- (modified) llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (+522-1179) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call.ll (+32-32) 
- (modified) llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ipra-regmask.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+71-70) 
- (modified) llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll (+4-8) 
- (modified) llvm/test/CodeGen/AMDGPU/livevars-implicitdef.mir (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll (+7-6) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll (+124-120) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll (+49-48) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll (+16-16) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll (+11-10) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll (+33-30) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.mulo.ll (+106-106) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-f32.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-f64.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+646-638) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+655-630) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i32.ll (+304-284) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i64.ll (+18-20) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (+868-857) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-f32.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+605-614) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i32.ll (+226-237) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i8.ll (+698-756) 
- (modified) llvm/test/CodeGen/AMDGPU/load-local-i16.ll (+621-630) 
- (modified) llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll (+1-2) 
- (modified) llvm/test/CodeGen/AMDGPU/loop_break.ll (+17-17) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/mad_uint24.ll (+50-50) 
- (modified) llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll (+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/mul_int24.ll (+30-31) 
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (+37-37) 
- (modified) llvm/test/CodeGen/AMDGPU/multilevel-break.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/no-limit-coalesce.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll (+14-16) 
- (modified) llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/perfhint.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/rcp_iflag.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/regcoalescer-resolve-lane-conflict-by-subranges.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/rotate-add.ll (+15-15) 
- (modified) llvm/test/CodeGen/AMDGPU/rotl.ll (+32-32) 
- (modified) llvm/test/CodeGen/AMDGPU/rotr.ll (+45-45) 
- (modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/setcc-select-hi32mask.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/setcc-select.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+67-67) 
- (modified) llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll (+7-9) 
- (modified) llvm/test/CodeGen/AMDGPU/si-i1-copies.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-body.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-latch.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-multi-entry-cycle.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll (+17-17) 
- (modified) llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll (+125-127) 
- (modified) llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll (+27-27) 
- (modified) llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/tail-dup-bundle.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/twoaddr-regsequence.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/udivrem24.ll (+266-266) 
- (modified) llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll (+69-71) 
- (modified) llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+24-25) 
- (modified) llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll (+16-16) 
- (modified) llvm/test/CodeGen/AMDGPU/v_cndmask.ll (+136-134) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
index fc4cdcda99ae4..05445f8311ec8 100644
--- a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s
-; RUN: llc -mtriple=amdgcn < %s| FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s| FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s| FileCheck -check-prefix=GFX11-FAKE16 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s| FileCheck -check-prefix=GFX11-TRUE16 %s
 
@@ -35,20 +35,20 @@ define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr
 ; SI-LABEL: i16_eq:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -132,20 +132,20 @@ define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr
 ; SI-LABEL: i16_ne:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -229,20 +229,20 @@ define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_ugt:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_gt_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_gt_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -326,20 +326,20 @@ define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_uge:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -423,20 +423,20 @@ define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_ult:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_lt_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_lt_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -520,20 +520,20 @@ define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_ule:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_ushort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_le_u32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_le_u32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -618,20 +618,20 @@ define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_sgt:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_sshort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -715,20 +715,20 @@ define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_sge:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_sshort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_ge_i32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_ge_i32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -812,20 +812,20 @@ define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_slt:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_sshort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -909,20 +909,20 @@ define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.pt
 ; SI-LABEL: i16_sle:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT:    s_mov_b32 s11, 0xf000
-; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
 ; SI-NEXT:    v_mov_b32_e32 v2, 0
-; SI-NEXT:    s_mov_b64 s[6:7], s[10:11]
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[8:11], 0 addr64
 ; SI-NEXT:    buffer_load_sshort v4, v[1:2], s[4:7], 0 addr64
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; SI-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_cmp_le_i32_e32 vcc, v3, v4
+; SI-NEXT:    v_cmp_le_i32_e32 vcc, v4, v3
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
 ; SI-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
@@ -1007,12 +1007,12 @@ define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
@@ -1091,12 +1091,12 @@ define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v3
@@ -1175,12 +1175,12 @@ define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_lt_u32_e32 vcc, s4, v3
@@ -1259,12 +1259,12 @@ define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
@@ -1343,12 +1343,12 @@ define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v3
@@ -1427,12 +1427,12 @@ define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_and_b32 s4, s8, 0xffff
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v3
@@ -1511,12 +1511,12 @@ define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_sext_i32_i16 s4, s8
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v3
@@ -1595,12 +1595,12 @@ define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_sext_i32_i16 s4, s8
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_le_i32_e32 vcc, s4, v3
@@ -1679,12 +1679,12 @@ define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_sext_i32_i16 s4, s8
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v3
@@ -1763,12 +1763,12 @@ define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
-; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    v_mov_b32_e32 v2, 0
 ; SI-NEXT:    buffer_load_sshort v3, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_sext_i32_i16 s4, s8
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ge_i32_e32 vcc, s4, v3
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
index 0b4e1c2359a7b..3e4cd5bf5f44d 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=amdgcn < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s 2>&1 | FileCheck -check-prefix=GCN %s
 
-; It is not illegal anymore because the SGPRs with divergent values are added with readfirstlane 
+; It is not illegal anymore because the SGPRs with divergent values are added with readfirstlane
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/205785


More information about the llvm-branch-commits mailing list