[llvm-branch-commits] [llvm] AMDGPU: Avoid default subtarget in hand-written codegen tests (9/9) (PR #205792)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 25 05:01:26 PDT 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/205792
Fix some manual test checks using amdgcn triples without -mcpu. These require the
most careful consideration. The highest-impact changes are the optimizations
removing execz branches now that there's a scheduling model.
From 250a0fee2ec3429f50b443f6640797884f1494c1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 25 Jun 2026 12:19:12 +0200
Subject: [PATCH] AMDGPU: Avoid default subtarget in hand-written codegen tests
(9/9)
Fix some manual test checks using amdgcn triples without -mcpu. These require the
most careful consideration. The highest-impact changes are the optimizations
removing execz branches now that there's a scheduling model.
---
llvm/test/CodeGen/AMDGPU/gep-address-space.ll | 16 ++++++---------
.../AMDGPU/multi-divergent-exit-region.ll | 8 ++++----
.../AMDGPU/schedule-amdgpu-trackers.ll | 6 +++---
.../CodeGen/AMDGPU/schedule-physregdeps.mir | 4 ++--
llvm/test/CodeGen/AMDGPU/setcc.ll | 14 ++++++-------
...si-lower-control-flow-unreachable-block.ll | 10 ++++------
.../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 20 ++++++-------------
7 files changed, 32 insertions(+), 46 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/gep-address-space.ll b/llvm/test/CodeGen/AMDGPU/gep-address-space.ll
index df32959543bde..fc72c5886c83b 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
@@ -27,17 +27,13 @@ define amdgpu_kernel void @use_gep_address_space_large_offset(ptr addrspace(3) %
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; SI: s_add_i32
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-
-; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
-; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
-; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
-; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CI-COUNT-4: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
; CHECK: s_endpgm
define amdgpu_kernel void @gep_as_vector_v4(<4 x ptr addrspace(3)> %array) nounwind {
%p = getelementptr [1024 x i32], <4 x ptr addrspace(3)> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index d250c5ab42559..7abd2c29016e2 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -1,6 +1,6 @@
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize64 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
-; RUN: llc -mtriple=amdgcn -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
; Add an extra verifier runs. There were some cases where invalid IR
; was produced but happened to be fixed by the later passes.
@@ -61,15 +61,15 @@
; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret:
-; GCN-DAG: s_mov_b64 [[EXIT1:s\[[0-9]+:[0-9]+\]]], 0
-; GCN-DAG: v_cmp_lt_i32_e32 vcc, 1,
; GCN-DAG: s_mov_b64 [[EXIT0:s\[[0-9]+:[0-9]+\]]], 0
+; GCN-DAG: v_cmp_lt_i32_e32 vcc, 1,
+; GCN-DAG: s_mov_b64 [[EXIT1:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_and_saveexec_b64
; GCN-DAG: s_xor_b64
; GCN: ; %LeafBlock1
-; GCN-NEXT: s_mov_b64 [[EXIT0]], exec
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2,
+; GCN-NEXT: s_mov_b64 [[EXIT0]], exec
; GCN-NEXT: s_and_b64 [[EXIT1]], vcc, exec
; GCN: ; %Flow
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index 71981e3599b87..deec6a2d307f2 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -4,8 +4,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=TONGA-GCNTRACKERS %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched < %s | FileCheck --check-prefixes=GFX908 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=GFX908-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn -verify-misched < %s | FileCheck --check-prefixes=GENERIC %s
-; RUN: llc -mtriple=amdgcn -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=GENERIC-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -verify-misched < %s | FileCheck --check-prefixes=GENERIC %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=GENERIC-GCNTRACKERS %s
; GCN Trackers are sensitive to minor changes in RP, and will avoid scheduling certain instructions, which, if scheduled,
; allow scheduling of other instructions which reduce RP
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
}
; CHECK-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
-; GENERIC: NumSgprs: 71
+; GENERIC: NumSgprs: 69
; GENERIC-GCNTRACKERS: NumSgprs: 45
; GENERIC: NumVgprs: 20
; GENERIC-GCNTRACKERS: NumVgprs: 20
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir b/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir
index 27908957b5886..d758bbc80c95a 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -run-pass machine-scheduler -verify-machineinstrs %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass machine-scheduler -verify-machineinstrs %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
# REQUIRES: asserts
# CHECK: SU(0): $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec
@@ -34,7 +34,7 @@
# CHECK-NEXT: SU(2): Out Latency=1
# CHECK-NEXT: SU(2): Data Latency=1 Reg=$vgpr0
# CHECK: Successors:
-# CHECK-NEXT: ExitSU: Ord Latency=3 Artificial
+# CHECK-NEXT: ExitSU: Ord Latency=79 Artificial
---
name: test
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index d25ca0e6e04e9..8e115eb7d2881 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-- < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx600 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -mtriple=r600-- -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -102,7 +102,7 @@ entry:
; R600-DAG: SETNE_INT
; GCN: v_cmp_lg_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_one(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp one float %a, %b
@@ -132,7 +132,7 @@ entry:
; R600-DAG: SETE_INT
; GCN: v_cmp_nlg_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ueq(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp ueq float %a, %b
@@ -145,7 +145,7 @@ entry:
; R600: SETGE
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ugt(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp ugt float %a, %b
@@ -159,7 +159,7 @@ entry:
; R600: SETE_DX10
; GCN: v_cmp_nlt_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_uge(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp uge float %a, %b
@@ -173,7 +173,7 @@ entry:
; R600: SETE_DX10
; GCN: v_cmp_nge_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ult(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp ult float %a, %b
@@ -187,7 +187,7 @@ entry:
; R600: SETE_DX10
; GCN: v_cmp_ngt_f32_e32 vcc
-; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
+; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ule(ptr addrspace(1) %out, float %a, float %b) #0 {
entry:
%0 = fcmp ule float %a, %b
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 90a76c3c493e9..3f097857cf744 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -1,16 +1,15 @@
-; RUN: llc -mtriple=amdgcn -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
; GCN: v_cmp_eq_u32
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN-NEXT: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
-; GCN: s_endpgm
+; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN-NEXT: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
bb:
@@ -29,13 +28,12 @@ ret:
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
; GCN: v_cmp_ne_u32
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN: BB1_{{[0-9]+}}:
+; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index f50576eaad0cf..3636dc7b96c37 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.fma.f32(float, float, float) #1
@@ -40,19 +40,11 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(ptr addrspace(1)
}
; GCN-LABEL: {{^}}test_use_s_v_s:
-; SI: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
-; SI: buffer_load_dword [[VA0:v[0-9]+]]
-; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
-; SI-NEXT: s_waitcnt vmcnt(0)
-
-; GCN-NOT: v_mov_b32
-
-; VI: buffer_load_dword [[VA0:v[0-9]+]]
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
-; VI-NEXT: s_waitcnt vmcnt(0)
-; VI: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
+; GCN: buffer_load_dword [[VA0:v[0-9]+]]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_dword [[VA1:v[0-9]+]]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, {{0x9|0x24}}
; GCN-NOT: v_mov_b32
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[#LOAD + 3]]
More information about the llvm-branch-commits
mailing list