[llvm] [AMDGPU] change some tests to make downstream merging easier (PR #90626)
Gang Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 09:57:30 PDT 2024
https://github.com/cmc-rep created https://github.com/llvm/llvm-project/pull/90626
Change some AMDGPU codegen tests to make it easier to merge code into our downstream branches.
From 605c233551831341164aa84ee0fd692ff64966f0 Mon Sep 17 00:00:00 2001
From: gangc <gangc at amd.com>
Date: Tue, 30 Apr 2024 09:41:25 -0700
Subject: [PATCH] [AMDGPU] change some tests to make downstream merging easier
Signed-off-by: gangc <gangc at amd.com>
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 12 ++++++------
.../CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll | 4 ++--
llvm/test/CodeGen/AMDGPU/cc-update.ll | 12 ++++++------
.../AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll | 2 +-
llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 8 ++++----
5 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4f106bf0dfb114..eae666ab0e7d77 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -683,6 +683,12 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
+ if (hasFP(MF)) {
+ Register FPReg = MFI->getFrameOffsetReg();
+ assert(FPReg != AMDGPU::FP_REG);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
+ }
+
if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
assert(SPReg != AMDGPU::SP_REG);
@@ -690,12 +696,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
}
- if (hasFP(MF)) {
- Register FPReg = MFI->getFrameOffsetReg();
- assert(FPReg != AMDGPU::FP_REG);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
- }
-
bool NeedsFlatScratchInit =
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index b940dc74839b26..eaaeb3dc77a419 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -16,8 +16,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_mov_b32 s33, 0
+; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
@@ -87,8 +87,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_mov_b32 s33, 0
+; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll
index c674aebabcc8d2..8e773cad3b3357 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll
@@ -321,8 +321,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_mov_b32 s33, 0
+; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_getpc_b64 s[16:17]
; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
@@ -340,8 +340,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_mov_b32 s33, 0
+; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_getpc_b64 s[16:17]
; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
@@ -351,8 +351,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s10, s10, s15
-; GFX1010-NEXT: s_mov_b32 s32, 0
; GFX1010-NEXT: s_mov_b32 s33, 0
+; GFX1010-NEXT: s_mov_b32 s32, 0
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
@@ -378,16 +378,16 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
; GFX1100-NEXT: s_mov_b32 s14, s15
-; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_mov_b32 s33, 0
+; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_getpc_b64 s[6:7]
; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX1100-NEXT: s_endpgm
; GFX1010-NEXT s_add_u32 s12, s12, s17
-; GFX1010-NEXT s_mov_b32 s32, 0
; GFX1010-NEXT s_mov_b32 s33, 0
+; GFX1010-NEXT s_mov_b32 s32, 0
; GFX1010-NEXT s_addc_u32 s13, s13, 0
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
@@ -459,8 +459,8 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s10, s10, s15
-; GFX1010-NEXT: s_movk_i32 s32, 0x200
; GFX1010-NEXT: s_mov_b32 s33, 0
+; GFX1010-NEXT: s_movk_i32 s32, 0x200
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index fbf2ee1145ae94..ec446f1f3bf27d 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -7,8 +7,8 @@
define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-LABEL: test_kernel:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s32, 0x180000
; CHECK-NEXT: s_mov_b32 s33, 0
+; CHECK-NEXT: s_mov_b32 s32, 0x180000
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
; CHECK-NEXT: s_add_u32 s0, s0, s15
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 125e6bc0f787f1..ba012b208c957a 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -21,8 +21,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_add_u32 s0, s0, s9
; MUBUF-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
-; MUBUF-NEXT: s_movk_i32 s32, 0x400
; MUBUF-NEXT: s_mov_b32 s33, 0
+; MUBUF-NEXT: s_movk_i32 s32, 0x400
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: s_cmp_lg_u32 s8, 0
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3
@@ -57,8 +57,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
; FLATSCR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
-; FLATSCR-NEXT: s_mov_b32 s32, 16
; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: s_mov_b32 s32, 16
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT: s_cmp_lg_u32 s4, 0
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3
@@ -125,8 +125,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8
; MUBUF-NEXT: s_add_u32 s0, s0, s9
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
-; MUBUF-NEXT: s_movk_i32 s32, 0x1000
; MUBUF-NEXT: s_mov_b32 s33, 0
+; MUBUF-NEXT: s_movk_i32 s32, 0x1000
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: s_cmp_lg_u32 s6, 0
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_2
@@ -159,8 +159,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; FLATSCR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
-; FLATSCR-NEXT: s_mov_b32 s32, 64
; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: s_mov_b32 s32, 64
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT: s_cmp_lg_u32 s2, 0
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_2
More information about the llvm-commits
mailing list