[llvm] 91aac7c - AMDGPU: Handle s_add_u32 in eliminateFrameIndex (#129628)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 4 17:09:49 PST 2025
Author: Matt Arsenault
Date: 2025-03-05T08:09:46+07:00
New Revision: 91aac7c379fb92348593d51e3f2d9e490ff67526
URL: https://github.com/llvm/llvm-project/commit/91aac7c379fb92348593d51e3f2d9e490ff67526
DIFF: https://github.com/llvm/llvm-project/commit/91aac7c379fb92348593d51e3f2d9e490ff67526.diff
LOG: AMDGPU: Handle s_add_u32 in eliminateFrameIndex (#129628)
We can fold frame indexes directly into existing immediate operands,
just like is already done for s_add_i32. We happen to use s_add_i32 in
the 32-bit add case, but s_add_u32 appears in the 64-bit add sequence
of a flat pointer if an addrspacecast source is a frame index.
This avoids, but does not address, a failure exposed after
a3165398db0736588daedb07650195502592e567 where two literal operands
end up in the final instruction. The underlying issue still exists for
some instructions without special handling in eliminateFrameIndex.
Added:
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/test/CodeGen/AMDGPU/frame-index.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7d6990c097774..128cd8244a477 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2713,7 +2713,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
return true;
}
- case AMDGPU::S_ADD_I32: {
+ case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_ADD_U32: {
// TODO: Handle s_or_b32, s_and_b32.
unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
@@ -2773,7 +2774,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
DstReg = TmpReg;
}
- auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
+ auto AddI32 = BuildMI(*MBB, *MI, DL, MI->getDesc())
.addDef(DstReg, RegState::Renamable)
.addReg(MaterializedReg, RegState::Kill)
.add(OtherOp);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
index c3b48b5d2ddff..378c6312c52be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
@@ -142,13 +142,12 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
; GCN-NEXT: v_mov_b32_e32 v0, s48
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240
; GCN-NEXT: v_mov_b32_e32 v0, s49
-; GCN-NEXT: s_and_b32 s4, s25, 63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244
; GCN-NEXT: v_mov_b32_e32 v0, s50
-; GCN-NEXT: s_lshl_b32 s4, s4, 2
+; GCN-NEXT: s_and_b32 s4, s25, 63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248
; GCN-NEXT: v_mov_b32_e32 v0, s51
-; GCN-NEXT: s_add_u32 s4, 0, s4
+; GCN-NEXT: s_lshl_b32 s4, s4, 2
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252
; GCN-NEXT: v_mov_b32_e32 v0, s24
; GCN-NEXT: v_mov_b32_e32 v1, s4
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir
new file mode 100644
index 0000000000000..af61bd70f16b6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir
@@ -0,0 +1,123 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s
+
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
+
+---
+name: s_add_u32__inline_imm__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_u32__inline_imm__fi_offset0
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_u32__inline_imm__fi_offset0
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_u32__inline_imm__fi_offset0
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_u32__inline_imm__fi_offset0
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_U32 12, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+
+...
+
+---
+name: s_add_u32__kernel__literal__fi_offset96__offset_literal
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 96, alignment: 16 }
+ - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
+ ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
+ ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+...
+
+---
+name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 96, alignment: 16 }
+ - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def $scc
+ SI_RETURN implicit $sgpr7, implicit $scc
+...
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 346b69c362c04..96d0e383761d1 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -38,7 +38,6 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0
@@ -76,12 +75,9 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_add_nc_u32 v0, s0, v0
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0
@@ -113,8 +109,7 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
@@ -168,7 +163,6 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -207,11 +201,9 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
@@ -246,11 +238,9 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
@@ -304,7 +294,6 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -343,11 +332,9 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
@@ -382,11 +369,9 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
@@ -440,7 +425,6 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0
@@ -483,8 +467,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
@@ -520,8 +503,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
@@ -576,7 +558,6 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -616,11 +597,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -657,11 +637,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
@@ -717,7 +696,6 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -757,11 +735,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -798,11 +775,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
@@ -857,7 +833,6 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0
@@ -900,8 +875,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0
@@ -937,8 +911,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
@@ -993,7 +966,6 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -1033,11 +1005,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -1074,11 +1045,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
@@ -1133,7 +1103,6 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1
@@ -1173,11 +1142,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -1214,11 +1182,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS
@@ -1263,7 +1230,6 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0
; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
@@ -1285,8 +1251,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
@@ -1306,8 +1271,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index b1ea275a97a39..004403f46a4d4 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -346,4 +346,32 @@ entry:
ret void
}
+; Check for "SOP2/SOPC instruction requires too many immediate
+; constants" verifier error. Frame index would fold into low half of
+; the lowered flat pointer add, and use s_add_u32 instead of
+; s_add_i32.
+
+; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error:
+; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
+; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
+define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
+entry:
+ %.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
+ %Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
+ %Total3.ascast.i.i = addrspacecast ptr addrspace(5) %Total3.i.i to ptr
+ %gep = getelementptr i8, ptr %Total3.ascast.i.i, i64 4096
+ %p2i = ptrtoint ptr %gep to i64
+ br label %.shuffle.then.i.i.i.i
+
+.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
+ store i64 0, ptr addrspace(5) null, align 4
+ %icmp = icmp ugt i64 %p2i, 1
+ br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
+
+vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
+ %wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
+ store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
+ ret void
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 5815d2bfe7bf8..81bd8baaa0e5d 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -91,8 +91,8 @@ body: |
; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32
; GCN: liveins: $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr0, 4, implicit-def $scc
+ ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr0, 4, implicit-def $scc
; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc
; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
; GCN-NEXT: $sgpr0 = S_ADD_I32 killed $sgpr0, 8, implicit-def $scc
More information about the llvm-commits
mailing list