[llvm] [CodeGen] Consider imm offsets when sorting framerefs (PR #171012)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 6 21:32:50 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Anshil Gandhi (gandhi56)
<details>
<summary>Changes</summary>
The LocalStackSlotAllocation pass disallows negative offsets with respect to a base register. The pass ends up introducing a new register for such frame references. This patch makes LocalStackSlotAllocation additionally consider the immediate offset of an instruction when sorting frame references, thereby avoiding negative offsets and maximizing reuse of the existing registers.
---
Patch is 101.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171012.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/LocalStackSlotAllocation.cpp (+27-15)
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll (+6-5)
- (added) llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir (+1093)
- (modified) llvm/test/CodeGen/Thumb/frame-chain.ll (+4-8)
``````````diff
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 987f64f56403d..d316f8d804f53 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -51,6 +51,7 @@ namespace {
class FrameRef {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
+ int64_t InstrOffset; // Offset of the instruction from the frame index
int FrameIdx; // The frame index
// Order reference instruction appears in program. Used to ensure
@@ -59,16 +60,20 @@ namespace {
unsigned Order;
public:
- FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
- MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+ FrameRef(MachineInstr *I, int64_t Offset, int64_t InstrOffset, int Idx,
+ unsigned Ord)
+ : MI(I), LocalOffset(Offset), InstrOffset(InstrOffset), FrameIdx(Idx),
+ Order(Ord) {}
bool operator<(const FrameRef &RHS) const {
- return std::tie(LocalOffset, FrameIdx, Order) <
- std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
+ return std::tuple(LocalOffset + InstrOffset, FrameIdx, Order) <
+ std::tuple(RHS.LocalOffset + RHS.InstrOffset, RHS.FrameIdx,
+ RHS.Order);
}
MachineBasicBlock::iterator getMachineInstr() const { return MI; }
int64_t getLocalOffset() const { return LocalOffset; }
+ int64_t getInstrOffset() const { return InstrOffset; }
int getFrameIndex() const { return FrameIdx; }
};
@@ -335,20 +340,27 @@ bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) {
// than that, but the increased register pressure makes that a
// tricky thing to balance. Investigate if re-materializing these
// becomes an issue.
- for (const MachineOperand &MO : MI.operands()) {
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+ ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
// Consider replacing all frame index operands that reference
// an object allocated in the local block.
- if (MO.isFI()) {
- // Don't try this with values not in the local block.
- if (!MFI.isObjectPreAllocated(MO.getIndex()))
- break;
- int Idx = MO.getIndex();
- int64_t LocalOffset = LocalOffsets[Idx];
- if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
- break;
- FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
+ if (!MO.isFI())
+ continue;
+
+ int FrameIdx = MO.getIndex();
+ // Don't try this with values not in the local block.
+ if (!MFI.isObjectPreAllocated(FrameIdx))
+ break;
+
+ int64_t LocalOffset = LocalOffsets[FrameIdx];
+ if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
- }
+
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, OpIdx);
+ FrameReferenceInsns.emplace_back(&MI, LocalOffset, InstrOffset,
+ FrameIdx, Order++);
+ break;
}
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
index 8ea9ec397fe06..3be6456213168 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
@@ -49,15 +49,15 @@ machineFunctionInfo:
body: |
bb.0:
; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
- ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 100
; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
- ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
- ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 156, [[V_ADD_U32_e64_]], 0, implicit $exec
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
- ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 412, [[V_ADD_U32_e64_]], 0, implicit $exec
; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
; GFX10-NEXT: SI_RETURN
;
; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 5f0ca7bc42ae0..3d02d70d2fdbb 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -294,12 +294,13 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT: s_mov_b32 s0, 0
-; FLATSCR-NEXT: scratch_store_dword off, v0, s0 offset:1024
+; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT: scratch_store_dword off, v0, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_mov_b32 s0, 0
; FLATSCR-NEXT: .LBB2_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT: s_add_i32 s1, s0, 0x2000
+; FLATSCR-NEXT: s_add_i32 s1, s0, 0x4000
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v0, s1
@@ -307,12 +308,12 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x1000
-; FLATSCR-NEXT: s_addk_i32 s0, 0x2000
+; FLATSCR-NEXT: s_addk_i32 s0, 0x4000
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT: s_movk_i32 s0, 0x4000
; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s0 glc
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
new file mode 100644
index 0000000000000..eb9ba3b1b2618
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
@@ -0,0 +1,1093 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: issue155902
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 2, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 4, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 5, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 7, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 8, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 9, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 10, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 11, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 12, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 13, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 14, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 15, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 16, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 17, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 18, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 19, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 20, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 21, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 22, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 23, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 24, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 25, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 26, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 27, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 28, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 29, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 30, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 31, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 32, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 33, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 34, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 35, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 36, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 37, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 38, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 39, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 40, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 41, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 42, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 43, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 44, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 45, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 46, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 47, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 48, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 49, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 50, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 51, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 52, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 53, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 54, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 55, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 56, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 57, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 58, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 59, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 60, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 61, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 62, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 63, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 64, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 65, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 66, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 67, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 68, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 69, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 70, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 71, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 72, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 73, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 74, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 75, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 76, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 77, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 78, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 79, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 80, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 81, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 82, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 83, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 84, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 85, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 86, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 87, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 88, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 89, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 90, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 91, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 92, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 93, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 94, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 95, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 96, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 97, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 98, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 99, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 100, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 101, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 102, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 103, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 104, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 105, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 106, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 107, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 108, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 109, class: av_64_align2, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack:
+ - { id: 0, type: default, offset: 0, size: 16384, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, type: default, offset: 0, size: 16, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, type: default, offset: 0, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 400
+ maxKernArgAlign: 8
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: true
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ dispatchPtr: { reg: '$sgpr0_sgpr1' }
+ queuePtr: { reg: '$sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ dispatchID: { reg: '$sgpr6_sgpr7' }
+ workGroupIDX: { reg: '$sgpr8' }
+ workGroupIDY: { reg: '$sgpr9' }
+ workGroupIDZ: { reg: '$sgpr10' }
+ workItemIDX: { reg: '$vgpr0', mask: 1023 }
+ workItemIDY: { reg: '$vgpr0', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr0', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ numKernargPreloadSGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+
+ ; CHECK-LABEL: name: issue155902
+ ; CHECK: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 24, 0 :: (dereferenceable...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171012
More information about the llvm-commits
mailing list