[llvm] [CodeGen] Consider imm offsets when sorting framerefs (PR #171012)

via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 6 21:32:50 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Anshil Gandhi (gandhi56)

<details>
<summary>Changes</summary>

LocalStackSlotAllocation pass disallows negative offsets with respect to a base register. The pass ends up introducing a new register for such frame references. This patch helps LocalStackSlotAlloca to additionally consider the immediate offset of an instruction, when sorting frame refs - hence, avoiding negative offsets and maximizing reuse of the existing registers.

---

Patch is 101.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171012.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/LocalStackSlotAllocation.cpp (+27-15) 
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll (+6-5) 
- (added) llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir (+1093) 
- (modified) llvm/test/CodeGen/Thumb/frame-chain.ll (+4-8) 


``````````diff
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 987f64f56403d..d316f8d804f53 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -51,6 +51,7 @@ namespace {
   class FrameRef {
     MachineBasicBlock::iterator MI; // Instr referencing the frame
     int64_t LocalOffset;            // Local offset of the frame idx referenced
+    int64_t InstrOffset; // Offset of the instruction from the frame index
     int FrameIdx;                   // The frame index
 
     // Order reference instruction appears in program. Used to ensure
@@ -59,16 +60,20 @@ namespace {
     unsigned Order;
 
   public:
-    FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
-      MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+    FrameRef(MachineInstr *I, int64_t Offset, int64_t InstrOffset, int Idx,
+             unsigned Ord)
+        : MI(I), LocalOffset(Offset), InstrOffset(InstrOffset), FrameIdx(Idx),
+          Order(Ord) {}
 
     bool operator<(const FrameRef &RHS) const {
-      return std::tie(LocalOffset, FrameIdx, Order) <
-             std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
+      return std::tuple(LocalOffset + InstrOffset, FrameIdx, Order) <
+             std::tuple(RHS.LocalOffset + RHS.InstrOffset, RHS.FrameIdx,
+                        RHS.Order);
     }
 
     MachineBasicBlock::iterator getMachineInstr() const { return MI; }
     int64_t getLocalOffset() const { return LocalOffset; }
+    int64_t getInstrOffset() const { return InstrOffset; }
     int getFrameIndex() const { return FrameIdx; }
   };
 
@@ -335,20 +340,27 @@ bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) {
       // than that, but the increased register pressure makes that a
       // tricky thing to balance. Investigate if re-materializing these
       // becomes an issue.
-      for (const MachineOperand &MO : MI.operands()) {
+      for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+           ++OpIdx) {
+        const MachineOperand &MO = MI.getOperand(OpIdx);
         // Consider replacing all frame index operands that reference
         // an object allocated in the local block.
-        if (MO.isFI()) {
-          // Don't try this with values not in the local block.
-          if (!MFI.isObjectPreAllocated(MO.getIndex()))
-            break;
-          int Idx = MO.getIndex();
-          int64_t LocalOffset = LocalOffsets[Idx];
-          if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
-            break;
-          FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
+        if (!MO.isFI())
+          continue;
+
+        int FrameIdx = MO.getIndex();
+        // Don't try this with values not in the local block.
+        if (!MFI.isObjectPreAllocated(FrameIdx))
+          break;
+
+        int64_t LocalOffset = LocalOffsets[FrameIdx];
+        if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
           break;
-        }
+
+        int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, OpIdx);
+        FrameReferenceInsns.emplace_back(&MI, LocalOffset, InstrOffset,
+                                         FrameIdx, Order++);
+        break;
       }
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
index 8ea9ec397fe06..3be6456213168 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
@@ -49,15 +49,15 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
-    ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+    ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 100
     ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
-    ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 156, [[V_ADD_U32_e64_]], 0, implicit $exec
     ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
-    ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 412, [[V_ADD_U32_e64_]], 0, implicit $exec
     ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+    ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
     ; GFX10-NEXT: SI_RETURN
     ;
     ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 5f0ca7bc42ae0..3d02d70d2fdbb 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -294,12 +294,13 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_mov_b32 s0, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 offset:1024
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:  .LBB2_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT:    s_add_i32 s1, s0, 0x2000
+; FLATSCR-NEXT:    s_add_i32 s1, s0, 0x4000
 ; FLATSCR-NEXT:    s_add_i32 s0, s0, 1
 ; FLATSCR-NEXT:    s_cmpk_lt_u32 s0, 0x2120
 ; FLATSCR-NEXT:    scratch_store_byte off, v0, s1
@@ -307,12 +308,12 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_cbranch_scc1 .LBB2_1
 ; FLATSCR-NEXT:  ; %bb.2: ; %split
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
-; FLATSCR-NEXT:    s_addk_i32 s0, 0x2000
+; FLATSCR-NEXT:    s_addk_i32 s0, 0x4000
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x4000
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s0 glc
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
new file mode 100644
index 0000000000000..eb9ba3b1b2618
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
@@ -0,0 +1,1093 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            issue155902
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 1, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 2, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 3, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 4, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 5, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 6, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 7, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 8, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 9, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 10, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 11, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 12, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 13, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 14, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 15, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 16, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 17, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 18, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 19, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 20, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 21, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 22, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 23, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 24, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 25, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 26, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 27, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 28, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 29, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 30, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 31, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 32, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 33, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 34, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 35, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 36, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 37, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 38, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 39, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 40, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 41, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 42, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 43, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 44, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 45, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 46, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 47, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 48, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 49, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 50, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 51, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 52, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 53, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 54, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 55, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 56, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 57, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 58, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 59, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 60, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 61, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 62, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 63, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 64, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 65, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 66, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 67, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 68, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 69, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 70, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 71, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 72, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 73, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 74, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 75, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 76, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 77, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 78, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 79, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 80, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 81, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 82, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 83, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 84, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 85, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 86, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 87, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 88, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 89, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 90, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 91, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 92, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 93, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 94, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 95, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 96, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 97, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 98, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 99, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 100, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 101, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 102, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 103, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 104, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 105, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 106, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 107, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 108, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 109, class: av_64_align2, preferred-register: '', flags: [  ] }
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:
+  - { id: 0, type: default, offset: 0, size: 16384, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, type: default, offset: 0, size: 16, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  explicitKernArgSize: 400
+  maxKernArgAlign: 8
+  ldsSize:         0
+  gdsSize:         0
+  dynLDSAlign:     1
+  isEntryFunction: true
+  isChainFunction: false
+  noSignedZerosFPMath: false
+  memoryBound:     false
+  waveLimiter:     false
+  hasSpilledSGPRs: false
+  hasSpilledVGPRs: false
+  numWaveDispatchSGPRs: 0
+  numWaveDispatchVGPRs: 0
+  scratchRSrcReg:  '$private_rsrc_reg'
+  frameOffsetReg:  '$fp_reg'
+  stackPtrOffsetReg: '$sgpr32'
+  bytesInStackArgArea: 0
+  returnsVoid:     true
+  argumentInfo:
+    dispatchPtr:     { reg: '$sgpr0_sgpr1' }
+    queuePtr:        { reg: '$sgpr2_sgpr3' }
+    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+    dispatchID:      { reg: '$sgpr6_sgpr7' }
+    workGroupIDX:    { reg: '$sgpr8' }
+    workGroupIDY:    { reg: '$sgpr9' }
+    workGroupIDZ:    { reg: '$sgpr10' }
+    workItemIDX:     { reg: '$vgpr0', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr0', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr0', mask: 1072693248 }
+  psInputAddr:     0
+  psInputEnable:   0
+  maxMemoryClusterDWords: 8
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+    fp32-input-denormals: true
+    fp32-output-denormals: true
+    fp64-fp16-input-denormals: true
+    fp64-fp16-output-denormals: true
+  highBitsOf32BitAddress: 0
+  occupancy:       8
+  vgprForAGPRCopy: ''
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+  longBranchReservedReg: ''
+  hasInitWholeWave: false
+  dynamicVGPRBlockSize: 0
+  scratchReservedForDynamicVGPRs: 0
+  numKernargPreloadSGPRs: 0
+  isWholeWaveFunction: false
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    ; CHECK-LABEL: name: issue155902
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 24, 0 :: (dereferenceable...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/171012


More information about the llvm-commits mailing list