[llvm] [CodeGen] Consider imm offsets when sorting framerefs (PR #171012)

Anshil Gandhi via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 6 21:32:22 PST 2025


https://github.com/gandhi56 created https://github.com/llvm/llvm-project/pull/171012

The LocalStackSlotAllocation pass disallows negative offsets with respect to a base register, and so ends up introducing a new base register for such frame references. This patch makes LocalStackSlotAllocation additionally consider the immediate offset of an instruction when sorting frame references, thereby avoiding negative offsets and maximizing reuse of the existing base registers.

>From 2a548630b96cc63bedfc1819e699529bd6a695b9 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Fri, 5 Dec 2025 16:41:24 -0600
Subject: [PATCH 1/2] Precommit test

---
 .../local-stack-alloc-sort-framerefs.mir      | 1093 +++++++++++++++++
 1 file changed, 1093 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir

diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
new file mode 100644
index 0000000000000..d3eac690312bc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
@@ -0,0 +1,1093 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            issue155902
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 1, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 2, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 3, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 4, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 5, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 6, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 7, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 8, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 9, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 10, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 11, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 12, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 13, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 14, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 15, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 16, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 17, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 18, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 19, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 20, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 21, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 22, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 23, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 24, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 25, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 26, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 27, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 28, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 29, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 30, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 31, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 32, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 33, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 34, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 35, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 36, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 37, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 38, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 39, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 40, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 41, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 42, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 43, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 44, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 45, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 46, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 47, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 48, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 49, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 50, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 51, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 52, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 53, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 54, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 55, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 56, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 57, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 58, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 59, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 60, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 61, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 62, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 63, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 64, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 65, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 66, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 67, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 68, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 69, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 70, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 71, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 72, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 73, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 74, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 75, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 76, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 77, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 78, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 79, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 80, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 81, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 82, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 83, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 84, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 85, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 86, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 87, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 88, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 89, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 90, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 91, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 92, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 93, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 94, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 95, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 96, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 97, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 98, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 99, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 100, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 101, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 102, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 103, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 104, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 105, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 106, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 107, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 108, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 109, class: av_64_align2, preferred-register: '', flags: [  ] }
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:
+  - { id: 0, type: default, offset: 0, size: 16384, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, type: default, offset: 0, size: 16, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  explicitKernArgSize: 400
+  maxKernArgAlign: 8
+  ldsSize:         0
+  gdsSize:         0
+  dynLDSAlign:     1
+  isEntryFunction: true
+  isChainFunction: false
+  noSignedZerosFPMath: false
+  memoryBound:     false
+  waveLimiter:     false
+  hasSpilledSGPRs: false
+  hasSpilledVGPRs: false
+  numWaveDispatchSGPRs: 0
+  numWaveDispatchVGPRs: 0
+  scratchRSrcReg:  '$private_rsrc_reg'
+  frameOffsetReg:  '$fp_reg'
+  stackPtrOffsetReg: '$sgpr32'
+  bytesInStackArgArea: 0
+  returnsVoid:     true
+  argumentInfo:
+    dispatchPtr:     { reg: '$sgpr0_sgpr1' }
+    queuePtr:        { reg: '$sgpr2_sgpr3' }
+    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+    dispatchID:      { reg: '$sgpr6_sgpr7' }
+    workGroupIDX:    { reg: '$sgpr8' }
+    workGroupIDY:    { reg: '$sgpr9' }
+    workGroupIDZ:    { reg: '$sgpr10' }
+    workItemIDX:     { reg: '$vgpr0', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr0', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr0', mask: 1072693248 }
+  psInputAddr:     0
+  psInputEnable:   0
+  maxMemoryClusterDWords: 8
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+    fp32-input-denormals: true
+    fp32-output-denormals: true
+    fp64-fp16-input-denormals: true
+    fp64-fp16-output-denormals: true
+  highBitsOf32BitAddress: 0
+  occupancy:       8
+  vgprForAGPRCopy: ''
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+  longBranchReservedReg: ''
+  hasInitWholeWave: false
+  dynamicVGPRBlockSize: 0
+  scratchReservedForDynamicVGPRs: 0
+  numKernargPreloadSGPRs: 0
+  isWholeWaveFunction: false
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    ; CHECK-LABEL: name: issue155902
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 24, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 32, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM6:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 48, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM7:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM8:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 64, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM9:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 72, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM10:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 80, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM11:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 88, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM12:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 96, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM13:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 104, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM14:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 112, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM15:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 120, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM16:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 128, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM17:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 136, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM18:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 144, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM19:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 152, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM20:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 160, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM21:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 168, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM22:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 176, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM23:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 184, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM24:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 192, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM25:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 200, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM26:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 208, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM27:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 216, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM28:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 224, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM29:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 232, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM30:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 240, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM31:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 248, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM32:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 256, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM33:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 264, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM34:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 272, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM35:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 280, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM36:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 288, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM37:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 296, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM38:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 304, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM39:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 312, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM40:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 320, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM41:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 328, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM42:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 336, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM43:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 344, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM44:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 352, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM45:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 360, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM46:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 368, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM47:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 376, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM48:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 384, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM49:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 392, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[V_MOV_B1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM1]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY2]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM2]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY3]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM3]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY4]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM4]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY5]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM5]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY6]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM6]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY7]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM7]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY8]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM8]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY9]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM9]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY10]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM10]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY11]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM11]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY12]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM12]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY13]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM13]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY14]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM14]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY15]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM15]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY16]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM16]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY17]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM17]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY18]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM18]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY19]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM19]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY20]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM20]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY21]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM21]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY22]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY23:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM22]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY23]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY24:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM23]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY24]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY25:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM24]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY25]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY26:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM25]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY26]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY27:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM26]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY27]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY28:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM27]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY28]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY29:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM28]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY29]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY30:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM29]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY30]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY31:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM30]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY31]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY32:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM31]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY32]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY33:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM32]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY33]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY34:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM33]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY34]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY35:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM34]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY35]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY36:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM35]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY36]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY37:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM36]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY37]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY38:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM37]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY38]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY39:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM38]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY39]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY40:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM39]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY40]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY41:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM40]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY41]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY42:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM41]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY42]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY43:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM42]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY43]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY44:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM43]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY44]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY45:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM44]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY45]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY46:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM45]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY46]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY47:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM46]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY47]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY48:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM47]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY48]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY49:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM48]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY49]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY50:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM49]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY50]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %3:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 0, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 8, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 16, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 24, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %12:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 32, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %13:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 40, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %14:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 48, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %15:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 56, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 64, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %17:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 72, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %18:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 80, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %19:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 88, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 96, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %21:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 104, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %22:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 112, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %23:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 120, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 128, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %25:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 136, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 144, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %27:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 152, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %28:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 160, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %29:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 168, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %30:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 176, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %31:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 184, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %32:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 192, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %33:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 200, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %34:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 208, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %35:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 216, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %36:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 224, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %37:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 232, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %38:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 240, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %39:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 248, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %40:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 256, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %41:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 264, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %42:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 272, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %43:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 280, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %44:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 288, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %45:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 296, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %46:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 304, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %47:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 312, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %48:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 320, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %49:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 328, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %50:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 336, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %51:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 344, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %52:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 352, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %53:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 360, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %54:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 368, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %55:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 376, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %56:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 384, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %57:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 392, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %58:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+    SCRATCH_STORE_DWORDX2_SADDR %58, %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    SCRATCH_STORE_DWORDX2_SADDR %58, %stack.1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %59:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
+    SCRATCH_STORE_DWORDX2_SADDR killed %59, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %60:av_64_align2 = COPY %8
+    SCRATCH_STORE_DWORDX2_SADDR killed %60, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %61:av_64_align2 = COPY %9
+    SCRATCH_STORE_DWORDX2_SADDR killed %61, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %62:av_64_align2 = COPY %10
+    SCRATCH_STORE_DWORDX2_SADDR killed %62, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %63:av_64_align2 = COPY %11
+    SCRATCH_STORE_DWORDX2_SADDR killed %63, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %64:av_64_align2 = COPY %12
+    SCRATCH_STORE_DWORDX2_SADDR killed %64, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %65:av_64_align2 = COPY %13
+    SCRATCH_STORE_DWORDX2_SADDR killed %65, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %66:av_64_align2 = COPY %14
+    SCRATCH_STORE_DWORDX2_SADDR killed %66, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %67:av_64_align2 = COPY %15
+    SCRATCH_STORE_DWORDX2_SADDR killed %67, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %68:av_64_align2 = COPY %16
+    SCRATCH_STORE_DWORDX2_SADDR killed %68, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %69:av_64_align2 = COPY %17
+    SCRATCH_STORE_DWORDX2_SADDR killed %69, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %70:av_64_align2 = COPY %18
+    SCRATCH_STORE_DWORDX2_SADDR killed %70, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %71:av_64_align2 = COPY %19
+    SCRATCH_STORE_DWORDX2_SADDR killed %71, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %72:av_64_align2 = COPY %20
+    SCRATCH_STORE_DWORDX2_SADDR killed %72, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %73:av_64_align2 = COPY %21
+    SCRATCH_STORE_DWORDX2_SADDR killed %73, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %74:av_64_align2 = COPY %22
+    SCRATCH_STORE_DWORDX2_SADDR killed %74, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %75:av_64_align2 = COPY %23
+    SCRATCH_STORE_DWORDX2_SADDR killed %75, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %76:av_64_align2 = COPY %24
+    SCRATCH_STORE_DWORDX2_SADDR killed %76, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %77:av_64_align2 = COPY %25
+    SCRATCH_STORE_DWORDX2_SADDR killed %77, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %78:av_64_align2 = COPY %26
+    SCRATCH_STORE_DWORDX2_SADDR killed %78, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %79:av_64_align2 = COPY %27
+    SCRATCH_STORE_DWORDX2_SADDR killed %79, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %80:av_64_align2 = COPY %28
+    SCRATCH_STORE_DWORDX2_SADDR killed %80, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %81:av_64_align2 = COPY %29
+    SCRATCH_STORE_DWORDX2_SADDR killed %81, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %82:av_64_align2 = COPY %30
+    SCRATCH_STORE_DWORDX2_SADDR killed %82, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %83:av_64_align2 = COPY %31
+    SCRATCH_STORE_DWORDX2_SADDR killed %83, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %84:av_64_align2 = COPY %32
+    SCRATCH_STORE_DWORDX2_SADDR killed %84, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %85:av_64_align2 = COPY %33
+    SCRATCH_STORE_DWORDX2_SADDR killed %85, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %86:av_64_align2 = COPY %34
+    SCRATCH_STORE_DWORDX2_SADDR killed %86, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %87:av_64_align2 = COPY %35
+    SCRATCH_STORE_DWORDX2_SADDR killed %87, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %88:av_64_align2 = COPY %36
+    SCRATCH_STORE_DWORDX2_SADDR killed %88, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %89:av_64_align2 = COPY %37
+    SCRATCH_STORE_DWORDX2_SADDR killed %89, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %90:av_64_align2 = COPY %38
+    SCRATCH_STORE_DWORDX2_SADDR killed %90, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %91:av_64_align2 = COPY %39
+    SCRATCH_STORE_DWORDX2_SADDR killed %91, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %92:av_64_align2 = COPY %40
+    SCRATCH_STORE_DWORDX2_SADDR killed %92, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %93:av_64_align2 = COPY %41
+    SCRATCH_STORE_DWORDX2_SADDR killed %93, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %94:av_64_align2 = COPY %42
+    SCRATCH_STORE_DWORDX2_SADDR killed %94, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %95:av_64_align2 = COPY %43
+    SCRATCH_STORE_DWORDX2_SADDR killed %95, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %96:av_64_align2 = COPY %44
+    SCRATCH_STORE_DWORDX2_SADDR killed %96, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %97:av_64_align2 = COPY %45
+    SCRATCH_STORE_DWORDX2_SADDR killed %97, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %98:av_64_align2 = COPY %46
+    SCRATCH_STORE_DWORDX2_SADDR killed %98, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %99:av_64_align2 = COPY %47
+    SCRATCH_STORE_DWORDX2_SADDR killed %99, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %100:av_64_align2 = COPY %48
+    SCRATCH_STORE_DWORDX2_SADDR killed %100, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %101:av_64_align2 = COPY %49
+    SCRATCH_STORE_DWORDX2_SADDR killed %101, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %102:av_64_align2 = COPY %50
+    SCRATCH_STORE_DWORDX2_SADDR killed %102, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %103:av_64_align2 = COPY %51
+    SCRATCH_STORE_DWORDX2_SADDR killed %103, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %104:av_64_align2 = COPY %52
+    SCRATCH_STORE_DWORDX2_SADDR killed %104, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %105:av_64_align2 = COPY %53
+    SCRATCH_STORE_DWORDX2_SADDR killed %105, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %106:av_64_align2 = COPY %54
+    SCRATCH_STORE_DWORDX2_SADDR killed %106, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %107:av_64_align2 = COPY %55
+    SCRATCH_STORE_DWORDX2_SADDR killed %107, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %108:av_64_align2 = COPY %56
+    SCRATCH_STORE_DWORDX2_SADDR killed %108, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %109:av_64_align2 = COPY %57
+    SCRATCH_STORE_DWORDX2_SADDR killed %109, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    S_ENDPGM 0
+...
+---
+name:            issue155902_fp
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 1, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 2, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 3, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 4, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 5, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 6, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 7, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 8, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 9, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 10, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 11, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 12, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 13, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 14, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 15, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 16, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 17, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 18, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 19, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 20, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 21, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 22, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 23, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 24, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 25, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 26, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 27, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 28, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 29, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 30, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 31, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 32, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 33, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 34, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 35, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 36, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 37, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 38, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 39, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 40, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 41, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 42, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 43, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 44, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 45, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 46, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 47, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 48, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 49, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 50, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 51, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 52, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 53, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 54, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 55, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 56, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 57, class: sreg_64_xexec, preferred-register: '', flags: [  ] }
+  - { id: 58, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 59, class: vreg_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 60, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 61, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 62, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 63, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 64, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 65, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 66, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 67, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 68, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 69, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 70, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 71, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 72, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 73, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 74, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 75, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 76, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 77, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 78, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 79, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 80, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 81, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 82, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 83, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 84, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 85, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 86, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 87, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 88, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 89, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 90, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 91, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 92, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 93, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 94, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 95, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 96, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 97, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 98, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 99, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 100, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 101, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 102, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 103, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 104, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 105, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 106, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 107, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 108, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 109, class: av_64_align2, preferred-register: '', flags: [  ] }
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:
+  - { id: 0, type: default, offset: 0, size: 16384, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, type: default, offset: 0, size: 16, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  explicitKernArgSize: 400
+  maxKernArgAlign: 8
+  ldsSize:         0
+  gdsSize:         0
+  dynLDSAlign:     1
+  isEntryFunction: true
+  isChainFunction: false
+  noSignedZerosFPMath: false
+  memoryBound:     false
+  waveLimiter:     false
+  hasSpilledSGPRs: false
+  hasSpilledVGPRs: false
+  numWaveDispatchSGPRs: 0
+  numWaveDispatchVGPRs: 0
+  scratchRSrcReg:  '$private_rsrc_reg'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  bytesInStackArgArea: 0
+  returnsVoid:     true
+  argumentInfo:
+    dispatchPtr:     { reg: '$sgpr0_sgpr1' }
+    queuePtr:        { reg: '$sgpr2_sgpr3' }
+    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+    dispatchID:      { reg: '$sgpr6_sgpr7' }
+    workGroupIDX:    { reg: '$sgpr8' }
+    workGroupIDY:    { reg: '$sgpr9' }
+    workGroupIDZ:    { reg: '$sgpr10' }
+    workItemIDX:     { reg: '$vgpr0', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr0', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr0', mask: 1072693248 }
+  psInputAddr:     0
+  psInputEnable:   0
+  maxMemoryClusterDWords: 8
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+    fp32-input-denormals: true
+    fp32-output-denormals: true
+    fp64-fp16-input-denormals: true
+    fp64-fp16-output-denormals: true
+  highBitsOf32BitAddress: 0
+  occupancy:       8
+  vgprForAGPRCopy: ''
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+  longBranchReservedReg: ''
+  hasInitWholeWave: false
+  dynamicVGPRBlockSize: 0
+  scratchReservedForDynamicVGPRs: 0
+  numKernargPreloadSGPRs: 0
+  isWholeWaveFunction: false
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    ; CHECK-LABEL: name: issue155902_fp
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 24, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 32, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM6:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 48, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM7:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM8:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 64, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM9:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 72, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM10:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 80, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM11:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 88, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM12:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 96, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM13:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 104, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM14:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 112, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM15:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 120, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM16:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 128, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM17:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 136, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM18:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 144, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM19:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 152, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM20:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 160, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM21:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 168, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM22:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 176, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM23:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 184, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM24:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 192, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM25:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 200, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM26:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 208, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM27:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 216, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM28:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 224, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM29:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 232, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM30:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 240, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM31:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 248, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM32:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 256, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM33:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 264, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM34:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 272, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM35:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 280, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM36:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 288, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM37:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 296, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM38:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 304, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM39:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 312, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM40:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 320, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM41:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 328, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM42:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 336, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM43:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 344, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM44:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 352, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM45:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 360, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM46:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 368, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM47:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 376, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM48:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 384, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM49:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 392, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[V_MOV_B1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM1]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY2]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM2]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY3]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM3]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY4]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM4]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY5]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM5]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY6]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM6]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY7]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM7]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY8]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM8]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY9]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM9]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY10]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM10]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY11]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM11]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY12]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM12]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY13]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM13]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY14]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM14]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY15]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM15]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY16]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM16]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY17]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM17]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY18]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM18]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY19]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM19]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY20]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM20]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY21]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM21]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY22]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY23:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM22]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY23]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY24:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM23]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY24]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY25:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM24]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY25]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY26:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM25]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY26]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY27:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM26]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY27]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY28:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM27]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY28]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY29:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM28]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY29]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY30:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM29]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY30]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY31:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM30]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY31]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY32:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM31]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY32]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY33:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM32]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY33]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY34:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM33]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY34]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY35:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM34]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY35]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY36:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM35]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY36]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY37:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM36]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY37]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY38:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM37]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY38]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY39:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM38]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY39]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY40:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM39]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY40]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY41:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM40]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY41]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY42:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM41]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY42]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY43:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM42]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY43]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY44:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM43]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY44]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY45:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM44]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY45]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY46:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM45]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY46]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY47:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM46]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY47]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY48:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM47]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY48]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY49:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM48]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY49]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: [[COPY50:%[0-9]+]]:av_64_align2 = COPY [[S_LOAD_DWORDX2_IMM49]]
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[COPY50]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %3:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 0, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 8, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 16, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 24, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %12:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 32, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %13:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 40, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %14:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 48, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %15:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 56, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 64, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %17:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 72, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %18:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 80, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %19:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 88, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 96, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %21:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 104, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %22:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 112, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %23:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 120, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 128, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %25:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 136, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 144, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %27:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 152, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %28:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 160, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %29:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 168, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %30:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 176, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %31:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 184, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %32:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 192, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %33:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 200, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %34:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 208, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %35:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 216, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %36:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 224, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %37:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 232, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %38:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 240, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %39:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 248, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %40:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 256, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %41:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 264, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %42:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 272, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %43:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 280, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %44:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 288, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %45:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 296, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %46:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 304, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %47:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 312, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %48:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 320, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %49:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 328, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %50:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 336, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %51:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 344, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %52:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 352, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %53:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 360, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %54:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 368, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %55:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 376, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %56:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 384, 0 :: (dereferenceable invariant load (s64) , align 16, addrspace 4)
+    %57:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3(p4), 392, 0 :: (dereferenceable invariant load (s64) , addrspace 4)
+    %58:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+    SCRATCH_STORE_DWORDX2_SADDR %58, %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    SCRATCH_STORE_DWORDX2_SADDR %58, %stack.1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %59:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
+    SCRATCH_STORE_DWORDX2_SADDR killed %59, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %60:av_64_align2 = COPY %8
+    SCRATCH_STORE_DWORDX2_SADDR killed %60, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %61:av_64_align2 = COPY %9
+    SCRATCH_STORE_DWORDX2_SADDR killed %61, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %62:av_64_align2 = COPY %10
+    SCRATCH_STORE_DWORDX2_SADDR killed %62, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %63:av_64_align2 = COPY %11
+    SCRATCH_STORE_DWORDX2_SADDR killed %63, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %64:av_64_align2 = COPY %12
+    SCRATCH_STORE_DWORDX2_SADDR killed %64, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %65:av_64_align2 = COPY %13
+    SCRATCH_STORE_DWORDX2_SADDR killed %65, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %66:av_64_align2 = COPY %14
+    SCRATCH_STORE_DWORDX2_SADDR killed %66, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %67:av_64_align2 = COPY %15
+    SCRATCH_STORE_DWORDX2_SADDR killed %67, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %68:av_64_align2 = COPY %16
+    SCRATCH_STORE_DWORDX2_SADDR killed %68, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %69:av_64_align2 = COPY %17
+    SCRATCH_STORE_DWORDX2_SADDR killed %69, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %70:av_64_align2 = COPY %18
+    SCRATCH_STORE_DWORDX2_SADDR killed %70, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %71:av_64_align2 = COPY %19
+    SCRATCH_STORE_DWORDX2_SADDR killed %71, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %72:av_64_align2 = COPY %20
+    SCRATCH_STORE_DWORDX2_SADDR killed %72, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %73:av_64_align2 = COPY %21
+    SCRATCH_STORE_DWORDX2_SADDR killed %73, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %74:av_64_align2 = COPY %22
+    SCRATCH_STORE_DWORDX2_SADDR killed %74, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %75:av_64_align2 = COPY %23
+    SCRATCH_STORE_DWORDX2_SADDR killed %75, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %76:av_64_align2 = COPY %24
+    SCRATCH_STORE_DWORDX2_SADDR killed %76, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %77:av_64_align2 = COPY %25
+    SCRATCH_STORE_DWORDX2_SADDR killed %77, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %78:av_64_align2 = COPY %26
+    SCRATCH_STORE_DWORDX2_SADDR killed %78, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %79:av_64_align2 = COPY %27
+    SCRATCH_STORE_DWORDX2_SADDR killed %79, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %80:av_64_align2 = COPY %28
+    SCRATCH_STORE_DWORDX2_SADDR killed %80, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %81:av_64_align2 = COPY %29
+    SCRATCH_STORE_DWORDX2_SADDR killed %81, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %82:av_64_align2 = COPY %30
+    SCRATCH_STORE_DWORDX2_SADDR killed %82, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %83:av_64_align2 = COPY %31
+    SCRATCH_STORE_DWORDX2_SADDR killed %83, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %84:av_64_align2 = COPY %32
+    SCRATCH_STORE_DWORDX2_SADDR killed %84, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %85:av_64_align2 = COPY %33
+    SCRATCH_STORE_DWORDX2_SADDR killed %85, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %86:av_64_align2 = COPY %34
+    SCRATCH_STORE_DWORDX2_SADDR killed %86, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %87:av_64_align2 = COPY %35
+    SCRATCH_STORE_DWORDX2_SADDR killed %87, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %88:av_64_align2 = COPY %36
+    SCRATCH_STORE_DWORDX2_SADDR killed %88, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %89:av_64_align2 = COPY %37
+    SCRATCH_STORE_DWORDX2_SADDR killed %89, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %90:av_64_align2 = COPY %38
+    SCRATCH_STORE_DWORDX2_SADDR killed %90, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %91:av_64_align2 = COPY %39
+    SCRATCH_STORE_DWORDX2_SADDR killed %91, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %92:av_64_align2 = COPY %40
+    SCRATCH_STORE_DWORDX2_SADDR killed %92, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %93:av_64_align2 = COPY %41
+    SCRATCH_STORE_DWORDX2_SADDR killed %93, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %94:av_64_align2 = COPY %42
+    SCRATCH_STORE_DWORDX2_SADDR killed %94, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %95:av_64_align2 = COPY %43
+    SCRATCH_STORE_DWORDX2_SADDR killed %95, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %96:av_64_align2 = COPY %44
+    SCRATCH_STORE_DWORDX2_SADDR killed %96, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %97:av_64_align2 = COPY %45
+    SCRATCH_STORE_DWORDX2_SADDR killed %97, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %98:av_64_align2 = COPY %46
+    SCRATCH_STORE_DWORDX2_SADDR killed %98, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %99:av_64_align2 = COPY %47
+    SCRATCH_STORE_DWORDX2_SADDR killed %99, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %100:av_64_align2 = COPY %48
+    SCRATCH_STORE_DWORDX2_SADDR killed %100, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %101:av_64_align2 = COPY %49
+    SCRATCH_STORE_DWORDX2_SADDR killed %101, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %102:av_64_align2 = COPY %50
+    SCRATCH_STORE_DWORDX2_SADDR killed %102, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %103:av_64_align2 = COPY %51
+    SCRATCH_STORE_DWORDX2_SADDR killed %103, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %104:av_64_align2 = COPY %52
+    SCRATCH_STORE_DWORDX2_SADDR killed %104, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %105:av_64_align2 = COPY %53
+    SCRATCH_STORE_DWORDX2_SADDR killed %105, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %106:av_64_align2 = COPY %54
+    SCRATCH_STORE_DWORDX2_SADDR killed %106, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %107:av_64_align2 = COPY %55
+    SCRATCH_STORE_DWORDX2_SADDR killed %107, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %108:av_64_align2 = COPY %56
+    SCRATCH_STORE_DWORDX2_SADDR killed %108, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    %109:av_64_align2 = COPY %57
+    SCRATCH_STORE_DWORDX2_SADDR killed %109, %stack.2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) , addrspace 5)
+    S_ENDPGM 0
+...

>From 96b556a621d15646a5c8ed6fd044ad9654115203 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <gandhi21299 at gmail.com>
Date: Fri, 7 Nov 2025 12:24:43 -0500
Subject: [PATCH 2/2] [CodeGen] Consider imm offsets when sorting framerefs

The LocalStackSlotAllocation pass disallows negative offsets with
respect to a base register, so it ends up introducing a new base
register for such frame references. This patch makes
LocalStackSlotAllocation additionally consider the immediate offset
of an instruction when sorting frame references, thereby avoiding
negative offsets and maximizing reuse of existing base registers.
---
 llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 42 ++++++++++++-------
 ...local-stack-alloc-add-references.gfx10.mir | 10 ++---
 .../local-stack-alloc-block-sp-reference.ll   | 11 ++---
 .../local-stack-alloc-sort-framerefs.mir      |  4 +-
 llvm/test/CodeGen/Thumb/frame-chain.ll        | 12 ++----
 5 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 987f64f56403d..d316f8d804f53 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -51,6 +51,7 @@ namespace {
   class FrameRef {
     MachineBasicBlock::iterator MI; // Instr referencing the frame
     int64_t LocalOffset;            // Local offset of the frame idx referenced
+    int64_t InstrOffset; // Offset of the instruction from the frame index
     int FrameIdx;                   // The frame index
 
     // Order reference instruction appears in program. Used to ensure
@@ -59,16 +60,20 @@ namespace {
     unsigned Order;
 
   public:
-    FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
-      MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+    FrameRef(MachineInstr *I, int64_t Offset, int64_t InstrOffset, int Idx,
+             unsigned Ord)
+        : MI(I), LocalOffset(Offset), InstrOffset(InstrOffset), FrameIdx(Idx),
+          Order(Ord) {}
 
     bool operator<(const FrameRef &RHS) const {
-      return std::tie(LocalOffset, FrameIdx, Order) <
-             std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
+      return std::tuple(LocalOffset + InstrOffset, FrameIdx, Order) <
+             std::tuple(RHS.LocalOffset + RHS.InstrOffset, RHS.FrameIdx,
+                        RHS.Order);
     }
 
     MachineBasicBlock::iterator getMachineInstr() const { return MI; }
     int64_t getLocalOffset() const { return LocalOffset; }
+    int64_t getInstrOffset() const { return InstrOffset; }
     int getFrameIndex() const { return FrameIdx; }
   };
 
@@ -335,20 +340,27 @@ bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) {
       // than that, but the increased register pressure makes that a
       // tricky thing to balance. Investigate if re-materializing these
       // becomes an issue.
-      for (const MachineOperand &MO : MI.operands()) {
+      for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+           ++OpIdx) {
+        const MachineOperand &MO = MI.getOperand(OpIdx);
         // Consider replacing all frame index operands that reference
         // an object allocated in the local block.
-        if (MO.isFI()) {
-          // Don't try this with values not in the local block.
-          if (!MFI.isObjectPreAllocated(MO.getIndex()))
-            break;
-          int Idx = MO.getIndex();
-          int64_t LocalOffset = LocalOffsets[Idx];
-          if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
-            break;
-          FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
+        if (!MO.isFI())
+          continue;
+
+        int FrameIdx = MO.getIndex();
+        // Don't try this with values not in the local block.
+        if (!MFI.isObjectPreAllocated(FrameIdx))
+          break;
+
+        int64_t LocalOffset = LocalOffsets[FrameIdx];
+        if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
           break;
-        }
+
+        int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, OpIdx);
+        FrameReferenceInsns.emplace_back(&MI, LocalOffset, InstrOffset,
+                                         FrameIdx, Order++);
+        break;
       }
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
index 8ea9ec397fe06..3be6456213168 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
@@ -49,15 +49,15 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
-    ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+    ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 100
     ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
-    ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 156, [[V_ADD_U32_e64_]], 0, implicit $exec
     ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
-    ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 412, [[V_ADD_U32_e64_]], 0, implicit $exec
     ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+    ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1245193 /* reguse:VGPR_32 */, [[COPY]]
     ; GFX10-NEXT: SI_RETURN
     ;
     ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 5f0ca7bc42ae0..3d02d70d2fdbb 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -294,12 +294,13 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_mov_b32 s0, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 offset:1024
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:  .LBB2_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT:    s_add_i32 s1, s0, 0x2000
+; FLATSCR-NEXT:    s_add_i32 s1, s0, 0x4000
 ; FLATSCR-NEXT:    s_add_i32 s0, s0, 1
 ; FLATSCR-NEXT:    s_cmpk_lt_u32 s0, 0x2120
 ; FLATSCR-NEXT:    scratch_store_byte off, v0, s1
@@ -307,12 +308,12 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; FLATSCR-NEXT:    s_cbranch_scc1 .LBB2_1
 ; FLATSCR-NEXT:  ; %bb.2: ; %split
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0x1000
-; FLATSCR-NEXT:    s_addk_i32 s0, 0x2000
+; FLATSCR-NEXT:    s_addk_i32 s0, 0x4000
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
+; FLATSCR-NEXT:    s_movk_i32 s0, 0x4000
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[10:11], off, s0 offset:16 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_dwordx4 v[4:7], off, s0 glc
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
index d3eac690312bc..eb9ba3b1b2618 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-sort-framerefs.mir
@@ -283,7 +283,7 @@ body:             |
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM48:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 384, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM49:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 392, 0 :: (dereferenceable invariant load (s64), addrspace 4)
     ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
     ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
     ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[V_MOV_B1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
@@ -828,7 +828,7 @@ body:             |
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM48:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 384, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
     ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM49:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 392, 0 :: (dereferenceable invariant load (s64), addrspace 4)
     ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], %stack.1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 8, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
     ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
     ; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[V_MOV_B1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 5)
diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll
index a680f2fa4a481..8dde3b5fed750 100644
--- a/llvm/test/CodeGen/Thumb/frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb/frame-chain.ll
@@ -150,8 +150,7 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; FP-NEXT:    subs r1, r3, r1
 ; FP-NEXT:    mov sp, r1
 ; FP-NEXT:    movs r1, #0
-; FP-NEXT:    str r1, [r6, #4]
-; FP-NEXT:    str r0, [r2]
+; FP-NEXT:    stm r2!, {r0, r1}
 ; FP-NEXT:    subs r6, r7, #7
 ; FP-NEXT:    subs r6, #1
 ; FP-NEXT:    mov sp, r6
@@ -184,8 +183,7 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; FP-AAPCS-NEXT:    subs r1, r3, r1
 ; FP-AAPCS-NEXT:    mov sp, r1
 ; FP-AAPCS-NEXT:    movs r1, #0
-; FP-AAPCS-NEXT:    str r1, [r6, #4]
-; FP-AAPCS-NEXT:    str r0, [r2]
+; FP-AAPCS-NEXT:    stm r2!, {r0, r1}
 ; FP-AAPCS-NEXT:    mov r6, r11
 ; FP-AAPCS-NEXT:    subs r6, #8
 ; FP-AAPCS-NEXT:    mov sp, r6
@@ -216,8 +214,7 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; NOFP-NEXT:    subs r1, r3, r1
 ; NOFP-NEXT:    mov sp, r1
 ; NOFP-NEXT:    movs r1, #0
-; NOFP-NEXT:    str r1, [r6, #4]
-; NOFP-NEXT:    str r0, [r2]
+; NOFP-NEXT:    stm r2!, {r0, r1}
 ; NOFP-NEXT:    subs r6, r7, #7
 ; NOFP-NEXT:    subs r6, #1
 ; NOFP-NEXT:    mov sp, r6
@@ -250,8 +247,7 @@ define dso_local void @required_fp(i32 %0, i32 %1) {
 ; NOFP-AAPCS-NEXT:    subs r1, r3, r1
 ; NOFP-AAPCS-NEXT:    mov sp, r1
 ; NOFP-AAPCS-NEXT:    movs r1, #0
-; NOFP-AAPCS-NEXT:    str r1, [r6, #4]
-; NOFP-AAPCS-NEXT:    str r0, [r2]
+; NOFP-AAPCS-NEXT:    stm r2!, {r0, r1}
 ; NOFP-AAPCS-NEXT:    mov r6, r11
 ; NOFP-AAPCS-NEXT:    subs r6, #8
 ; NOFP-AAPCS-NEXT:    mov sp, r6



More information about the llvm-commits mailing list